battle-test parser

This commit is contained in:
byte[] 2019-11-10 18:35:52 -05:00
parent 3b5a705ecf
commit 41867a6cec
14 changed files with 228 additions and 51 deletions

View file

@ -17,7 +17,10 @@ config :bcrypt_elixir,
config :philomena,
password_pepper: System.get_env("PASSWORD_PEPPER"),
otp_secret_key: System.get_env("OTP_SECRET_KEY"),
image_url_root: System.get_env("IMAGE_URL_ROOT")
image_url_root: System.get_env("IMAGE_URL_ROOT"),
camo_host: System.get_env("CAMO_HOST"),
camo_key: System.get_env("CAMO_KEY"),
cdn_host: System.get_env("CDN_HOST")
config :philomena, Philomena.Repo,
# ssl: true,

31
lib/camo/image.ex Normal file
View file

@ -0,0 +1,31 @@
defmodule Camo.Image do
  @moduledoc """
  Rewrites external image URLs so they are served through the Camo
  asset proxy instead of being hotlinked directly.
  """

  @doc """
  Returns the Camo proxy URL for `input`.

  The URL is returned unchanged when it has no host component (relative
  URL) or when its host already ends with the configured CDN host.
  """
  def image_url(input) do
    case URI.parse(input) do
      %{host: nil} ->
        input

      %{host: host} ->
        if String.ends_with?(host, cdn_host()) do
          input
        else
          proxied_url(input)
        end
    end
  end

  # Builds the https Camo URI: the path carries the hex-encoded
  # HMAC-SHA1 digest of the original URL, and the original URL itself
  # is passed along in the query string.
  defp proxied_url(input) do
    digest =
      :crypto.hmac(:sha, camo_key(), input)
      |> Base.encode16()

    URI.to_string(%URI{
      scheme: "https",
      host: camo_host(),
      path: "/" <> digest,
      query: input
    })
  end

  # Runtime configuration lookups (config/*.exs, sourced from env vars).
  defp cdn_host, do: Application.get_env(:philomena, :cdn_host)

  defp camo_key, do: Application.get_env(:philomena, :camo_key)

  defp camo_host, do: Application.get_env(:philomena, :camo_host)
end

View file

@ -0,0 +1,114 @@
defmodule Philomena.Textile.Renderer do
  @moduledoc """
  Renders post/comment Textile bodies to HTML, resolving `>>id` image
  references against the database with a single batched query per
  collection.
  """

  alias Textile.Parser
  alias Philomena.Images.Image
  alias Philomena.Repo
  import Phoenix.HTML
  import Phoenix.HTML.Link
  import Ecto.Query

  # Parser configured to route embedded image URLs through the Camo proxy.
  @parser %Parser{
    image_transform: &Camo.Image.image_url/1
  }

  @doc """
  Renders a single post's body to an HTML string.

  Convenience wrapper over `render_collection/1`.
  """
  def render_one(post) do
    hd(render_collection([post]))
  end

  @doc """
  Renders a list of posts (anything with a `body` field) to HTML strings.

  Bodies are parsed first so all `&gt;&gt;id` image references can be
  gathered and fetched in one query before the trees are flattened back
  into HTML.
  """
  def render_collection(posts) do
    parsed =
      posts
      |> Enum.map(fn post ->
        Parser.parse(@parser, post.body)
      end)

    # Only :text nodes can contain >>id references; markup nodes are
    # already-rendered HTML and are skipped when collecting image ids.
    images =
      parsed
      |> Enum.flat_map(fn tree ->
        tree
        |> Enum.flat_map(fn
          {:text, text} ->
            [text]

          _ ->
            []
        end)
      end)
      |> find_images

    parsed
    |> Enum.map(fn tree ->
      tree
      |> Enum.map(fn
        {:text, text} ->
          text
          |> replacement_entities()
          |> replacement_images(images)

        {_k, markup} ->
          markup
      end)
      |> Enum.join()
    end)
  end

  # Typographic substitutions on already-HTML-escaped text. Note that
  # "-&gt;" must be replaced before the bare-dash rule sees the text.
  defp replacement_entities(t) do
    t
    |> String.replace("-&gt;", "&rarr;")
    |> String.replace("--", "&mdash;")
    |> String.replace("...", "&hellip;")
    |> String.replace(~r|(\s)-(\s)|, "\\1&mdash;\\2")
    # &trade; is the HTML named reference for (tm); "&tm;" is not a
    # defined entity and would render literally in browsers.
    |> String.replace("(tm)", "&trade;")
    |> String.replace("(c)", "&copy;")
    |> String.replace("(r)", "&reg;")
    |> String.replace("&apos;", "&rsquo;")
  end

  # Replaces escaped >>id references with rendered image partials.
  # Suffixes select a size: p = medium, t = small, s = thumb_small;
  # no suffix yields a plain link. Unknown/hidden ids are left as-is.
  defp replacement_images(t, images) do
    t
    |> String.replace(~r|&gt;&gt;(\d+)([pts])?|, fn match ->
      # Stupid, but the method doesn't give us capture group information
      match_data = Regex.run(~r|&gt;&gt;(\d+)([pts])?|, match, capture: :all_but_first)
      [image_id | rest] = match_data
      image = images[String.to_integer(image_id)]

      case [image | rest] do
        [nil, _] ->
          match

        [nil] ->
          match

        [image, "p"] ->
          Phoenix.View.render(PhilomenaWeb.ImageView, "_image_container.html", image: image, size: :medium)
          |> safe_to_string()

        [image, "t"] ->
          Phoenix.View.render(PhilomenaWeb.ImageView, "_image_container.html", image: image, size: :small)
          |> safe_to_string()

        [image, "s"] ->
          Phoenix.View.render(PhilomenaWeb.ImageView, "_image_container.html", image: image, size: :thumb_small)
          |> safe_to_string()

        [image] ->
          link(">>#{image.id}", to: "/#{image.id}")
          |> safe_to_string()
      end
    end)
  end

  # Scans the text segments for >>id references and fetches the
  # referenced, non-hidden images (tags preloaded), keyed by id.
  defp find_images(text_segments) do
    image_ids =
      text_segments
      |> Enum.flat_map(fn t ->
        Regex.scan(~r|&gt;&gt;(\d+)|, t, capture: :all_but_first)
        |> Enum.map(fn [first] -> String.to_integer(first) end)
      end)

    Image
    |> where([i], i.id in ^image_ids)
    |> where([i], i.hidden_from_users == false)
    |> preload(:tags)
    |> Repo.all()
    |> Map.new(fn image -> {image.id, image} end)
  end
end

View file

@ -1,7 +1,7 @@
defmodule PhilomenaWeb.ImageController do
use PhilomenaWeb, :controller
alias Philomena.{Images.Image, Comments.Comment}
alias Philomena.{Images.Image, Comments.Comment, Textile.Renderer}
alias Philomena.Repo
import Ecto.Query
@ -32,6 +32,13 @@ defmodule PhilomenaWeb.ImageController do
|> limit(25)
|> Repo.all()
rendered =
comments
|> Renderer.render_collection()
comments =
Enum.zip(comments, rendered)
render(conn, "show.html", image: conn.assigns.image, comments: comments)
end
end

View file

@ -1,7 +1,7 @@
defmodule PhilomenaWeb.TopicController do
use PhilomenaWeb, :controller
alias Philomena.{Forums.Forum, Topics.Topic, Posts.Post}
alias Philomena.{Forums.Forum, Topics.Topic, Posts.Post, Textile.Renderer}
alias Philomena.Repo
import Ecto.Query
@ -26,6 +26,12 @@ defmodule PhilomenaWeb.TopicController do
|> preload([:user, topic: :forum])
|> Repo.all()
rendered =
Renderer.render_collection(posts)
posts =
Enum.zip(posts, rendered)
posts =
%Scrivener.Page{
entries: posts,

View file

@ -19,7 +19,7 @@ defmodule PhilomenaWeb.Router do
scope "/" do
pipe_through :browser
pow_routes()
#pow_routes()
end
scope "/", PhilomenaWeb do

View file

@ -19,7 +19,7 @@ article.block.communication id="comment_#{@comment.id}"
/ br
/= if can?(:read, @comment)
= if !@comment.hidden_from_users do
=<> @comment.body
==<> @body
.block__content.communication__options
.flex.flex--wrap.flex--spaced-out

View file

@ -23,5 +23,5 @@
h4 Comments
#comments data-current-url="" data-loaded="true"
= for comment <- @comments do
= render PhilomenaWeb.CommentView, "_comment.html", comment: comment
= for {comment, body} <- @comments do
= render PhilomenaWeb.CommentView, "_comment.html", comment: comment, body: body

View file

@ -6,7 +6,7 @@ article.block.communication id="post_#{@post.id}"
span.communication__body__sender-name = render PhilomenaWeb.UserAttributionView, "_anon_user.html", object: @post
.communication__body__text
= if !@post.hidden_from_users do
=<> @post.body
==<> @body
.block__content.communication__options
.flex.flex--wrap.flex--spaced-out

View file

@ -35,8 +35,8 @@ h1 = @topic.title
/ The actual posts
.posts-area
.post-list
= for post <- @posts, !post.destroyed_content do
= render PhilomenaWeb.PostView, "_post.html", conn: @conn, post: post
= for {post, body} <- @posts, !post.destroyed_content do
= render PhilomenaWeb.PostView, "_post.html", conn: @conn, post: post, body: body
/include ../adverts/_box.html.slim

View file

@ -143,6 +143,7 @@ defmodule Textile.Lexer do
|> unwrap_and_tag(:link_end)
|> concat(
url_ending_in(string("]"))
|> ignore(string("]"))
|> unwrap_and_tag(:link_url)
)

View file

@ -13,6 +13,13 @@ defmodule Textile.MarkupLexer do
string("\n")
|> unwrap_and_tag(:newline)
preceding_whitespace =
choice([
double_newline,
newline,
special_characters()
])
# The literal tag is special, because
# 1. It needs to capture everything inside it as a distinct token.
# 2. It can be surrounded by markup on all sides.
@ -152,8 +159,8 @@ defmodule Textile.MarkupLexer do
literal,
bracketed_markup_closing_tags,
bracketed_markup_opening_tags |> lookahead_not(space()),
special_characters() |> concat(markup_opening_tags),
markup_closing_tags |> choice([special_characters(), ending_sequence]),
preceding_whitespace |> concat(markup_opening_tags),
markup_closing_tags |> lookahead(choice([special_characters(), ending_sequence])),
double_newline,
newline,
utf8_char([])

View file

@ -12,7 +12,7 @@ defmodule Textile.Parser do
]
def parse(%Parser{} = parser, input) do
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input),
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input |> remove_linefeeds()),
tokens <- TokenCoalescer.coalesce_lex(tokens),
{:ok, tree, []} <- textile_top(parser, tokens),
tree <- TokenCoalescer.coalesce_parse(tree)
@ -26,7 +26,7 @@ defmodule Textile.Parser do
#
# Backtracking LL parser for simplified Textile grammar
# Backtracking LL packrat parser for simplified Textile grammar
#
@ -45,7 +45,7 @@ defmodule Textile.Parser do
[{_token, string} | r_tokens] = tokens
{:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens)
{:ok, [text: escape_nl2br(string)] ++ next_tree, r2_tokens}
{:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens}
end
end
@ -55,12 +55,9 @@ defmodule Textile.Parser do
#
defp well_formed_including_paragraphs(_parser, []), do: {:ok, [], []}
defp well_formed_including_paragraphs(parser, [{:double_newline, _nl} | r_tokens]) do
with {:ok, tree, r2_tokens} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
else
_ ->
{:ok, [], r_tokens}
end
{:ok, tree, r2_tokens} = well_formed_including_paragraphs(parser, r_tokens)
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
end
defp well_formed_including_paragraphs(parser, tokens) do
@ -124,20 +121,22 @@ defmodule Textile.Parser do
# blockquote_open well_formed_including_paragraphs blockquote_close;
#
defp blockquote(parser, [{:blockquote_open_cite, author} | r_tokens]) do
with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(~s|[bq="#{author}"]|)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(~s|[bq="#{author}"]|)}, tree], r2_tokens}
end
end
defp blockquote(parser, [{:blockquote_open, open} | r_tokens]) do
with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(open)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
end
end
@ -150,11 +149,12 @@ defmodule Textile.Parser do
# spoiler_open well_formed_including_paragraphs spoiler_close;
#
defp spoiler(parser, [{:spoiler_open, open} | r_tokens]) do
with {:ok, tree, [{:spoiler_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(open)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:spoiler_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
end
end
@ -167,11 +167,12 @@ defmodule Textile.Parser do
# link_start well_formed_including_paragraphs link_end link_url;
#
defp link(parser, [{:link_start, start} | r_tokens]) do
with {:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(start)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} ->
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(start)}, tree], r2_tokens}
end
end

View file

@ -4,20 +4,22 @@ defmodule Textile.ParserHelpers do
defmacro attribute_parser(name, open_token, close_token, open_tag, close_tag) do
quote do
defp unquote(name)(parser, [{unquote(open_token), open} | r_tokens]) do
with {:ok, tree, [{unquote(close_token), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
else
_ ->
{:ok, [text: escape_html(open)], r_tokens}
case well_formed(parser, r_tokens) do
{:ok, tree, [{unquote(close_token), _close} | r2_tokens]} ->
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_html(open)}, tree], r2_tokens}
end
end
defp unquote(name)(parser, [{unquote(:"b_#{open_token}"), open} | r_tokens]) do
with {:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
else
_ ->
{:ok, [text: escape_html(open)], r_tokens}
case well_formed(parser, r_tokens) do
{:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} ->
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_html(open)}, tree], r2_tokens}
end
end
@ -26,6 +28,11 @@ defmodule Textile.ParserHelpers do
end
end
def remove_linefeeds(text) do
text
|> String.replace("\r", "")
end
def escape_nl2br(text) do
text
|> String.split("\n", trim: true)