battle-test parser

This commit is contained in:
byte[] 2019-11-10 18:35:52 -05:00
parent 3b5a705ecf
commit 41867a6cec
14 changed files with 228 additions and 51 deletions

View file

@ -17,7 +17,10 @@ config :bcrypt_elixir,
config :philomena,
password_pepper: System.get_env("PASSWORD_PEPPER"),
otp_secret_key: System.get_env("OTP_SECRET_KEY"),
image_url_root: System.get_env("IMAGE_URL_ROOT")
image_url_root: System.get_env("IMAGE_URL_ROOT"),
camo_host: System.get_env("CAMO_HOST"),
camo_key: System.get_env("CAMO_KEY"),
cdn_host: System.get_env("CDN_HOST")
config :philomena, Philomena.Repo,
# ssl: true,

31
lib/camo/image.ex Normal file
View file

@ -0,0 +1,31 @@
defmodule Camo.Image do
  @moduledoc """
  Rewrites external image URLs so they are served through the Camo
  asset proxy instead of being hotlinked directly.
  """

  @doc """
  Returns the Camo proxy URL for `input`.

  The URL is returned unchanged when it has no host component (relative
  URL) or when its host already ends with the configured CDN host.
  """
  def image_url(input) do
    case URI.parse(input) do
      %{host: nil} ->
        input

      %{host: host} ->
        if String.ends_with?(host, cdn_host()) do
          input
        else
          proxied_url(input)
        end
    end
  end

  # Builds the https Camo URI: the path carries the hex-encoded
  # HMAC-SHA1 digest of the original URL, and the original URL itself
  # is passed along in the query string.
  defp proxied_url(input) do
    digest =
      :crypto.hmac(:sha, camo_key(), input)
      |> Base.encode16()

    URI.to_string(%URI{
      scheme: "https",
      host: camo_host(),
      path: "/" <> digest,
      query: input
    })
  end

  # Runtime configuration lookups (config/*.exs, sourced from env vars).
  defp cdn_host, do: Application.get_env(:philomena, :cdn_host)

  defp camo_key, do: Application.get_env(:philomena, :camo_key)

  defp camo_host, do: Application.get_env(:philomena, :camo_host)
end

View file

@ -0,0 +1,114 @@
defmodule Philomena.Textile.Renderer do
  @moduledoc """
  Renders post/comment Textile bodies to HTML, resolving `>>id` image
  references against the database with a single batched query per
  collection.
  """

  alias Textile.Parser
  alias Philomena.Images.Image
  alias Philomena.Repo
  import Phoenix.HTML
  import Phoenix.HTML.Link
  import Ecto.Query

  # Parser configured to route embedded image URLs through the Camo proxy.
  @parser %Parser{
    image_transform: &Camo.Image.image_url/1
  }

  @doc """
  Renders a single post's body to an HTML string.

  Convenience wrapper over `render_collection/1`.
  """
  def render_one(post) do
    hd(render_collection([post]))
  end

  @doc """
  Renders a list of posts (anything with a `body` field) to HTML strings.

  Bodies are parsed first so all `&gt;&gt;id` image references can be
  gathered and fetched in one query before the trees are flattened back
  into HTML.
  """
  def render_collection(posts) do
    parsed =
      posts
      |> Enum.map(fn post ->
        Parser.parse(@parser, post.body)
      end)

    # Only :text nodes can contain >>id references; markup nodes are
    # already-rendered HTML and are skipped when collecting image ids.
    images =
      parsed
      |> Enum.flat_map(fn tree ->
        tree
        |> Enum.flat_map(fn
          {:text, text} ->
            [text]

          _ ->
            []
        end)
      end)
      |> find_images

    parsed
    |> Enum.map(fn tree ->
      tree
      |> Enum.map(fn
        {:text, text} ->
          text
          |> replacement_entities()
          |> replacement_images(images)

        {_k, markup} ->
          markup
      end)
      |> Enum.join()
    end)
  end

  # Typographic substitutions on already-HTML-escaped text. Note that
  # "-&gt;" must be replaced before the bare-dash rule sees the text.
  defp replacement_entities(t) do
    t
    |> String.replace("-&gt;", "&rarr;")
    |> String.replace("--", "&mdash;")
    |> String.replace("...", "&hellip;")
    |> String.replace(~r|(\s)-(\s)|, "\\1&mdash;\\2")
    # &trade; is the HTML named reference for (tm); "&tm;" is not a
    # defined entity and would render literally in browsers.
    |> String.replace("(tm)", "&trade;")
    |> String.replace("(c)", "&copy;")
    |> String.replace("(r)", "&reg;")
    |> String.replace("&apos;", "&rsquo;")
  end

  # Replaces escaped >>id references with rendered image partials.
  # Suffixes select a size: p = medium, t = small, s = thumb_small;
  # no suffix yields a plain link. Unknown/hidden ids are left as-is.
  defp replacement_images(t, images) do
    t
    |> String.replace(~r|&gt;&gt;(\d+)([pts])?|, fn match ->
      # Stupid, but the method doesn't give us capture group information
      match_data = Regex.run(~r|&gt;&gt;(\d+)([pts])?|, match, capture: :all_but_first)
      [image_id | rest] = match_data
      image = images[String.to_integer(image_id)]

      case [image | rest] do
        [nil, _] ->
          match

        [nil] ->
          match

        [image, "p"] ->
          Phoenix.View.render(PhilomenaWeb.ImageView, "_image_container.html", image: image, size: :medium)
          |> safe_to_string()

        [image, "t"] ->
          Phoenix.View.render(PhilomenaWeb.ImageView, "_image_container.html", image: image, size: :small)
          |> safe_to_string()

        [image, "s"] ->
          Phoenix.View.render(PhilomenaWeb.ImageView, "_image_container.html", image: image, size: :thumb_small)
          |> safe_to_string()

        [image] ->
          link(">>#{image.id}", to: "/#{image.id}")
          |> safe_to_string()
      end
    end)
  end

  # Scans the text segments for >>id references and fetches the
  # referenced, non-hidden images (tags preloaded), keyed by id.
  defp find_images(text_segments) do
    image_ids =
      text_segments
      |> Enum.flat_map(fn t ->
        Regex.scan(~r|&gt;&gt;(\d+)|, t, capture: :all_but_first)
        |> Enum.map(fn [first] -> String.to_integer(first) end)
      end)

    Image
    |> where([i], i.id in ^image_ids)
    |> where([i], i.hidden_from_users == false)
    |> preload(:tags)
    |> Repo.all()
    |> Map.new(fn image -> {image.id, image} end)
  end
end

View file

@ -1,7 +1,7 @@
defmodule PhilomenaWeb.ImageController do
use PhilomenaWeb, :controller
alias Philomena.{Images.Image, Comments.Comment}
alias Philomena.{Images.Image, Comments.Comment, Textile.Renderer}
alias Philomena.Repo
import Ecto.Query
@ -32,6 +32,13 @@ defmodule PhilomenaWeb.ImageController do
|> limit(25)
|> Repo.all()
rendered =
comments
|> Renderer.render_collection()
comments =
Enum.zip(comments, rendered)
render(conn, "show.html", image: conn.assigns.image, comments: comments)
end
end

View file

@ -1,7 +1,7 @@
defmodule PhilomenaWeb.TopicController do
use PhilomenaWeb, :controller
alias Philomena.{Forums.Forum, Topics.Topic, Posts.Post}
alias Philomena.{Forums.Forum, Topics.Topic, Posts.Post, Textile.Renderer}
alias Philomena.Repo
import Ecto.Query
@ -26,6 +26,12 @@ defmodule PhilomenaWeb.TopicController do
|> preload([:user, topic: :forum])
|> Repo.all()
rendered =
Renderer.render_collection(posts)
posts =
Enum.zip(posts, rendered)
posts =
%Scrivener.Page{
entries: posts,

View file

@ -19,7 +19,7 @@ defmodule PhilomenaWeb.Router do
scope "/" do
pipe_through :browser
pow_routes()
#pow_routes()
end
scope "/", PhilomenaWeb do

View file

@ -19,7 +19,7 @@ article.block.communication id="comment_#{@comment.id}"
/ br
/= if can?(:read, @comment)
= if !@comment.hidden_from_users do
=<> @comment.body
==<> @body
.block__content.communication__options
.flex.flex--wrap.flex--spaced-out

View file

@ -23,5 +23,5 @@
h4 Comments
#comments data-current-url="" data-loaded="true"
= for comment <- @comments do
= render PhilomenaWeb.CommentView, "_comment.html", comment: comment
= for {comment, body} <- @comments do
= render PhilomenaWeb.CommentView, "_comment.html", comment: comment, body: body

View file

@ -6,7 +6,7 @@ article.block.communication id="post_#{@post.id}"
span.communication__body__sender-name = render PhilomenaWeb.UserAttributionView, "_anon_user.html", object: @post
.communication__body__text
= if !@post.hidden_from_users do
=<> @post.body
==<> @body
.block__content.communication__options
.flex.flex--wrap.flex--spaced-out

View file

@ -35,8 +35,8 @@ h1 = @topic.title
/ The actual posts
.posts-area
.post-list
= for post <- @posts, !post.destroyed_content do
= render PhilomenaWeb.PostView, "_post.html", conn: @conn, post: post
= for {post, body} <- @posts, !post.destroyed_content do
= render PhilomenaWeb.PostView, "_post.html", conn: @conn, post: post, body: body
/include ../adverts/_box.html.slim

View file

@ -143,6 +143,7 @@ defmodule Textile.Lexer do
|> unwrap_and_tag(:link_end)
|> concat(
url_ending_in(string("]"))
|> ignore(string("]"))
|> unwrap_and_tag(:link_url)
)

View file

@ -13,6 +13,13 @@ defmodule Textile.MarkupLexer do
string("\n")
|> unwrap_and_tag(:newline)
preceding_whitespace =
choice([
double_newline,
newline,
special_characters()
])
# The literal tag is special, because
# 1. It needs to capture everything inside it as a distinct token.
# 2. It can be surrounded by markup on all sides.
@ -152,8 +159,8 @@ defmodule Textile.MarkupLexer do
literal,
bracketed_markup_closing_tags,
bracketed_markup_opening_tags |> lookahead_not(space()),
special_characters() |> concat(markup_opening_tags),
markup_closing_tags |> choice([special_characters(), ending_sequence]),
preceding_whitespace |> concat(markup_opening_tags),
markup_closing_tags |> lookahead(choice([special_characters(), ending_sequence])),
double_newline,
newline,
utf8_char([])

View file

@ -12,7 +12,7 @@ defmodule Textile.Parser do
]
def parse(%Parser{} = parser, input) do
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input),
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input |> remove_linefeeds()),
tokens <- TokenCoalescer.coalesce_lex(tokens),
{:ok, tree, []} <- textile_top(parser, tokens),
tree <- TokenCoalescer.coalesce_parse(tree)
@ -26,7 +26,7 @@ defmodule Textile.Parser do
#
# Backtracking LL parser for simplified Textile grammar
# Backtracking LL packrat parser for simplified Textile grammar
#
@ -45,7 +45,7 @@ defmodule Textile.Parser do
[{_token, string} | r_tokens] = tokens
{:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens)
{:ok, [text: escape_nl2br(string)] ++ next_tree, r2_tokens}
{:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens}
end
end
@ -55,12 +55,9 @@ defmodule Textile.Parser do
#
defp well_formed_including_paragraphs(_parser, []), do: {:ok, [], []}
defp well_formed_including_paragraphs(parser, [{:double_newline, _nl} | r_tokens]) do
with {:ok, tree, r2_tokens} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
else
_ ->
{:ok, [], r_tokens}
end
{:ok, tree, r2_tokens} = well_formed_including_paragraphs(parser, r_tokens)
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
end
defp well_formed_including_paragraphs(parser, tokens) do
@ -124,20 +121,22 @@ defmodule Textile.Parser do
# blockquote_open well_formed_including_paragraphs blockquote_close;
#
defp blockquote(parser, [{:blockquote_open_cite, author} | r_tokens]) do
with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(~s|[bq="#{author}"]|)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(~s|[bq="#{author}"]|)}, tree], r2_tokens}
end
end
defp blockquote(parser, [{:blockquote_open, open} | r_tokens]) do
with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(open)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
end
end
@ -150,11 +149,12 @@ defmodule Textile.Parser do
# spoiler_open well_formed_including_paragraphs spoiler_close;
#
defp spoiler(parser, [{:spoiler_open, open} | r_tokens]) do
with {:ok, tree, [{:spoiler_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(open)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:spoiler_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
end
end
@ -167,11 +167,12 @@ defmodule Textile.Parser do
# link_start well_formed_including_paragraphs link_end link_url;
#
defp link(parser, [{:link_start, start} | r_tokens]) do
with {:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
else
_ ->
{:ok, [text: escape_nl2br(start)], r_tokens}
case well_formed_including_paragraphs(parser, r_tokens) do
{:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} ->
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(start)}, tree], r2_tokens}
end
end

View file

@ -4,20 +4,22 @@ defmodule Textile.ParserHelpers do
defmacro attribute_parser(name, open_token, close_token, open_tag, close_tag) do
quote do
defp unquote(name)(parser, [{unquote(open_token), open} | r_tokens]) do
with {:ok, tree, [{unquote(close_token), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
else
_ ->
{:ok, [text: escape_html(open)], r_tokens}
case well_formed(parser, r_tokens) do
{:ok, tree, [{unquote(close_token), _close} | r2_tokens]} ->
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_html(open)}, tree], r2_tokens}
end
end
defp unquote(name)(parser, [{unquote(:"b_#{open_token}"), open} | r_tokens]) do
with {:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
else
_ ->
{:ok, [text: escape_html(open)], r_tokens}
case well_formed(parser, r_tokens) do
{:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} ->
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_html(open)}, tree], r2_tokens}
end
end
@ -26,6 +28,11 @@ defmodule Textile.ParserHelpers do
end
end
def remove_linefeeds(text) do
text
|> String.replace("\r", "")
end
def escape_nl2br(text) do
text
|> String.split("\n", trim: true)