mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-01-19 14:17:59 +01:00
battle-test parser
This commit is contained in:
parent
3b5a705ecf
commit
41867a6cec
14 changed files with 228 additions and 51 deletions
|
@ -17,7 +17,10 @@ config :bcrypt_elixir,
|
|||
config :philomena,
|
||||
password_pepper: System.get_env("PASSWORD_PEPPER"),
|
||||
otp_secret_key: System.get_env("OTP_SECRET_KEY"),
|
||||
image_url_root: System.get_env("IMAGE_URL_ROOT")
|
||||
image_url_root: System.get_env("IMAGE_URL_ROOT"),
|
||||
camo_host: System.get_env("CAMO_HOST"),
|
||||
camo_key: System.get_env("CAMO_KEY"),
|
||||
cdn_host: System.get_env("CDN_HOST")
|
||||
|
||||
config :philomena, Philomena.Repo,
|
||||
# ssl: true,
|
||||
|
|
31
lib/camo/image.ex
Normal file
31
lib/camo/image.ex
Normal file
|
@ -0,0 +1,31 @@
|
|||
defmodule Camo.Image do
  @moduledoc """
  Builds URLs that route externally hosted images through a Camo proxy.

  The `:cdn_host`, `:camo_host` and `:camo_key` settings are read from the
  `:philomena` application environment each time they are needed.
  """

  @doc """
  Returns the URL that should be used to display the image at `input`.

  `input` is returned unchanged when it has no host component (for example a
  relative path) or when its host ends with the configured CDN host.

  Otherwise an `https` URL on the Camo host is returned. Its path is the
  lowercase hex HMAC-SHA1 of `input` keyed with the Camo key, and its query
  string carries `input`, form-encoded, in the `url` parameter.
  """
  def image_url(input) do
    %{host: host} = URI.parse(input)

    if !host or String.ends_with?(host, cdn_host()) do
      input
    else
      # Camo's documented URL format uses a lowercase hex digest, so the
      # uppercase default of Base.encode16/1 must not be used here.
      # :crypto.mac/4 replaces :crypto.hmac/3, which was removed in OTP 24.
      camo_digest =
        :crypto.mac(:hmac, :sha, camo_key(), input)
        |> Base.encode16(case: :lower)

      camo_uri = %URI{
        scheme: "https",
        host: camo_host(),
        path: "/" <> camo_digest,
        # The target is passed as an encoded `url` parameter; embedding it raw
        # would merge its own `?` and `&` into the proxy's query string.
        query: URI.encode_query(url: input)
      }

      URI.to_string(camo_uri)
    end
  end

  # Host suffix of first-party image URLs that must not be proxied.
  defp cdn_host do
    Application.get_env(:philomena, :cdn_host)
  end

  # Shared secret used to sign proxied URLs.
  defp camo_key do
    Application.get_env(:philomena, :camo_key)
  end

  # Host name of the Camo proxy.
  defp camo_host do
    Application.get_env(:philomena, :camo_host)
  end
end
|
114
lib/philomena/textile/renderer.ex
Normal file
114
lib/philomena/textile/renderer.ex
Normal file
|
@ -0,0 +1,114 @@
|
|||
defmodule Philomena.Textile.Renderer do
  @moduledoc """
  Renders the Textile `body` of posts/comments to HTML strings.

  Each body is parsed with `Textile.Parser`. Text nodes of the resulting tree
  then get typographic replacements and have `>>ID` image references expanded;
  every other node's payload is emitted as-is.

  NOTE(review): the patterns below look for a literal `>` (`->`, `>>123`). If
  the parser HTML-escapes text nodes before they reach this module, they would
  arrive as `&gt;` and never match — confirm against `Textile.Parser`.
  """

  alias Textile.Parser
  alias Philomena.Images.Image
  alias Philomena.Repo
  import Phoenix.HTML
  import Phoenix.HTML.Link
  import Ecto.Query

  @parser %Parser{
    image_transform: &Camo.Image.image_url/1
  }

  @doc """
  Renders a single item that has a `body` field and returns its HTML string.
  """
  def render_one(post) do
    hd(render_collection([post]))
  end

  @doc """
  Renders every item in `posts` and returns the HTML strings in the same order.

  All bodies are parsed first so that the images referenced anywhere in the
  collection can be loaded together rather than once per item.
  """
  def render_collection(posts) do
    parsed = Enum.map(posts, fn post -> Parser.parse(@parser, post.body) end)

    images =
      parsed
      |> Enum.flat_map(fn tree ->
        Enum.flat_map(tree, fn
          {:text, text} -> [text]
          _ -> []
        end)
      end)
      |> find_images()

    Enum.map(parsed, fn tree ->
      Enum.map_join(tree, fn
        {:text, text} ->
          text
          |> replacement_entities()
          |> replacement_images(images)

        {_k, markup} ->
          markup
      end)
    end)
  end

  # Applies typographic substitutions to a text node. Order matters: "->" must
  # be handled before "--", and both before the lone " - " dash rule.
  defp replacement_entities(t) do
    t
    |> String.replace("->", "→")
    |> String.replace("--", "—")
    |> String.replace("...", "…")
    |> String.replace(~r|(\s)-(\s)|, "\\1—\\2")
    # "&tm;" is not an HTML entity; the trademark sign is "&trade;".
    |> String.replace("(tm)", "&trade;")
    |> String.replace("(c)", "©")
    |> String.replace("(r)", "®")
    |> String.replace("'", "’")
  end

  # Expands ">>ID" references in a text node using the preloaded `images` map
  # (id => image). A "p", "t" or "s" suffix embeds the image container at
  # medium, small or thumb_small size; no suffix produces a link. References to
  # ids absent from `images` are left untouched.
  defp replacement_images(t, images) do
    # A replacement function of arity 3 receives the whole match followed by
    # each capture; a group that did not participate arrives as "".
    Regex.replace(~r|>>(\d+)([pts])?|, t, fn match, image_id, suffix ->
      case {images[String.to_integer(image_id)], suffix} do
        {nil, _suffix} ->
          match

        {image, "p"} ->
          render_image_container(image, :medium)

        {image, "t"} ->
          render_image_container(image, :small)

        {image, "s"} ->
          render_image_container(image, :thumb_small)

        {image, ""} ->
          link(">>#{image.id}", to: "/#{image.id}")
          |> safe_to_string()
      end
    end)
  end

  # Renders the shared image container partial at the given size to a string.
  defp render_image_container(image, size) do
    Phoenix.View.render(PhilomenaWeb.ImageView, "_image_container.html", image: image, size: size)
    |> safe_to_string()
  end

  # Collects every id referenced as ">>ID" in the text segments and returns a
  # map of id => image for those that exist and are not hidden from users.
  defp find_images(text_segments) do
    text_segments
    |> Enum.flat_map(fn t ->
      ~r|>>(\d+)|
      |> Regex.scan(t, capture: :all_but_first)
      |> Enum.map(fn [first] -> String.to_integer(first) end)
    end)
    |> Enum.uniq()
    |> load_images()
  end

  # No references means no query is needed.
  defp load_images([]), do: %{}

  defp load_images(image_ids) do
    Image
    |> where([i], i.id in ^image_ids)
    |> where([i], i.hidden_from_users == false)
    |> preload(:tags)
    |> Repo.all()
    |> Map.new(fn image -> {image.id, image} end)
  end
end
|
|
@ -1,7 +1,7 @@
|
|||
defmodule PhilomenaWeb.ImageController do
|
||||
use PhilomenaWeb, :controller
|
||||
|
||||
alias Philomena.{Images.Image, Comments.Comment}
|
||||
alias Philomena.{Images.Image, Comments.Comment, Textile.Renderer}
|
||||
alias Philomena.Repo
|
||||
import Ecto.Query
|
||||
|
||||
|
@ -32,6 +32,13 @@ defmodule PhilomenaWeb.ImageController do
|
|||
|> limit(25)
|
||||
|> Repo.all()
|
||||
|
||||
rendered =
|
||||
comments
|
||||
|> Renderer.render_collection()
|
||||
|
||||
comments =
|
||||
Enum.zip(comments, rendered)
|
||||
|
||||
render(conn, "show.html", image: conn.assigns.image, comments: comments)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
defmodule PhilomenaWeb.TopicController do
|
||||
use PhilomenaWeb, :controller
|
||||
|
||||
alias Philomena.{Forums.Forum, Topics.Topic, Posts.Post}
|
||||
alias Philomena.{Forums.Forum, Topics.Topic, Posts.Post, Textile.Renderer}
|
||||
alias Philomena.Repo
|
||||
import Ecto.Query
|
||||
|
||||
|
@ -26,6 +26,12 @@ defmodule PhilomenaWeb.TopicController do
|
|||
|> preload([:user, topic: :forum])
|
||||
|> Repo.all()
|
||||
|
||||
rendered =
|
||||
Renderer.render_collection(posts)
|
||||
|
||||
posts =
|
||||
Enum.zip(posts, rendered)
|
||||
|
||||
posts =
|
||||
%Scrivener.Page{
|
||||
entries: posts,
|
||||
|
|
|
@ -19,7 +19,7 @@ defmodule PhilomenaWeb.Router do
|
|||
scope "/" do
|
||||
pipe_through :browser
|
||||
|
||||
pow_routes()
|
||||
#pow_routes()
|
||||
end
|
||||
|
||||
scope "/", PhilomenaWeb do
|
||||
|
|
|
@ -19,7 +19,7 @@ article.block.communication id="comment_#{@comment.id}"
|
|||
/ br
|
||||
/= if can?(:read, @comment)
|
||||
= if !@comment.hidden_from_users do
|
||||
=<> @comment.body
|
||||
==<> @body
|
||||
|
||||
.block__content.communication__options
|
||||
.flex.flex--wrap.flex--spaced-out
|
||||
|
|
|
@ -23,5 +23,5 @@
|
|||
|
||||
h4 Comments
|
||||
#comments data-current-url="" data-loaded="true"
|
||||
= for comment <- @comments do
|
||||
= render PhilomenaWeb.CommentView, "_comment.html", comment: comment
|
||||
= for {comment, body} <- @comments do
|
||||
= render PhilomenaWeb.CommentView, "_comment.html", comment: comment, body: body
|
||||
|
|
|
@ -6,7 +6,7 @@ article.block.communication id="post_#{@post.id}"
|
|||
span.communication__body__sender-name = render PhilomenaWeb.UserAttributionView, "_anon_user.html", object: @post
|
||||
.communication__body__text
|
||||
= if !@post.hidden_from_users do
|
||||
=<> @post.body
|
||||
==<> @body
|
||||
|
||||
.block__content.communication__options
|
||||
.flex.flex--wrap.flex--spaced-out
|
||||
|
|
|
@ -35,8 +35,8 @@ h1 = @topic.title
|
|||
/ The actual posts
|
||||
.posts-area
|
||||
.post-list
|
||||
= for post <- @posts, !post.destroyed_content do
|
||||
= render PhilomenaWeb.PostView, "_post.html", conn: @conn, post: post
|
||||
= for {post, body} <- @posts, !post.destroyed_content do
|
||||
= render PhilomenaWeb.PostView, "_post.html", conn: @conn, post: post, body: body
|
||||
|
||||
/include ../adverts/_box.html.slim
|
||||
|
||||
|
|
|
@ -143,6 +143,7 @@ defmodule Textile.Lexer do
|
|||
|> unwrap_and_tag(:link_end)
|
||||
|> concat(
|
||||
url_ending_in(string("]"))
|
||||
|> ignore(string("]"))
|
||||
|> unwrap_and_tag(:link_url)
|
||||
)
|
||||
|
||||
|
|
|
@ -13,6 +13,13 @@ defmodule Textile.MarkupLexer do
|
|||
string("\n")
|
||||
|> unwrap_and_tag(:newline)
|
||||
|
||||
preceding_whitespace =
|
||||
choice([
|
||||
double_newline,
|
||||
newline,
|
||||
special_characters()
|
||||
])
|
||||
|
||||
# The literal tag is special, because
|
||||
# 1. It needs to capture everything inside it as a distinct token.
|
||||
# 2. It can be surrounded by markup on all sides.
|
||||
|
@ -152,8 +159,8 @@ defmodule Textile.MarkupLexer do
|
|||
literal,
|
||||
bracketed_markup_closing_tags,
|
||||
bracketed_markup_opening_tags |> lookahead_not(space()),
|
||||
special_characters() |> concat(markup_opening_tags),
|
||||
markup_closing_tags |> choice([special_characters(), ending_sequence]),
|
||||
preceding_whitespace |> concat(markup_opening_tags),
|
||||
markup_closing_tags |> lookahead(choice([special_characters(), ending_sequence])),
|
||||
double_newline,
|
||||
newline,
|
||||
utf8_char([])
|
||||
|
|
|
@ -12,7 +12,7 @@ defmodule Textile.Parser do
|
|||
]
|
||||
|
||||
def parse(%Parser{} = parser, input) do
|
||||
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input),
|
||||
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input |> remove_linefeeds()),
|
||||
tokens <- TokenCoalescer.coalesce_lex(tokens),
|
||||
{:ok, tree, []} <- textile_top(parser, tokens),
|
||||
tree <- TokenCoalescer.coalesce_parse(tree)
|
||||
|
@ -26,7 +26,7 @@ defmodule Textile.Parser do
|
|||
|
||||
|
||||
#
|
||||
# Backtracking LL parser for simplified Textile grammar
|
||||
# Backtracking LL packrat parser for simplified Textile grammar
|
||||
#
|
||||
|
||||
|
||||
|
@ -45,7 +45,7 @@ defmodule Textile.Parser do
|
|||
[{_token, string} | r_tokens] = tokens
|
||||
{:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens)
|
||||
|
||||
{:ok, [text: escape_nl2br(string)] ++ next_tree, r2_tokens}
|
||||
{:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -55,12 +55,9 @@ defmodule Textile.Parser do
|
|||
#
|
||||
defp well_formed_including_paragraphs(_parser, []), do: {:ok, [], []}
|
||||
defp well_formed_including_paragraphs(parser, [{:double_newline, _nl} | r_tokens]) do
|
||||
with {:ok, tree, r2_tokens} <- well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
{:ok, [], r_tokens}
|
||||
end
|
||||
{:ok, tree, r2_tokens} = well_formed_including_paragraphs(parser, r_tokens)
|
||||
|
||||
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
|
||||
end
|
||||
|
||||
defp well_formed_including_paragraphs(parser, tokens) do
|
||||
|
@ -124,20 +121,22 @@ defmodule Textile.Parser do
|
|||
# blockquote_open well_formed_including_paragraphs blockquote_close;
|
||||
#
|
||||
defp blockquote(parser, [{:blockquote_open_cite, author} | r_tokens]) do
|
||||
with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
{:ok, [text: escape_nl2br(~s|[bq="#{author}"]|)], r_tokens}
|
||||
case well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(~s|[bq="#{author}"]|)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
|
||||
defp blockquote(parser, [{:blockquote_open, open} | r_tokens]) do
|
||||
with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
{:ok, [text: escape_nl2br(open)], r_tokens}
|
||||
case well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -150,11 +149,12 @@ defmodule Textile.Parser do
|
|||
# spoiler_open well_formed_including_paragraphs spoiler_close;
|
||||
#
|
||||
defp spoiler(parser, [{:spoiler_open, open} | r_tokens]) do
|
||||
with {:ok, tree, [{:spoiler_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
{:ok, [text: escape_nl2br(open)], r_tokens}
|
||||
case well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, tree, [{:spoiler_close, _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -167,11 +167,12 @@ defmodule Textile.Parser do
|
|||
# link_start well_formed_including_paragraphs link_end link_url;
|
||||
#
|
||||
defp link(parser, [{:link_start, start} | r_tokens]) do
|
||||
with {:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
{:ok, [text: escape_nl2br(start)], r_tokens}
|
||||
case well_formed_including_paragraphs(parser, r_tokens) do
|
||||
{:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(start)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -4,20 +4,22 @@ defmodule Textile.ParserHelpers do
|
|||
defmacro attribute_parser(name, open_token, close_token, open_tag, close_tag) do
|
||||
quote do
|
||||
defp unquote(name)(parser, [{unquote(open_token), open} | r_tokens]) do
|
||||
with {:ok, tree, [{unquote(close_token), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do
|
||||
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
{:ok, [text: escape_html(open)], r_tokens}
|
||||
case well_formed(parser, r_tokens) do
|
||||
{:ok, tree, [{unquote(close_token), _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_html(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
|
||||
defp unquote(name)(parser, [{unquote(:"b_#{open_token}"), open} | r_tokens]) do
|
||||
with {:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do
|
||||
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
{:ok, [text: escape_html(open)], r_tokens}
|
||||
case well_formed(parser, r_tokens) do
|
||||
{:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_html(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -26,6 +28,11 @@ defmodule Textile.ParserHelpers do
|
|||
end
|
||||
end
|
||||
|
||||
# Strips every carriage return ("\r") from `text`, so CRLF line endings become
# bare "\n". Despite the name, "\n" characters themselves are left in place.
def remove_linefeeds(text) do
  String.replace(text, "\r", "")
end
|
||||
|
||||
def escape_nl2br(text) do
|
||||
text
|
||||
|> String.split("\n", trim: true)
|
||||
|
|
Loading…
Reference in a new issue