mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-27 13:47:58 +01:00
discard old parser
This commit is contained in:
parent
6ccb3f645a
commit
4c82c90cea
13 changed files with 480 additions and 1394 deletions
|
@ -1,238 +0,0 @@
|
|||
defmodule FastTextile.Lexer do
|
||||
import NimbleParsec
|
||||
|
||||
space =
|
||||
utf8_char('\f \r\t\u00a0\u1680\u180e\u202f\u205f\u3000' ++ Enum.to_list(0x2000..0x200a))
|
||||
|
||||
extended_space =
|
||||
choice([
|
||||
space,
|
||||
string("\n"),
|
||||
eos()
|
||||
])
|
||||
|
||||
space_token =
|
||||
space
|
||||
|> unwrap_and_tag(:space)
|
||||
|
||||
double_newline =
|
||||
string("\n")
|
||||
|> repeat(space)
|
||||
|> string("\n")
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:double_newline)
|
||||
|
||||
newline =
|
||||
string("\n")
|
||||
|> unwrap_and_tag(:newline)
|
||||
|
||||
link_ending_characters =
|
||||
utf8_char('@#$%&(),.:;<=?\\`|\'')
|
||||
|
||||
bracket_link_ending_characters =
|
||||
utf8_char('" []')
|
||||
|
||||
end_of_link =
|
||||
choice([
|
||||
concat(link_ending_characters, extended_space),
|
||||
extended_space
|
||||
])
|
||||
|
||||
bracketed_literal =
|
||||
ignore(string("[=="))
|
||||
|> repeat(lookahead_not(string("==]")) |> utf8_char([]))
|
||||
|> ignore(string("==]"))
|
||||
|
||||
unbracketed_literal =
|
||||
ignore(string("=="))
|
||||
|> repeat(lookahead_not(string("==")) |> utf8_char([]))
|
||||
|> ignore(string("=="))
|
||||
|
||||
literal =
|
||||
choice([
|
||||
bracketed_literal,
|
||||
unbracketed_literal
|
||||
])
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:literal)
|
||||
|
||||
bq_cite_start =
|
||||
string("[bq=\"")
|
||||
|> unwrap_and_tag(:bq_cite_start)
|
||||
|
||||
bq_cite_open =
|
||||
string("\"]")
|
||||
|> unwrap_and_tag(:bq_cite_open)
|
||||
|
||||
bq_open =
|
||||
string("[bq]")
|
||||
|> unwrap_and_tag(:bq_open)
|
||||
|
||||
bq_close =
|
||||
string("[/bq]")
|
||||
|> unwrap_and_tag(:bq_close)
|
||||
|
||||
spoiler_open =
|
||||
string("[spoiler]")
|
||||
|> unwrap_and_tag(:spoiler_open)
|
||||
|
||||
spoiler_close =
|
||||
string("[/spoiler]")
|
||||
|> unwrap_and_tag(:spoiler_close)
|
||||
|
||||
image_url_scheme =
|
||||
choice([
|
||||
string("//"),
|
||||
string("/"),
|
||||
string("https://"),
|
||||
string("http://")
|
||||
])
|
||||
|
||||
link_url_scheme =
|
||||
choice([
|
||||
string("#"),
|
||||
image_url_scheme
|
||||
])
|
||||
|
||||
unbracketed_url =
|
||||
string(":")
|
||||
|> concat(link_url_scheme)
|
||||
|> repeat(lookahead_not(end_of_link) |> utf8_char([]))
|
||||
|
||||
unbracketed_image_url =
|
||||
unbracketed_url
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:unbracketed_image_url)
|
||||
|
||||
unbracketed_link_url =
|
||||
string("\"")
|
||||
|> concat(unbracketed_url)
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:unbracketed_link_url)
|
||||
|
||||
unbracketed_image =
|
||||
ignore(string("!"))
|
||||
|> concat(image_url_scheme)
|
||||
|> repeat(utf8_char(not: ?!))
|
||||
|> ignore(string("!"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:unbracketed_image)
|
||||
|> concat(optional(unbracketed_image_url))
|
||||
|
||||
bracketed_image =
|
||||
ignore(string("[!"))
|
||||
|> concat(image_url_scheme)
|
||||
|> repeat(lookahead_not(string("!]")) |> utf8_char([]))
|
||||
|> ignore(string("!]"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:bracketed_image)
|
||||
|> concat(optional(unbracketed_image_url))
|
||||
|
||||
link_delim =
|
||||
string("\"")
|
||||
|> unwrap_and_tag(:link_delim)
|
||||
|
||||
bracketed_link_open =
|
||||
string("[\"")
|
||||
|> unwrap_and_tag(:bracketed_link_open)
|
||||
|
||||
bracketed_link_url =
|
||||
string("\":")
|
||||
|> concat(link_url_scheme)
|
||||
|> repeat(lookahead_not(bracket_link_ending_characters) |> utf8_char([]))
|
||||
|> ignore(string("]"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:bracketed_link_url)
|
||||
|
||||
bracketed_b_open = string("[**") |> unwrap_and_tag(:bracketed_b_open)
|
||||
bracketed_i_open = string("[__") |> unwrap_and_tag(:bracketed_i_open)
|
||||
bracketed_strong_open = string("[*") |> unwrap_and_tag(:bracketed_strong_open)
|
||||
bracketed_em_open = string("[_") |> unwrap_and_tag(:bracketed_em_open)
|
||||
bracketed_code_open = string("[@") |> unwrap_and_tag(:bracketed_code_open)
|
||||
bracketed_ins_open = string("[+") |> unwrap_and_tag(:bracketed_ins_open)
|
||||
bracketed_sup_open = string("[^") |> unwrap_and_tag(:bracketed_sup_open)
|
||||
bracketed_del_open = string("[-") |> unwrap_and_tag(:bracketed_del_open)
|
||||
bracketed_sub_open = string("[~") |> unwrap_and_tag(:bracketed_sub_open)
|
||||
|
||||
bracketed_b_close = string("**]") |> unwrap_and_tag(:bracketed_b_close)
|
||||
bracketed_i_close = string("__]") |> unwrap_and_tag(:bracketed_i_close)
|
||||
bracketed_strong_close = string("*]") |> unwrap_and_tag(:bracketed_strong_close)
|
||||
bracketed_em_close = string("_]") |> unwrap_and_tag(:bracketed_em_close)
|
||||
bracketed_code_close = string("@]") |> unwrap_and_tag(:bracketed_code_close)
|
||||
bracketed_ins_close = string("+]") |> unwrap_and_tag(:bracketed_ins_close)
|
||||
bracketed_sup_close = string("^]") |> unwrap_and_tag(:bracketed_sup_close)
|
||||
bracketed_del_close = string("-]") |> unwrap_and_tag(:bracketed_del_close)
|
||||
bracketed_sub_close = string("~]") |> unwrap_and_tag(:bracketed_sub_close)
|
||||
|
||||
b_delim = string("**") |> unwrap_and_tag(:b_delim)
|
||||
i_delim = string("__") |> unwrap_and_tag(:i_delim)
|
||||
strong_delim = string("*") |> unwrap_and_tag(:strong_delim)
|
||||
em_delim = string("_") |> unwrap_and_tag(:em_delim)
|
||||
code_delim = string("@") |> unwrap_and_tag(:code_delim)
|
||||
ins_delim = string("+") |> unwrap_and_tag(:ins_delim)
|
||||
sup_delim = string("^") |> unwrap_and_tag(:sup_delim)
|
||||
sub_delim = string("~") |> unwrap_and_tag(:sub_delim)
|
||||
|
||||
del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:del_delim)
|
||||
|
||||
quicktxt =
|
||||
utf8_char('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*@_{}')
|
||||
|> unwrap_and_tag(:quicktxt)
|
||||
|
||||
char =
|
||||
utf8_char([])
|
||||
|> unwrap_and_tag(:char)
|
||||
|
||||
textile =
|
||||
choice([
|
||||
literal,
|
||||
double_newline,
|
||||
newline,
|
||||
space_token,
|
||||
bq_cite_start,
|
||||
bq_cite_open,
|
||||
bq_open,
|
||||
bq_close,
|
||||
spoiler_open,
|
||||
spoiler_close,
|
||||
unbracketed_image,
|
||||
bracketed_image,
|
||||
bracketed_link_open,
|
||||
bracketed_link_url,
|
||||
unbracketed_link_url,
|
||||
link_delim,
|
||||
bracketed_b_open,
|
||||
bracketed_i_open,
|
||||
bracketed_strong_open,
|
||||
bracketed_em_open,
|
||||
bracketed_code_open,
|
||||
bracketed_ins_open,
|
||||
bracketed_sup_open,
|
||||
bracketed_del_open,
|
||||
bracketed_sub_open,
|
||||
bracketed_b_close,
|
||||
bracketed_i_close,
|
||||
bracketed_strong_close,
|
||||
bracketed_em_close,
|
||||
bracketed_code_close,
|
||||
bracketed_ins_close,
|
||||
bracketed_sup_close,
|
||||
bracketed_del_close,
|
||||
bracketed_sub_close,
|
||||
b_delim,
|
||||
i_delim,
|
||||
strong_delim,
|
||||
em_delim,
|
||||
code_delim,
|
||||
ins_delim,
|
||||
sup_delim,
|
||||
del_delim,
|
||||
sub_delim,
|
||||
quicktxt,
|
||||
char
|
||||
])
|
||||
|> repeat()
|
||||
|> eos()
|
||||
|
||||
defparsec :lex, textile
|
||||
end
|
|
@ -1,371 +0,0 @@
|
|||
defmodule FastTextile.Parser do
|
||||
alias FastTextile.Lexer
|
||||
alias Phoenix.HTML
|
||||
|
||||
def parse(parser, input) do
|
||||
parser = Map.put(parser, :state, %{})
|
||||
|
||||
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(String.trim(input)),
|
||||
{:ok, tree, []} <- repeat(&textile/2, parser, tokens)
|
||||
do
|
||||
partial_flatten(tree)
|
||||
else
|
||||
_ ->
|
||||
[]
|
||||
end
|
||||
end
|
||||
|
||||
# Helper to turn a parse tree into a string
|
||||
def flatten(tree) do
|
||||
tree
|
||||
|> List.flatten()
|
||||
|> Enum.map_join("", fn {_k, v} -> v end)
|
||||
end
|
||||
|
||||
# Helper to escape HTML
|
||||
defp escape(text) do
|
||||
text
|
||||
|> HTML.html_escape()
|
||||
|> HTML.safe_to_string()
|
||||
end
|
||||
|
||||
# Helper to turn a parse tree into a list
|
||||
def partial_flatten(tree) do
|
||||
tree
|
||||
|> List.flatten()
|
||||
|> Enum.chunk_by(fn {k, _v} -> k end)
|
||||
|> Enum.map(fn list ->
|
||||
[{type, _v} | _rest] = list
|
||||
|
||||
value = Enum.map_join(list, "", fn {_k, v} -> v end)
|
||||
|
||||
{type, value}
|
||||
end)
|
||||
end
|
||||
|
||||
defp put_state(parser, new_state) do
|
||||
state = Map.put(parser.state, new_state, true)
|
||||
Map.put(parser, :state, state)
|
||||
end
|
||||
|
||||
# Helper corresponding to Kleene star (*) operator
|
||||
# Match a specified rule zero or more times
|
||||
defp repeat(rule, parser, tokens) do
|
||||
case rule.(parser, tokens) do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree2, r2_tokens} = repeat(rule, parser, r_tokens)
|
||||
{:ok, [tree, tree2], r2_tokens}
|
||||
|
||||
_ ->
|
||||
{:ok, [], tokens}
|
||||
end
|
||||
end
|
||||
|
||||
# Helper to match a simple recursive grammar rule of the following form:
|
||||
#
|
||||
# open_token callback* close_token
|
||||
#
|
||||
defp simple_recursive(open_token, close_token, open_tag, close_tag, callback, parser, [{open_token, open} | r_tokens]) do
|
||||
case repeat(callback, parser, r_tokens) do
|
||||
{:ok, tree, [{^close_token, _} | r2_tokens]} ->
|
||||
{:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
defp simple_recursive(_open_token, _close_token, _open_tag, _close_tag, _callback, _parser, _tokens) do
|
||||
{:error, "Expected a simple recursive rule"}
|
||||
end
|
||||
|
||||
# Helper to match a simple recursive grammar rule with negative lookahead:
|
||||
#
|
||||
# open_token callback* close_token (?!lookahead_not)
|
||||
#
|
||||
defp simple_lookahead_not(open_token, close_token, open_tag, close_tag, lookahead_not, callback, state, parser, [{open_token, open} | r_tokens]) do
|
||||
case parser.state do
|
||||
%{^state => _} ->
|
||||
{:error, "End of rule"}
|
||||
|
||||
_ ->
|
||||
case r_tokens do
|
||||
[{forbidden_lookahead, _la} | _] when forbidden_lookahead in [:space, :newline] ->
|
||||
{:ok, [{:text, escape(open)}], r_tokens}
|
||||
|
||||
_ ->
|
||||
case repeat(callback, put_state(parser, state), r_tokens) do
|
||||
{:ok, tree, [{^close_token, close}, {^lookahead_not, ln} | r2_tokens]} ->
|
||||
{:ok, [{:text, escape(open)}, tree, {:text, escape(close)}], [{lookahead_not, ln} | r2_tokens]}
|
||||
|
||||
{:ok, tree, [{^close_token, _} | r2_tokens]} ->
|
||||
{:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
defp simple_lookahead_not(_open_token, _close_token, _open_tag, _close_tag, _lookahead_not, _callback, _state, _parser, _tokens) do
|
||||
{:error, "Expected a simple lookahead not rule"}
|
||||
end
|
||||
|
||||
# Helper to efficiently assemble a UTF-8 binary from tokens of the
|
||||
# given type
|
||||
defp assemble_binary(token_type, accumulator, [{token_type, t} | stream]) do
|
||||
assemble_binary(token_type, accumulator <> <<t::utf8>>, stream)
|
||||
end
|
||||
defp assemble_binary(_token_type, accumulator, tokens), do: {accumulator, tokens}
|
||||
|
||||
#
|
||||
# inline_textile_element =
|
||||
# opening_markup inline_textile_element* closing_markup (?!quicktxt) |
|
||||
# closing_markup (?=quicktxt) |
|
||||
# link_delim block_textile_element* link_url |
|
||||
# image url? |
|
||||
# code_delim inline_textile_element* code_delim |
|
||||
# inline_textile_element_not_opening_markup;
|
||||
#
|
||||
|
||||
defp inline_textile_element(parser, tokens) do
|
||||
[
|
||||
{:b_delim, :b, "<b>", "</b>"},
|
||||
{:i_delim, :i, "<i>", "</i>"},
|
||||
{:strong_delim, :strong, "<strong>", "</strong>"},
|
||||
{:em_delim, :em, "<em>", "</em>"},
|
||||
{:ins_delim, :ins, "<ins>", "</ins>"},
|
||||
{:sup_delim, :sup, "<sup>", "</sup>"},
|
||||
{:del_delim, :del, "<del>", "</del>"},
|
||||
{:sub_delim, :sub, "<sub>", "</sub>"}
|
||||
]
|
||||
|> Enum.find_value(fn {delim_token, state, open_tag, close_tag} ->
|
||||
simple_lookahead_not(
|
||||
delim_token,
|
||||
delim_token,
|
||||
open_tag,
|
||||
close_tag,
|
||||
:quicktxt,
|
||||
&inline_textile_element/2,
|
||||
state,
|
||||
parser,
|
||||
tokens
|
||||
)
|
||||
|> case do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree, r_tokens}
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end)
|
||||
|> case do
|
||||
nil -> inner_inline_textile_element(parser, tokens)
|
||||
value -> value
|
||||
end
|
||||
end
|
||||
|
||||
defp inner_inline_textile_element(parser, [{token, t}, {:quicktxt, q} | r_tokens])
|
||||
when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
|
||||
do
|
||||
case inline_textile_element(parser, [{:quicktxt, q} | r_tokens]) do
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(t)}, tree], r2_tokens}
|
||||
|
||||
_ ->
|
||||
{:ok, [{:text, escape(t)}], [{:quicktxt, q} | r_tokens]}
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{:link_delim, open} | r_tokens]) do
|
||||
case repeat(&block_textile_element/2, parser, r_tokens) do
|
||||
{:ok, tree, [{:unbracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
|
||||
href = escape(url)
|
||||
|
||||
{:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{:bracketed_link_open, open} | r_tokens]) do
|
||||
case repeat(&inline_textile_element/2, parser, r_tokens) do
|
||||
{:ok, tree, [{:bracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
|
||||
href = escape(url)
|
||||
|
||||
{:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{token, img}, {:unbracketed_image_url, <<":", url::binary>>} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
|
||||
img = parser.image_transform.(img)
|
||||
|
||||
{:ok, [{:markup, "<a href=\""}, {:markup, escape(url)}, {:markup, "\"><span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span></a>"}], r_tokens}
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{token, img} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
|
||||
img = parser.image_transform.(img)
|
||||
|
||||
{:ok, [{:markup, "<span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span>"}], r_tokens}
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{:code_delim, open} | r_tokens]) do
|
||||
case parser.state do
|
||||
%{code: _} ->
|
||||
{:error, "End of rule"}
|
||||
|
||||
_ ->
|
||||
case repeat(&inline_textile_element/2, put_state(parser, :code), r_tokens) do
|
||||
{:ok, tree, [{:code_delim, _} | r2_tokens]} ->
|
||||
{:ok, [{:markup, "<code>"}, tree, {:markup, "</code>"}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, tokens) do
|
||||
inline_textile_element_not_opening_markup(parser, tokens)
|
||||
end
|
||||
|
||||
#
|
||||
# bq_cite_text = literal | char | space | quicktxt;
|
||||
#
|
||||
|
||||
# Note that text is not escaped here because it will be escaped
|
||||
# when the tree is flattened
|
||||
defp bq_cite_text(_parser, [{:literal, lit} | r_tokens]) do
|
||||
{:ok, [{:text, lit}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, [{:char, lit} | r_tokens]) do
|
||||
{:ok, [{:text, <<lit::utf8>>}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, [{:space, _} | r_tokens]) do
|
||||
{:ok, [{:text, " "}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, [{:quicktxt, lit} | r_tokens]) do
|
||||
{:ok, [{:text, <<lit::utf8>>}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, _tokens) do
|
||||
{:error, "Expected cite tokens"}
|
||||
end
|
||||
|
||||
#
|
||||
# inline_textile_element_not_opening_markup =
|
||||
# literal | space | char |
|
||||
# quicktxt opening_markup quicktxt |
|
||||
# quicktxt |
|
||||
# opening_block_tag block_textile_element* closing_block_tag;
|
||||
#
|
||||
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:literal, lit} | r_tokens]) do
|
||||
{:ok, [{:markup, "<span class=\"literal\">"}, {:markup, escape(lit)}, {:markup, "</span>"}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:space, _} | r_tokens]) do
|
||||
{:ok, [{:text, " "}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:char, lit} | r_tokens]) do
|
||||
{binary, r2_tokens} = assemble_binary(:char, <<lit::utf8>>, r_tokens)
|
||||
|
||||
{:ok, [{:text, escape(binary)}], r2_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, q1}, {token, t}, {:quicktxt, q2} | r_tokens])
|
||||
when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
|
||||
do
|
||||
{:ok, [{:text, escape(<<q1::utf8>>)}, {:text, escape(t)}, {:text, escape(<<q2::utf8>>)}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, lit} | r_tokens]) do
|
||||
{:ok, [{:text, escape(<<lit::utf8>>)}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(parser, [{:bq_cite_start, start} | r_tokens]) do
|
||||
case repeat(&bq_cite_text/2, parser, r_tokens) do
|
||||
{:ok, tree, [{:bq_cite_open, open} | r2_tokens]} ->
|
||||
case repeat(&block_textile_element/2, parser, r2_tokens) do
|
||||
{:ok, tree2, [{:bq_close, _} | r3_tokens]} ->
|
||||
cite = escape(flatten(tree))
|
||||
|
||||
{:ok, [{:markup, "<blockquote author=\""}, {:markup, cite}, {:markup, "\">"}, tree2, {:markup, "</blockquote>"}], r3_tokens}
|
||||
|
||||
{:ok, tree2, r3_tokens} ->
|
||||
{:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}, tree2], r3_tokens}
|
||||
|
||||
_ ->
|
||||
{:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}], r_tokens}
|
||||
end
|
||||
|
||||
_ ->
|
||||
{:ok, [{:text, escape(start)}], r_tokens}
|
||||
end
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:bq_cite_open, tok} | r_tokens]) do
|
||||
{:ok, [{:text, escape(tok)}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(parser, tokens) do
|
||||
[
|
||||
{:bq_open, :bq_close, "<blockquote>", "</blockquote>"},
|
||||
{:spoiler_open, :spoiler_close, "<span class=\"spoiler\">", "</span>"},
|
||||
{:bracketed_b_open, :bracketed_b_close, "<b>", "</b>"},
|
||||
{:bracketed_i_open, :bracketed_i_close, "<i>", "</i>"},
|
||||
{:bracketed_strong_open, :bracketed_strong_close, "<strong>", "</strong>"},
|
||||
{:bracketed_em_open, :bracketed_em_close, "<em>", "</em>"},
|
||||
{:bracketed_code_open, :bracketed_code_close, "<code>", "</code>"},
|
||||
{:bracketed_ins_open, :bracketed_ins_close, "<ins>", "</ins>"},
|
||||
{:bracketed_sup_open, :bracketed_sup_close, "<sup>", "</sup>"},
|
||||
{:bracketed_del_open, :bracketed_del_close, "<del>", "</del>"},
|
||||
{:bracketed_sub_open, :bracketed_sub_close, "<sub>", "</sub>"}
|
||||
]
|
||||
|> Enum.find_value(fn {open_token, close_token, open_tag, close_tag} ->
|
||||
simple_recursive(
|
||||
open_token,
|
||||
close_token,
|
||||
open_tag,
|
||||
close_tag,
|
||||
&block_textile_element/2,
|
||||
parser,
|
||||
tokens
|
||||
)
|
||||
|> case do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree, r_tokens}
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end)
|
||||
|> Kernel.||({:error, "Expected block markup"})
|
||||
end
|
||||
|
||||
#
|
||||
# block_textile_element =
|
||||
# double_newline | newline | inline_textile_element;
|
||||
#
|
||||
|
||||
defp block_textile_element(_parser, [{:double_newline, _} | r_tokens]) do
|
||||
{:ok, [{:markup, "<br/><br/>"}], r_tokens}
|
||||
end
|
||||
defp block_textile_element(_parser, [{:newline, _} | r_tokens]) do
|
||||
{:ok, [{:markup, "<br/>"}], r_tokens}
|
||||
end
|
||||
defp block_textile_element(parser, tokens) do
|
||||
inline_textile_element(parser, tokens)
|
||||
end
|
||||
|
||||
#
|
||||
# textile =
|
||||
# (block_textile_element | TOKEN)* eos;
|
||||
#
|
||||
|
||||
defp textile(parser, tokens) do
|
||||
case block_textile_element(parser, tokens) do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree, r_tokens}
|
||||
|
||||
_ ->
|
||||
case tokens do
|
||||
[{_, string} | r_tokens] ->
|
||||
{:ok, [{:text, escape(string)}], r_tokens}
|
||||
|
||||
_ ->
|
||||
{:error, "Expected textile"}
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,17 +1,12 @@
|
|||
defmodule Philomena.Textile.Renderer do
|
||||
# todo: belongs in PhilomenaWeb
|
||||
alias Textile.Parser, as: SlowParser
|
||||
alias FastTextile.Parser, as: FastParser
|
||||
alias Textile.Parser
|
||||
alias Philomena.Images.Image
|
||||
alias Philomena.Repo
|
||||
import Phoenix.HTML
|
||||
import Phoenix.HTML.Link
|
||||
import Ecto.Query
|
||||
|
||||
@parser %SlowParser{
|
||||
image_transform: &Camo.Image.image_url/1
|
||||
}
|
||||
|
||||
# Kill bogus compile time dependency on ImageView
|
||||
@image_view Module.concat(["PhilomenaWeb.ImageView"])
|
||||
|
||||
|
@ -20,17 +15,8 @@ defmodule Philomena.Textile.Renderer do
|
|||
end
|
||||
|
||||
def render_collection(posts, conn) do
|
||||
parser =
|
||||
case conn.cookies["new_parser"] do
|
||||
"true" -> FastParser
|
||||
_ -> SlowParser
|
||||
end
|
||||
|
||||
parsed =
|
||||
posts
|
||||
|> Enum.map(fn post ->
|
||||
parser.parse(@parser, post.body)
|
||||
end)
|
||||
opts = %{image_transform: &Camo.Image.image_url/1}
|
||||
parsed = Enum.map(posts, &Parser.parse(opts, &1.body))
|
||||
|
||||
images =
|
||||
parsed
|
||||
|
|
|
@ -40,7 +40,6 @@ defmodule PhilomenaWeb.SettingController do
|
|||
|> set_cookie(user_params, "webm", "webm")
|
||||
|> set_cookie(user_params, "chan_nsfw", "chan_nsfw")
|
||||
|> set_cookie(user_params, "hide_staff_tools", "hide_staff_tools")
|
||||
|> set_cookie(user_params, "new_parser", "new_parser")
|
||||
end
|
||||
|
||||
defp set_cookie(conn, params, param_name, cookie_name) do
|
||||
|
|
|
@ -122,10 +122,6 @@ h1 Content Settings
|
|||
=> label f, :chan_nsfw, "Show NSFW channels"
|
||||
=> checkbox f, :chan_nsfw
|
||||
.fieldlabel: i Show streams marked as NSFW on the channels page.
|
||||
.field
|
||||
=> label f, :new_parser, "Use experimental parser"
|
||||
=> checkbox f, :new_parser
|
||||
.fieldlabel: i Use the experimental Textile parser.
|
||||
= if staff?(@conn.assigns.current_user) do
|
||||
.field
|
||||
=> label f, :hide_staff_tools
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
defmodule PhilomenaWeb.PostView do
|
||||
alias Philomena.Attribution
|
||||
alias FastTextile.Parser
|
||||
alias Textile.Parser
|
||||
|
||||
use PhilomenaWeb, :view
|
||||
|
||||
|
|
|
@ -1,41 +0,0 @@
|
|||
defmodule Textile.Helpers do
|
||||
import NimbleParsec
|
||||
|
||||
# Helper to "undo" a tokenization and convert it back
|
||||
# to a string
|
||||
def unwrap([{_name, value}]), do: value
|
||||
|
||||
# Lots of extra unicode space characters
|
||||
def space do
|
||||
choice([
|
||||
utf8_char('\n\r\f \t\u00a0\u1680\u180e\u202f\u205f\u3000'),
|
||||
utf8_char([0x2000..0x200a])
|
||||
])
|
||||
end
|
||||
|
||||
# Characters which are valid before and after the main markup characters.
|
||||
def special_characters do
|
||||
choice([
|
||||
space(),
|
||||
utf8_char('#$%&(),./:;<=?\\`|\'')
|
||||
])
|
||||
end
|
||||
|
||||
# Simple tag for a markup element that must
|
||||
# be followed immediately by a non-space character
|
||||
def markup_open_tag(str, char \\ nil, tag_name) do
|
||||
char = char || binary_head(str)
|
||||
|
||||
open_stops =
|
||||
choice([
|
||||
space(),
|
||||
string(char)
|
||||
])
|
||||
|
||||
string(str)
|
||||
|> lookahead_not(open_stops)
|
||||
|> unwrap_and_tag(:"#{tag_name}_open")
|
||||
end
|
||||
|
||||
defp binary_head(<<c::utf8, _rest::binary>>), do: <<c::utf8>>
|
||||
end
|
|
@ -1,15 +1,43 @@
|
|||
defmodule Textile.Lexer do
|
||||
import NimbleParsec
|
||||
import Textile.Helpers
|
||||
import Textile.MarkupLexer
|
||||
import Textile.UrlLexer
|
||||
|
||||
space =
|
||||
utf8_char('\f \r\t\u00a0\u1680\u180e\u202f\u205f\u3000' ++ Enum.to_list(0x2000..0x200a))
|
||||
|
||||
# Structural tags
|
||||
extended_space =
|
||||
choice([
|
||||
space,
|
||||
string("\n"),
|
||||
eos()
|
||||
])
|
||||
|
||||
space_token =
|
||||
space
|
||||
|> unwrap_and_tag(:space)
|
||||
|
||||
double_newline =
|
||||
string("\n")
|
||||
|> repeat(space)
|
||||
|> string("\n")
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:double_newline)
|
||||
|
||||
newline =
|
||||
string("\n")
|
||||
|> unwrap_and_tag(:newline)
|
||||
|
||||
link_ending_characters =
|
||||
utf8_char('@#$%&(),.:;<=?\\`|\'')
|
||||
|
||||
bracket_link_ending_characters =
|
||||
utf8_char('" []')
|
||||
|
||||
end_of_link =
|
||||
choice([
|
||||
concat(link_ending_characters, extended_space),
|
||||
extended_space
|
||||
])
|
||||
|
||||
# Literals enclosed via [== ==]
|
||||
# Will never contain any markup
|
||||
bracketed_literal =
|
||||
ignore(string("[=="))
|
||||
|> repeat(lookahead_not(string("==]")) |> utf8_char([]))
|
||||
|
@ -28,231 +56,183 @@ defmodule Textile.Lexer do
|
|||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:literal)
|
||||
|
||||
blockquote_cite =
|
||||
lookahead_not(string("\""))
|
||||
|> choice([
|
||||
literal |> reduce(:unwrap),
|
||||
utf8_char([])
|
||||
])
|
||||
|> repeat()
|
||||
bq_cite_start =
|
||||
string("[bq=\"")
|
||||
|> unwrap_and_tag(:bq_cite_start)
|
||||
|
||||
# Blockquote opening tag with cite: [bq="the author"]
|
||||
# Cite can contain bracketed literals or text
|
||||
blockquote_open_cite =
|
||||
ignore(string("[bq=\""))
|
||||
|> concat(blockquote_cite)
|
||||
|> ignore(string("\"]"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:blockquote_open_cite)
|
||||
bq_cite_open =
|
||||
string("\"]")
|
||||
|> unwrap_and_tag(:bq_cite_open)
|
||||
|
||||
# Blockquote opening tag
|
||||
blockquote_open =
|
||||
bq_open =
|
||||
string("[bq]")
|
||||
|> unwrap_and_tag(:blockquote_open)
|
||||
|> unwrap_and_tag(:bq_open)
|
||||
|
||||
# Blockquote closing tag
|
||||
blockquote_close =
|
||||
bq_close =
|
||||
string("[/bq]")
|
||||
|> unwrap_and_tag(:blockquote_close)
|
||||
|> unwrap_and_tag(:bq_close)
|
||||
|
||||
# Spoiler open tag
|
||||
spoiler_open =
|
||||
string("[spoiler]")
|
||||
|> unwrap_and_tag(:spoiler_open)
|
||||
|
||||
# Spoiler close tag
|
||||
spoiler_close =
|
||||
string("[/spoiler]")
|
||||
|> unwrap_and_tag(:spoiler_close)
|
||||
|
||||
|
||||
# Images
|
||||
|
||||
|
||||
image_url_with_title =
|
||||
url_ending_in(string("("))
|
||||
|> unwrap_and_tag(:image_url)
|
||||
|> concat(
|
||||
ignore(string("("))
|
||||
|> repeat(utf8_char(not: ?)))
|
||||
|> ignore(string(")"))
|
||||
|> lookahead(string("!"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:image_title)
|
||||
)
|
||||
|
||||
image_url_without_title =
|
||||
url_ending_in(string("!"))
|
||||
|> unwrap_and_tag(:image_url)
|
||||
|
||||
image_url =
|
||||
image_url_scheme =
|
||||
choice([
|
||||
image_url_with_title,
|
||||
image_url_without_title
|
||||
string("//"),
|
||||
string("/"),
|
||||
string("https://"),
|
||||
string("http://")
|
||||
])
|
||||
|
||||
bracketed_image_with_link =
|
||||
ignore(string("[!"))
|
||||
|> concat(image_url)
|
||||
|> ignore(string("!:"))
|
||||
|> concat(
|
||||
url_ending_in(string("]"))
|
||||
|> unwrap_and_tag(:image_link_url)
|
||||
)
|
||||
|
||||
bracketed_image_without_link =
|
||||
ignore(string("[!"))
|
||||
|> concat(image_url)
|
||||
|> ignore(string("!]"))
|
||||
|
||||
image_with_link =
|
||||
ignore(string("!"))
|
||||
|> concat(image_url)
|
||||
|> ignore(string("!:"))
|
||||
|> concat(
|
||||
url_ending_in(space())
|
||||
|> unwrap_and_tag(:image_link_url)
|
||||
)
|
||||
|
||||
image_without_link =
|
||||
ignore(string("!"))
|
||||
|> concat(image_url)
|
||||
|> ignore(string("!"))
|
||||
|
||||
image =
|
||||
link_url_scheme =
|
||||
choice([
|
||||
bracketed_image_with_link,
|
||||
bracketed_image_without_link,
|
||||
image_with_link,
|
||||
image_without_link
|
||||
string("#"),
|
||||
image_url_scheme
|
||||
])
|
||||
|
||||
unbracketed_url =
|
||||
string(":")
|
||||
|> concat(link_url_scheme)
|
||||
|> repeat(lookahead_not(end_of_link) |> utf8_char([]))
|
||||
|
||||
# Links
|
||||
unbracketed_image_url =
|
||||
unbracketed_url
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:unbracketed_image_url)
|
||||
|
||||
|
||||
{link_markup_start, link_markup_element} = markup_ending_in(string("\""))
|
||||
|
||||
link_url_stop =
|
||||
choice([
|
||||
string("*"),
|
||||
string("@"),
|
||||
string("^"),
|
||||
string("~"),
|
||||
string(".") |> concat(choice([space(), eos()])),
|
||||
string("!") |> concat(choice([space(), eos()])),
|
||||
string(",") |> concat(choice([space(), eos()])),
|
||||
string("_") |> concat(choice([space(), eos()])),
|
||||
string("?") |> concat(choice([space(), eos()])),
|
||||
string(";") |> concat(choice([space(), eos()])),
|
||||
space(),
|
||||
eos()
|
||||
])
|
||||
|
||||
link_contents_start =
|
||||
choice([
|
||||
image,
|
||||
spoiler_open,
|
||||
spoiler_close,
|
||||
blockquote_open,
|
||||
blockquote_open_cite,
|
||||
blockquote_close,
|
||||
literal,
|
||||
link_markup_start
|
||||
])
|
||||
|
||||
link_contents_element =
|
||||
choice([
|
||||
image,
|
||||
spoiler_open,
|
||||
spoiler_close,
|
||||
blockquote_open,
|
||||
blockquote_open_cite,
|
||||
blockquote_close,
|
||||
literal,
|
||||
link_markup_element
|
||||
])
|
||||
|
||||
link_contents =
|
||||
optional(link_contents_start)
|
||||
|> repeat(link_contents_element)
|
||||
|
||||
bracketed_link_end =
|
||||
string("\":")
|
||||
|> unwrap_and_tag(:link_end)
|
||||
|> concat(
|
||||
url_ending_in(string("]"))
|
||||
|> ignore(string("]"))
|
||||
|> unwrap_and_tag(:link_url)
|
||||
)
|
||||
|
||||
bracketed_link =
|
||||
string("[\"")
|
||||
|> unwrap_and_tag(:link_start)
|
||||
|> concat(link_contents)
|
||||
|> concat(bracketed_link_end)
|
||||
|
||||
unbracketed_link_end =
|
||||
string("\":")
|
||||
|> unwrap_and_tag(:link_end)
|
||||
|> concat(
|
||||
url_ending_in(link_url_stop)
|
||||
|> unwrap_and_tag(:link_url)
|
||||
)
|
||||
|
||||
unbracketed_link =
|
||||
unbracketed_link_url =
|
||||
string("\"")
|
||||
|> unwrap_and_tag(:link_start)
|
||||
|> concat(link_contents)
|
||||
|> concat(unbracketed_link_end)
|
||||
|> concat(unbracketed_url)
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:unbracketed_link_url)
|
||||
|
||||
link =
|
||||
choice([
|
||||
bracketed_link,
|
||||
unbracketed_link
|
||||
])
|
||||
unbracketed_image =
|
||||
ignore(string("!"))
|
||||
|> concat(image_url_scheme)
|
||||
|> repeat(utf8_char(not: ?!))
|
||||
|> ignore(string("!"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:unbracketed_image)
|
||||
|> concat(optional(unbracketed_image_url))
|
||||
|
||||
bracketed_image =
|
||||
ignore(string("[!"))
|
||||
|> concat(image_url_scheme)
|
||||
|> repeat(lookahead_not(string("!]")) |> utf8_char([]))
|
||||
|> ignore(string("!]"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:bracketed_image)
|
||||
|> concat(optional(unbracketed_image_url))
|
||||
|
||||
# Textile
|
||||
link_delim =
|
||||
string("\"")
|
||||
|> unwrap_and_tag(:link_delim)
|
||||
|
||||
markup_ends =
|
||||
choice([
|
||||
spoiler_close,
|
||||
blockquote_close,
|
||||
eos()
|
||||
])
|
||||
bracketed_link_open =
|
||||
string("[\"")
|
||||
|> unwrap_and_tag(:bracketed_link_open)
|
||||
|
||||
{markup_start, markup_element} = markup_ending_in(markup_ends)
|
||||
bracketed_link_url =
|
||||
string("\":")
|
||||
|> concat(link_url_scheme)
|
||||
|> repeat(lookahead_not(bracket_link_ending_characters) |> utf8_char([]))
|
||||
|> ignore(string("]"))
|
||||
|> reduce({List, :to_string, []})
|
||||
|> unwrap_and_tag(:bracketed_link_url)
|
||||
|
||||
textile_default =
|
||||
choice([
|
||||
literal,
|
||||
blockquote_open_cite |> optional(markup_start),
|
||||
blockquote_open |> optional(markup_start),
|
||||
blockquote_close,
|
||||
spoiler_open |> optional(markup_start),
|
||||
spoiler_close,
|
||||
link,
|
||||
image
|
||||
])
|
||||
bracketed_b_open = string("[**") |> unwrap_and_tag(:bracketed_b_open)
|
||||
bracketed_i_open = string("[__") |> unwrap_and_tag(:bracketed_i_open)
|
||||
bracketed_strong_open = string("[*") |> unwrap_and_tag(:bracketed_strong_open)
|
||||
bracketed_em_open = string("[_") |> unwrap_and_tag(:bracketed_em_open)
|
||||
bracketed_code_open = string("[@") |> unwrap_and_tag(:bracketed_code_open)
|
||||
bracketed_ins_open = string("[+") |> unwrap_and_tag(:bracketed_ins_open)
|
||||
bracketed_sup_open = string("[^") |> unwrap_and_tag(:bracketed_sup_open)
|
||||
bracketed_del_open = string("[-") |> unwrap_and_tag(:bracketed_del_open)
|
||||
bracketed_sub_open = string("[~") |> unwrap_and_tag(:bracketed_sub_open)
|
||||
|
||||
textile_main =
|
||||
choice([
|
||||
textile_default,
|
||||
markup_element
|
||||
])
|
||||
bracketed_b_close = string("**]") |> unwrap_and_tag(:bracketed_b_close)
|
||||
bracketed_i_close = string("__]") |> unwrap_and_tag(:bracketed_i_close)
|
||||
bracketed_strong_close = string("*]") |> unwrap_and_tag(:bracketed_strong_close)
|
||||
bracketed_em_close = string("_]") |> unwrap_and_tag(:bracketed_em_close)
|
||||
bracketed_code_close = string("@]") |> unwrap_and_tag(:bracketed_code_close)
|
||||
bracketed_ins_close = string("+]") |> unwrap_and_tag(:bracketed_ins_close)
|
||||
bracketed_sup_close = string("^]") |> unwrap_and_tag(:bracketed_sup_close)
|
||||
bracketed_del_close = string("-]") |> unwrap_and_tag(:bracketed_del_close)
|
||||
bracketed_sub_close = string("~]") |> unwrap_and_tag(:bracketed_sub_close)
|
||||
|
||||
textile_start =
|
||||
choice([
|
||||
textile_default,
|
||||
markup_start
|
||||
])
|
||||
b_delim = string("**") |> unwrap_and_tag(:b_delim)
|
||||
i_delim = string("__") |> unwrap_and_tag(:i_delim)
|
||||
strong_delim = string("*") |> unwrap_and_tag(:strong_delim)
|
||||
em_delim = string("_") |> unwrap_and_tag(:em_delim)
|
||||
code_delim = string("@") |> unwrap_and_tag(:code_delim)
|
||||
ins_delim = string("+") |> unwrap_and_tag(:ins_delim)
|
||||
sup_delim = string("^") |> unwrap_and_tag(:sup_delim)
|
||||
sub_delim = string("~") |> unwrap_and_tag(:sub_delim)
|
||||
|
||||
del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:del_delim)
|
||||
|
||||
quicktxt =
|
||||
utf8_char('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*@_{}')
|
||||
|> unwrap_and_tag(:quicktxt)
|
||||
|
||||
char =
|
||||
utf8_char([])
|
||||
|> unwrap_and_tag(:char)
|
||||
|
||||
textile =
|
||||
optional(textile_start)
|
||||
|> repeat(textile_main)
|
||||
choice([
|
||||
literal,
|
||||
double_newline,
|
||||
newline,
|
||||
space_token,
|
||||
bq_cite_start,
|
||||
bq_cite_open,
|
||||
bq_open,
|
||||
bq_close,
|
||||
spoiler_open,
|
||||
spoiler_close,
|
||||
unbracketed_image,
|
||||
bracketed_image,
|
||||
bracketed_link_open,
|
||||
bracketed_link_url,
|
||||
unbracketed_link_url,
|
||||
link_delim,
|
||||
bracketed_b_open,
|
||||
bracketed_i_open,
|
||||
bracketed_strong_open,
|
||||
bracketed_em_open,
|
||||
bracketed_code_open,
|
||||
bracketed_ins_open,
|
||||
bracketed_sup_open,
|
||||
bracketed_del_open,
|
||||
bracketed_sub_open,
|
||||
bracketed_b_close,
|
||||
bracketed_i_close,
|
||||
bracketed_strong_close,
|
||||
bracketed_em_close,
|
||||
bracketed_code_close,
|
||||
bracketed_ins_close,
|
||||
bracketed_sup_close,
|
||||
bracketed_del_close,
|
||||
bracketed_sub_close,
|
||||
b_delim,
|
||||
i_delim,
|
||||
strong_delim,
|
||||
em_delim,
|
||||
code_delim,
|
||||
ins_delim,
|
||||
sup_delim,
|
||||
del_delim,
|
||||
sub_delim,
|
||||
quicktxt,
|
||||
char
|
||||
])
|
||||
|> repeat()
|
||||
|> eos()
|
||||
|
||||
|
||||
defparsec :lex, textile
|
||||
end
|
||||
|
|
|
@ -1,171 +0,0 @@
|
|||
defmodule Textile.MarkupLexer do
  @moduledoc """
  Builds the NimbleParsec combinators used to tokenize inline Textile markup:
  bold, italic, strong, emphasis, code, inserted, superscript, deleted and
  subscript tags (plain and bracketed forms), plus `==literal==` spans.
  """

  import NimbleParsec
  import Textile.Helpers

  # Markup tags

  @doc """
  Returns a `{markup_at_start, markup_element}` tuple of combinators.

    * `markup_at_start` matches one or more plain opening tags, or a single
      bracketed opening tag.
    * `markup_element` matches a single markup token (or one raw UTF-8
      character as a fallback), and never matches where `ending_sequence`
      matches.

  `ending_sequence` is a combinator describing what terminates the enclosing
  construct.
  """
  def markup_ending_in(ending_sequence) do
    double_newline =
      string("\n\n")
      |> unwrap_and_tag(:double_newline)

    newline =
      string("\n")
      |> unwrap_and_tag(:newline)

    # What may come directly before a run of plain opening tags.
    preceding_whitespace =
      choice([
        double_newline,
        newline,
        special_characters()
      ])

    # The literal tag is special, because
    # 1. It needs to capture everything inside it as a distinct token.
    # 2. It can be surrounded by markup on all sides.
    # 3. If it successfully tokenizes, it will always be in the output.

    # A literal may not start with a space, the ending sequence or "=".
    literal_open_stops =
      choice([
        space(),
        ending_sequence,
        string("=")
      ])

    # One character of literal content; stops before the ending sequence,
    # a blank line, "=" or a space followed by "=".
    literal_close_stops =
      lookahead_not(
        choice([
          ending_sequence,
          string("\n\n"),
          string("="),
          space() |> concat(string("="))
        ])
      )
      |> utf8_char([])

    literal =
      ignore(string("=="))
      |> lookahead_not(literal_open_stops)
      |> repeat(literal_close_stops)
      |> ignore(string("=="))
      |> reduce({List, :to_string, []})
      |> unwrap_and_tag(:literal)

    # NOTE(review): the middle argument of markup_open_tag/3 is presumably the
    # character that must not directly follow the tag -- confirm against
    # Textile.Helpers. Every tag passes its own delimiter character, so the
    # italic tag uses "_" here, in line with b_i_open below.
    b_open = markup_open_tag("**", "*", :b)
    i_open = markup_open_tag("__", "_", :i)

    strong_open = markup_open_tag("*", :strong)
    em_open = markup_open_tag("_", :em)
    code_open = markup_open_tag("@", :code)
    ins_open = markup_open_tag("+", :ins)
    sup_open = markup_open_tag("^", :sup)
    del_open = markup_open_tag("-", :del)
    sub_open = markup_open_tag("~", :sub)

    b_b_open = markup_open_tag("[**", "*", :b_b)
    b_i_open = markup_open_tag("[__", "_", :b_i)

    b_strong_open = markup_open_tag("[*", "*", :b_strong)
    b_em_open = markup_open_tag("[_", "_", :b_em)
    b_code_open = markup_open_tag("[@", "@", :b_code)
    b_ins_open = markup_open_tag("[+", "+", :b_ins)
    b_sup_open = markup_open_tag("[^", "^", :b_sup)
    b_del_open = markup_open_tag("[-", "-", :b_del)
    b_sub_open = markup_open_tag("[~", "~", :b_sub)

    b_b_close = string("**]") |> unwrap_and_tag(:b_b_close)
    b_i_close = string("__]") |> unwrap_and_tag(:b_i_close)

    b_strong_close = string("*]") |> unwrap_and_tag(:b_strong_close)
    b_em_close = string("_]") |> unwrap_and_tag(:b_em_close)
    b_code_close = string("@]") |> unwrap_and_tag(:b_code_close)
    b_ins_close = string("+]") |> unwrap_and_tag(:b_ins_close)
    b_sup_close = string("^]") |> unwrap_and_tag(:b_sup_close)
    b_del_close = string("-]") |> unwrap_and_tag(:b_del_close)
    b_sub_close = string("~]") |> unwrap_and_tag(:b_sub_close)

    b_close = string("**") |> unwrap_and_tag(:b_close)
    i_close = string("__") |> unwrap_and_tag(:i_close)

    strong_close = string("*") |> unwrap_and_tag(:strong_close)
    em_close = string("_") |> unwrap_and_tag(:em_close)
    code_close = string("@") |> unwrap_and_tag(:code_close)
    ins_close = string("+") |> unwrap_and_tag(:ins_close)
    sup_close = string("^") |> unwrap_and_tag(:sup_close)
    del_close = string("-") |> unwrap_and_tag(:del_close)
    sub_close = string("~") |> unwrap_and_tag(:sub_close)

    # Two-character delimiters are listed before their one-character
    # counterparts so that the longer form is tried first.
    bracketed_markup_opening_tags =
      choice([
        b_b_open,
        b_i_open,
        b_strong_open,
        b_em_open,
        b_code_open,
        b_ins_open,
        b_sup_open,
        b_del_open,
        b_sub_open
      ])

    markup_opening_tags =
      choice([
        b_open,
        i_open,
        strong_open,
        em_open,
        code_open,
        ins_open,
        sup_open,
        # "->" must not be taken as an opening deleted tag.
        del_open |> lookahead_not(string(">")),
        sub_open
      ])

    bracketed_markup_closing_tags =
      choice([
        b_b_close,
        b_i_close,
        b_strong_close,
        b_em_close,
        b_code_close,
        b_ins_close,
        b_sup_close,
        b_del_close,
        b_sub_close
      ])

    markup_closing_tags =
      choice([
        b_close,
        i_close,
        strong_close,
        em_close,
        code_close,
        ins_close,
        sup_close,
        del_close,
        sub_close
      ])

    markup_at_start =
      choice([
        times(markup_opening_tags, min: 1),
        bracketed_markup_opening_tags
      ])

    markup_element =
      lookahead_not(ending_sequence)
      |> choice([
        literal,
        bracketed_markup_closing_tags,
        bracketed_markup_opening_tags |> lookahead_not(space()),
        # Plain opening tags only count directly after whitespace or a
        # special character, and not directly before the ending sequence.
        preceding_whitespace
        |> times(markup_opening_tags, min: 1)
        |> lookahead_not(ending_sequence),
        # Plain closing tags only count directly before a special character
        # or the ending sequence.
        times(markup_closing_tags, min: 1)
        |> lookahead(choice([special_characters(), ending_sequence])),
        double_newline,
        newline,
        utf8_char([])
      ])

    {markup_at_start, markup_element}
  end
end
|
|
@ -1,313 +1,371 @@
|
|||
defmodule Textile.Parser do
|
||||
import Textile.ParserHelpers
|
||||
alias Textile.Lexer
|
||||
alias Phoenix.HTML
|
||||
|
||||
alias Textile.{
|
||||
Lexer,
|
||||
Parser,
|
||||
TokenCoalescer
|
||||
}
|
||||
def parse(parser, input) do
|
||||
parser = Map.put(parser, :state, %{})
|
||||
|
||||
defstruct [
|
||||
image_transform: nil
|
||||
]
|
||||
|
||||
def parse(%Parser{} = parser, input) do
|
||||
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input |> remove_linefeeds()),
|
||||
tokens <- TokenCoalescer.coalesce_lex(tokens),
|
||||
{:ok, tree, []} <- textile_top(parser, tokens),
|
||||
tree <- TokenCoalescer.coalesce_parse(tree)
|
||||
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(String.trim(input)),
|
||||
{:ok, tree, []} <- repeat(&textile/2, parser, tokens)
|
||||
do
|
||||
tree
|
||||
else
|
||||
err ->
|
||||
err
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
#
|
||||
# Backtracking LL packrat parser for simplified Textile grammar
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# textile = (well_formed_including_paragraphs | TOKEN)*;
|
||||
#
|
||||
defp textile_top(_parser, []), do: {:ok, [], []}
|
||||
defp textile_top(parser, tokens) do
|
||||
with {:ok, tree, r_tokens} <- well_formed_including_paragraphs(parser, nil, tokens),
|
||||
false <- tree == [],
|
||||
{:ok, next_tree, r2_tokens} <- textile_top(parser, r_tokens)
|
||||
do
|
||||
{:ok, [tree, next_tree], r2_tokens}
|
||||
partial_flatten(tree)
|
||||
else
|
||||
_ ->
|
||||
[{_token, string} | r_tokens] = tokens
|
||||
{:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens)
|
||||
|
||||
{:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens}
|
||||
[]
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
#
|
||||
# well_formed_including_paragraphs = (markup | double_newline)*;
|
||||
#
|
||||
defp well_formed_including_paragraphs(_parser, _closing_token, []), do: {:ok, [], []}
|
||||
defp well_formed_including_paragraphs(parser, closing_token, [{:double_newline, _nl} | r_tokens]) do
|
||||
{:ok, tree, r2_tokens} = well_formed_including_paragraphs(parser, closing_token, r_tokens)
|
||||
|
||||
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
|
||||
# Helper to turn a parse tree into a string
|
||||
def flatten(tree) do
|
||||
tree
|
||||
|> List.flatten()
|
||||
|> Enum.map_join("", fn {_k, v} -> v end)
|
||||
end
|
||||
|
||||
defp well_formed_including_paragraphs(parser, closing_token, tokens) do
|
||||
with {:markup, {:ok, tree, r_tokens}} <- {:markup, markup(parser, tokens)},
|
||||
{:ok, next_tree, r2_tokens} <- well_formed_including_paragraphs(parser, closing_token, r_tokens)
|
||||
do
|
||||
{:ok, [tree, next_tree], r2_tokens}
|
||||
else
|
||||
_ ->
|
||||
consume_nonclosing(parser, closing_token, tokens)
|
||||
end
|
||||
# Helper to escape HTML
|
||||
defp escape(text) do
|
||||
text
|
||||
|> HTML.html_escape()
|
||||
|> HTML.safe_to_string()
|
||||
end
|
||||
|
||||
defp consume_nonclosing(_parser, closing_token, [{closing_token, _string} | _r_tokens] = tokens) do
|
||||
{:ok, [], tokens}
|
||||
end
|
||||
defp consume_nonclosing(parser, closing_token, [{_next_token, string} | r_tokens]) do
|
||||
{:ok, next_tree, r2_tokens} = well_formed_including_paragraphs(parser, closing_token, r_tokens)
|
||||
# Helper to turn a parse tree into a list
|
||||
def partial_flatten(tree) do
|
||||
tree
|
||||
|> List.flatten()
|
||||
|> Enum.chunk_by(fn {k, _v} -> k end)
|
||||
|> Enum.map(fn list ->
|
||||
[{type, _v} | _rest] = list
|
||||
|
||||
{:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens}
|
||||
end
|
||||
defp consume_nonclosing(_parser, _closing_token, []) do
|
||||
{:ok, [], []}
|
||||
value = Enum.map_join(list, "", fn {_k, v} -> v end)
|
||||
|
||||
{type, value}
|
||||
end)
|
||||
end
|
||||
|
||||
#
|
||||
# well_formed = (markup)*;
|
||||
#
|
||||
defp well_formed(parser, tokens) do
|
||||
case markup(parser, tokens) do
|
||||
defp put_state(parser, new_state) do
|
||||
state = Map.put(parser.state, new_state, true)
|
||||
Map.put(parser, :state, state)
|
||||
end
|
||||
|
||||
# Helper corresponding to Kleene star (*) operator
|
||||
# Match a specified rule zero or more times
|
||||
defp repeat(rule, parser, tokens) do
|
||||
case rule.(parser, tokens) do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, next_tree, r2_tokens} = well_formed(parser, r_tokens)
|
||||
{:ok, [tree, next_tree], r2_tokens}
|
||||
{:ok, tree2, r2_tokens} = repeat(rule, parser, r_tokens)
|
||||
{:ok, [tree, tree2], r2_tokens}
|
||||
|
||||
_ ->
|
||||
{:ok, [], tokens}
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Helper to match a simple recursive grammar rule of the following form:
|
||||
#
|
||||
# markup =
|
||||
# blockquote | spoiler | link | image | bold | italic | strong | emphasis |
|
||||
# code | inserted | superscript | deleted | subscript | newline | literal |
|
||||
# literal | text;
|
||||
# open_token callback* close_token
|
||||
#
|
||||
defp markup(parser, tokens) do
|
||||
markups = [
|
||||
&blockquote/2, &spoiler/2, &link/2, &image/2, &bold/2, &italic/2, &strong/2,
|
||||
&emphasis/2, &code/2, &inserted/2, &superscript/2, &deleted/2, &subscript/2,
|
||||
&newline/2, &literal/2, &literal/2, &text/2
|
||||
]
|
||||
defp simple_recursive(open_token, close_token, open_tag, close_tag, callback, parser, [{open_token, open} | r_tokens]) do
|
||||
case repeat(callback, parser, r_tokens) do
|
||||
{:ok, tree, [{^close_token, _} | r2_tokens]} ->
|
||||
{:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
|
||||
|
||||
value =
|
||||
markups
|
||||
|> Enum.find_value(fn func ->
|
||||
case func.(parser, tokens) do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree, r_tokens}
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
defp simple_recursive(_open_token, _close_token, _open_tag, _close_tag, _callback, _parser, _tokens) do
|
||||
{:error, "Expected a simple recursive rule"}
|
||||
end
|
||||
|
||||
# Helper to match a simple recursive grammar rule with negative lookahead:
|
||||
#
|
||||
# open_token callback* close_token (?!lookahead_not)
|
||||
#
|
||||
defp simple_lookahead_not(open_token, close_token, open_tag, close_tag, lookahead_not, callback, state, parser, [{open_token, open} | r_tokens]) do
|
||||
case parser.state do
|
||||
%{^state => _} ->
|
||||
{:error, "End of rule"}
|
||||
|
||||
_ ->
|
||||
case r_tokens do
|
||||
[{forbidden_lookahead, _la} | _] when forbidden_lookahead in [:space, :newline] ->
|
||||
{:ok, [{:text, escape(open)}], r_tokens}
|
||||
|
||||
_ ->
|
||||
nil
|
||||
case repeat(callback, put_state(parser, state), r_tokens) do
|
||||
{:ok, tree, [{^close_token, close}, {^lookahead_not, ln} | r2_tokens]} ->
|
||||
{:ok, [{:text, escape(open)}, tree, {:text, escape(close)}], [{lookahead_not, ln} | r2_tokens]}
|
||||
|
||||
{:ok, tree, [{^close_token, _} | r2_tokens]} ->
|
||||
{:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
end)
|
||||
|
||||
value || {:error, "Expected markup"}
|
||||
end
|
||||
end
|
||||
defp simple_lookahead_not(_open_token, _close_token, _open_tag, _close_tag, _lookahead_not, _callback, _state, _parser, _tokens) do
|
||||
{:error, "Expected a simple lookahead not rule"}
|
||||
end
|
||||
|
||||
# Helper to efficiently assemble a UTF-8 binary from tokens of the
|
||||
# given type
|
||||
defp assemble_binary(token_type, accumulator, [{token_type, t} | stream]) do
|
||||
assemble_binary(token_type, accumulator <> <<t::utf8>>, stream)
|
||||
end
|
||||
defp assemble_binary(_token_type, accumulator, tokens), do: {accumulator, tokens}
|
||||
|
||||
#
|
||||
# blockquote =
|
||||
# blockquote_open_cite well_formed_including_paragraphs blockquote_close |
|
||||
# blockquote_open well_formed_including_paragraphs blockquote_close;
|
||||
# inline_textile_element =
|
||||
# opening_markup inline_textile_element* closing_markup (?!quicktxt) |
|
||||
# closing_markup (?=quicktxt) |
|
||||
# link_delim block_textile_element* link_url |
|
||||
# image url? |
|
||||
# code_delim inline_textile_element* code_delim |
|
||||
# inline_textile_element_not_opening_markup;
|
||||
#
|
||||
defp blockquote(parser, [{:blockquote_open_cite, author} | r_tokens]) do
|
||||
case well_formed_including_paragraphs(parser, :blockquote_close, r_tokens) do
|
||||
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(~s|[bq="#{author}"]|)}, tree], r2_tokens}
|
||||
defp inline_textile_element(parser, tokens) do
|
||||
[
|
||||
{:b_delim, :b, "<b>", "</b>"},
|
||||
{:i_delim, :i, "<i>", "</i>"},
|
||||
{:strong_delim, :strong, "<strong>", "</strong>"},
|
||||
{:em_delim, :em, "<em>", "</em>"},
|
||||
{:ins_delim, :ins, "<ins>", "</ins>"},
|
||||
{:sup_delim, :sup, "<sup>", "</sup>"},
|
||||
{:del_delim, :del, "<del>", "</del>"},
|
||||
{:sub_delim, :sub, "<sub>", "</sub>"}
|
||||
]
|
||||
|> Enum.find_value(fn {delim_token, state, open_tag, close_tag} ->
|
||||
simple_lookahead_not(
|
||||
delim_token,
|
||||
delim_token,
|
||||
open_tag,
|
||||
close_tag,
|
||||
:quicktxt,
|
||||
&inline_textile_element/2,
|
||||
state,
|
||||
parser,
|
||||
tokens
|
||||
)
|
||||
|> case do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree, r_tokens}
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end)
|
||||
|> case do
|
||||
nil -> inner_inline_textile_element(parser, tokens)
|
||||
value -> value
|
||||
end
|
||||
end
|
||||
|
||||
defp blockquote(parser, [{:blockquote_open, open} | r_tokens]) do
|
||||
case well_formed_including_paragraphs(parser, :blockquote_close, r_tokens) do
|
||||
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
|
||||
|
||||
defp inner_inline_textile_element(parser, [{token, t}, {:quicktxt, q} | r_tokens])
|
||||
when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
|
||||
do
|
||||
case inline_textile_element(parser, [{:quicktxt, q} | r_tokens]) do
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
|
||||
{:ok, [{:text, escape(t)}, tree], r2_tokens}
|
||||
|
||||
_ ->
|
||||
{:ok, [{:text, escape(t)}], [{:quicktxt, q} | r_tokens]}
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{:link_delim, open} | r_tokens]) do
|
||||
case repeat(&block_textile_element/2, parser, r_tokens) do
|
||||
{:ok, tree, [{:unbracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
|
||||
href = escape(url)
|
||||
|
||||
defp blockquote(_parser, _tokens),
|
||||
do: {:error, "Expected a blockquote tag with optional citation"}
|
||||
|
||||
|
||||
#
|
||||
# spoiler =
|
||||
# spoiler_open well_formed_including_paragraphs spoiler_close;
|
||||
#
|
||||
defp spoiler(parser, [{:spoiler_open, open} | r_tokens]) do
|
||||
case well_formed_including_paragraphs(parser, :spoiler_close, r_tokens) do
|
||||
{:ok, tree, [{:spoiler_close, _close} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
|
||||
{:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{:bracketed_link_open, open} | r_tokens]) do
|
||||
case repeat(&inline_textile_element/2, parser, r_tokens) do
|
||||
{:ok, tree, [{:bracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
|
||||
href = escape(url)
|
||||
|
||||
defp spoiler(_parser, _tokens),
|
||||
do: {:error, "Expected a spoiler tag"}
|
||||
|
||||
|
||||
#
|
||||
# link =
|
||||
# link_start well_formed_including_paragraphs link_end link_url;
|
||||
#
|
||||
defp link(parser, [{:link_start, start} | r_tokens]) do
|
||||
case well_formed_including_paragraphs(parser, :link_end, r_tokens) do
|
||||
{:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} ->
|
||||
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
|
||||
{:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
|
||||
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape_nl2br(start)}, tree], r2_tokens}
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{token, img}, {:unbracketed_image_url, <<":", url::binary>>} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
|
||||
img = parser.image_transform.(img)
|
||||
|
||||
defp link(_parser, _tokens),
|
||||
do: {:error, "Expected a link"}
|
||||
{:ok, [{:markup, "<a href=\""}, {:markup, escape(url)}, {:markup, "\"><span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span></a>"}], r_tokens}
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{token, img} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
|
||||
img = parser.image_transform.(img)
|
||||
|
||||
{:ok, [{:markup, "<span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span>"}], r_tokens}
|
||||
end
|
||||
defp inner_inline_textile_element(parser, [{:code_delim, open} | r_tokens]) do
|
||||
case parser.state do
|
||||
%{code: _} ->
|
||||
{:error, "End of rule"}
|
||||
|
||||
#
|
||||
# image =
|
||||
# image_url image_title? image_link_url?;
|
||||
#
|
||||
defp image(parser, [{:image_url, image_url}, {:image_title, title}, {:image_link_url, link_url} | r_tokens]) do
|
||||
image_url = parser.image_transform.(image_url)
|
||||
_ ->
|
||||
case repeat(&inline_textile_element/2, put_state(parser, :code), r_tokens) do
|
||||
{:ok, tree, [{:code_delim, _} | r2_tokens]} ->
|
||||
{:ok, [{:markup, "<code>"}, tree, {:markup, "</code>"}], r2_tokens}
|
||||
|
||||
{:ok, [markup: ~s|<a href="#{escape_html(link_url)}"><span class="imgspoiler"><img src="#{escape_html(image_url)}" title="#{escape_html(title)}"/></span></a>|], r_tokens}
|
||||
{:ok, tree, r2_tokens} ->
|
||||
{:ok, [{:text, escape(open)}, tree], r2_tokens}
|
||||
end
|
||||
end
|
||||
end
|
||||
defp inner_inline_textile_element(parser, tokens) do
|
||||
inline_textile_element_not_opening_markup(parser, tokens)
|
||||
end
|
||||
|
||||
defp image(parser, [{:image_url, image_url}, {:image_title, title} | r_tokens]) do
|
||||
image_url = parser.image_transform.(image_url)
|
||||
#
|
||||
# bq_cite_text = literal | char | space | quicktxt;
|
||||
#
|
||||
|
||||
{:ok, [markup: ~s|<span class="imgspoiler"><img src="#{escape_html(image_url)}" title="#{escape_html(title)}"/></span>|], r_tokens}
|
||||
# Note that text is not escaped here because it will be escaped
|
||||
# when the tree is flattened
|
||||
defp bq_cite_text(_parser, [{:literal, lit} | r_tokens]) do
|
||||
{:ok, [{:text, lit}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, [{:char, lit} | r_tokens]) do
|
||||
{:ok, [{:text, <<lit::utf8>>}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, [{:space, _} | r_tokens]) do
|
||||
{:ok, [{:text, " "}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, [{:quicktxt, lit} | r_tokens]) do
|
||||
{:ok, [{:text, <<lit::utf8>>}], r_tokens}
|
||||
end
|
||||
defp bq_cite_text(_parser, _tokens) do
|
||||
{:error, "Expected cite tokens"}
|
||||
end
|
||||
|
||||
defp image(parser, [{:image_url, image_url}, {:image_link_url, link_url} | r_tokens]) do
|
||||
image_url = parser.image_transform.(image_url)
|
||||
#
|
||||
# inline_textile_element_not_opening_markup =
|
||||
# literal | space | char |
|
||||
# quicktxt opening_markup quicktxt |
|
||||
# quicktxt |
|
||||
# opening_block_tag block_textile_element* closing_block_tag;
|
||||
#
|
||||
|
||||
{:ok, [markup: ~s|<a href="#{escape_html(link_url)}"><span class="imgspoiler"><img src="#{escape_html(image_url)}"/></span></a>|], r_tokens}
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:literal, lit} | r_tokens]) do
|
||||
{:ok, [{:markup, "<span class=\"literal\">"}, {:markup, escape(lit)}, {:markup, "</span>"}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:space, _} | r_tokens]) do
|
||||
{:ok, [{:text, " "}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:char, lit} | r_tokens]) do
|
||||
{binary, r2_tokens} = assemble_binary(:char, <<lit::utf8>>, r_tokens)
|
||||
|
||||
{:ok, [{:text, escape(binary)}], r2_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, q1}, {token, t}, {:quicktxt, q2} | r_tokens])
|
||||
when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
|
||||
do
|
||||
{:ok, [{:text, escape(<<q1::utf8>>)}, {:text, escape(t)}, {:text, escape(<<q2::utf8>>)}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, lit} | r_tokens]) do
|
||||
{:ok, [{:text, escape(<<lit::utf8>>)}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(parser, [{:bq_cite_start, start} | r_tokens]) do
|
||||
case repeat(&bq_cite_text/2, parser, r_tokens) do
|
||||
{:ok, tree, [{:bq_cite_open, open} | r2_tokens]} ->
|
||||
case repeat(&block_textile_element/2, parser, r2_tokens) do
|
||||
{:ok, tree2, [{:bq_close, _} | r3_tokens]} ->
|
||||
cite = escape(flatten(tree))
|
||||
|
||||
{:ok, [{:markup, "<blockquote author=\""}, {:markup, cite}, {:markup, "\">"}, tree2, {:markup, "</blockquote>"}], r3_tokens}
|
||||
|
||||
{:ok, tree2, r3_tokens} ->
|
||||
{:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}, tree2], r3_tokens}
|
||||
|
||||
_ ->
|
||||
{:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}], r_tokens}
|
||||
end
|
||||
|
||||
_ ->
|
||||
{:ok, [{:text, escape(start)}], r_tokens}
|
||||
end
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(_parser, [{:bq_cite_open, tok} | r_tokens]) do
|
||||
{:ok, [{:text, escape(tok)}], r_tokens}
|
||||
end
|
||||
defp inline_textile_element_not_opening_markup(parser, tokens) do
|
||||
[
|
||||
{:bq_open, :bq_close, "<blockquote>", "</blockquote>"},
|
||||
{:spoiler_open, :spoiler_close, "<span class=\"spoiler\">", "</span>"},
|
||||
{:bracketed_b_open, :bracketed_b_close, "<b>", "</b>"},
|
||||
{:bracketed_i_open, :bracketed_i_close, "<i>", "</i>"},
|
||||
{:bracketed_strong_open, :bracketed_strong_close, "<strong>", "</strong>"},
|
||||
{:bracketed_em_open, :bracketed_em_close, "<em>", "</em>"},
|
||||
{:bracketed_code_open, :bracketed_code_close, "<code>", "</code>"},
|
||||
{:bracketed_ins_open, :bracketed_ins_close, "<ins>", "</ins>"},
|
||||
{:bracketed_sup_open, :bracketed_sup_close, "<sup>", "</sup>"},
|
||||
{:bracketed_del_open, :bracketed_del_close, "<del>", "</del>"},
|
||||
{:bracketed_sub_open, :bracketed_sub_close, "<sub>", "</sub>"}
|
||||
]
|
||||
|> Enum.find_value(fn {open_token, close_token, open_tag, close_tag} ->
|
||||
simple_recursive(
|
||||
open_token,
|
||||
close_token,
|
||||
open_tag,
|
||||
close_tag,
|
||||
&block_textile_element/2,
|
||||
parser,
|
||||
tokens
|
||||
)
|
||||
|> case do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree, r_tokens}
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end)
|
||||
|> Kernel.||({:error, "Expected block markup"})
|
||||
end
|
||||
|
||||
defp image(parser, [{:image_url, image_url} | r_tokens]) do
|
||||
image_url = parser.image_transform.(image_url)
|
||||
#
|
||||
# block_textile_element =
|
||||
# double_newline | newline | inline_textile_element;
|
||||
#
|
||||
|
||||
{:ok, [markup: ~s|<span class="imgspoiler"><img src="#{escape_html(image_url)}"/></span>|], r_tokens}
|
||||
defp block_textile_element(_parser, [{:double_newline, _} | r_tokens]) do
|
||||
{:ok, [{:markup, "<br/><br/>"}], r_tokens}
|
||||
end
|
||||
defp block_textile_element(_parser, [{:newline, _} | r_tokens]) do
|
||||
{:ok, [{:markup, "<br/>"}], r_tokens}
|
||||
end
|
||||
defp block_textile_element(parser, tokens) do
|
||||
inline_textile_element(parser, tokens)
|
||||
end
|
||||
|
||||
defp image(_parser, _tokens),
|
||||
do: {:error, "Expected an image tag"}
|
||||
|
||||
#
|
||||
# bold =
|
||||
# b_open well_formed b_close |
|
||||
# b_b_open well_formed b_b_close;
|
||||
#
|
||||
attribute_parser(:bold, :b_open, :b_close, "<b>", "</b>")
|
||||
|
||||
#
|
||||
# italic =
|
||||
# i_open well_formed i_close |
|
||||
# b_i_open well_formed b_i_close;
|
||||
#
|
||||
attribute_parser(:italic, :i_open, :i_close, "<i>", "</i>")
|
||||
|
||||
#
|
||||
# strong =
|
||||
# strong_open well_formed strong_close |
|
||||
# b_strong_open well_formed b_strong_close;
|
||||
#
|
||||
attribute_parser(:strong, :strong_open, :strong_close, "<strong>", "</strong>")
|
||||
|
||||
#
|
||||
# emphasis =
|
||||
# em_open well_formed em_close |
|
||||
# b_em_open well_formed b_em_close;
|
||||
#
|
||||
attribute_parser(:emphasis, :em_open, :em_close, "<em>", "</em>")
|
||||
|
||||
#
|
||||
# code =
|
||||
# code_open well_formed code_close |
|
||||
# b_code_open well_formed b_code_close;
|
||||
#
|
||||
attribute_parser(:code, :code_open, :code_close, "<code>", "</code>")
|
||||
|
||||
#
|
||||
# inserted =
|
||||
# ins_open well_formed ins_close |
|
||||
# b_ins_open well_formed b_ins_close;
|
||||
#
|
||||
attribute_parser(:inserted, :ins_open, :ins_close, "<ins>", "</ins>")
|
||||
|
||||
#
|
||||
# superscript =
|
||||
# sup_open well_formed sup_close |
|
||||
# b_sup_open well_formed b_sup_close;
|
||||
#
|
||||
attribute_parser(:superscript, :sup_open, :sup_close, "<sup>", "</sup>")
|
||||
|
||||
#
|
||||
# deleted =
|
||||
# del_open well_formed del_close |
|
||||
# b_del_open well_formed b_del_close;
|
||||
#
|
||||
attribute_parser(:deleted, :del_open, :del_close, "<del>", "</del>")
|
||||
|
||||
#
|
||||
# subscript =
|
||||
# sub_open well_formed sub_close |
|
||||
# b_sub_open well_formed b_sub_close;
|
||||
#
|
||||
attribute_parser(:subscript, :sub_open, :sub_close, "<sub>", "</sub>")
|
||||
|
||||
|
||||
#
|
||||
# Terminals
|
||||
# textile =
|
||||
# (block_textile_element | TOKEN)* eos;
|
||||
#
|
||||
|
||||
defp literal(_parser, [{:literal, text} | r_tokens]),
|
||||
do: {:ok, [markup: escape_nl2br(text)], r_tokens}
|
||||
defp textile(parser, tokens) do
|
||||
case block_textile_element(parser, tokens) do
|
||||
{:ok, tree, r_tokens} ->
|
||||
{:ok, tree, r_tokens}
|
||||
|
||||
defp literal(_parser, _tokens),
|
||||
do: {:error, "Expected a literal"}
|
||||
_ ->
|
||||
case tokens do
|
||||
[{_, string} | r_tokens] ->
|
||||
{:ok, [{:text, escape(string)}], r_tokens}
|
||||
|
||||
|
||||
defp newline(_parser, [{:newline, _nl} | r_tokens]),
|
||||
do: {:ok, [markup: "<br/>"], r_tokens}
|
||||
|
||||
defp newline(_parser, _tokens),
|
||||
do: {:error, "Expected a line break"}
|
||||
|
||||
|
||||
defp text(_parser, [{:text, text} | r_tokens]),
|
||||
do: {:ok, [text: escape_nl2br(text)], r_tokens}
|
||||
|
||||
defp text(_parser, _tokens),
|
||||
do: {:error, "Expected text"}
|
||||
_ ->
|
||||
{:error, "Expected textile"}
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
defmodule Textile.ParserHelpers do
  @moduledoc """
  Helpers for the Textile parser: a macro that generates the parser clauses
  for simple paired markup tags, plus text clean-up and HTML escaping
  functions.
  """

  import Phoenix.HTML, only: [html_escape: 1, safe_to_string: 1]

  @doc """
  Defines three private `name/2` clauses in the calling module.

    * One matching `open_token`, parsing the body with the caller's
      `well_formed/2`, and expecting `close_token`.
    * One doing the same for the bracketed tokens, whose names are the plain
      token names prefixed with `b_`.
    * A fallback returning `{:error, "Expected <name> tag"}`.

  On success the body is wrapped in `open_tag` / `close_tag` markup entries.
  When the closing token is missing, the opening token is emitted as escaped
  text followed by the parsed body.

  The calling module must define `well_formed/2`.
  """
  defmacro attribute_parser(name, open_token, close_token, open_tag, close_tag) do
    # Built once at expansion time from the literal atoms handed to the macro.
    bracketed_open_token = :"b_#{open_token}"
    bracketed_close_token = :"b_#{close_token}"

    quote do
      defp unquote(name)(parser, [{unquote(open_token), open} | r_tokens]) do
        case well_formed(parser, r_tokens) do
          {:ok, tree, [{unquote(close_token), _close} | r2_tokens]} ->
            {:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}

          {:ok, tree, r2_tokens} ->
            # Fully qualified so the generated code does not depend on the
            # caller importing this module.
            {:ok, [{:text, unquote(__MODULE__).escape_html(open)}, tree], r2_tokens}
        end
      end

      defp unquote(name)(parser, [{unquote(bracketed_open_token), open} | r_tokens]) do
        case well_formed(parser, r_tokens) do
          {:ok, tree, [{unquote(bracketed_close_token), _close} | r2_tokens]} ->
            {:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}

          {:ok, tree, r2_tokens} ->
            {:ok, [{:text, unquote(__MODULE__).escape_html(open)}, tree], r2_tokens}
        end
      end

      defp unquote(name)(_parser, _tokens),
        do: {:error, "Expected #{unquote(name)} tag"}
    end
  end

  @doc """
  Converts `text` to a string and strips every carriage return (`"\\r"`).

  Despite the name, `"\\n"` characters are left untouched.
  """
  def remove_linefeeds(text) do
    text
    |> to_string()
    |> String.replace("\r", "")
  end

  @doc """
  HTML-escapes `text` line by line and joins the lines with `<br/>`.
  """
  def escape_nl2br(text) do
    text
    |> String.split("\n")
    |> Enum.map_join("<br/>", &escape_html/1)
  end

  @doc """
  HTML-escapes `text` and returns the result as a string.
  """
  def escape_html(text) do
    text
    |> html_escape()
    |> safe_to_string()
  end
end
|
|
@ -1,31 +0,0 @@
|
|||
defmodule Textile.TokenCoalescer do
  @moduledoc """
  Merges adjacent entries in lexer and parser output so that runs of small
  pieces become single entries.
  """

  @doc """
  Collapses each run of consecutive numeric tokens (character codepoints)
  into one `{:text, string}` token.

  The lexer, as a practical concern, does not coalesce runs of character
  tokens. This fixes that. All other tokens pass through unchanged and in
  order.
  """
  def coalesce_lex(tokens) do
    tokens
    |> Enum.chunk_by(&is_number/1)
    |> Enum.flat_map(fn
      [char | _rest] = chars when is_number(char) ->
        [text: List.to_string(chars)]

      other_tokens ->
        other_tokens
    end)
  end

  @doc """
  Flattens a nested parse tree of `{type, value}` entries and merges each run
  into a single entry whose value is the concatenation of the run's values.

  Runs are split only on "is `:text`" versus "is not `:text`", so consecutive
  non-text entries are merged as well and take the type of the first entry in
  the run.
  """
  def coalesce_parse(tokens) do
    tokens
    |> List.flatten()
    |> Enum.chunk_by(fn {type, _value} -> type == :text end)
    |> Enum.map(fn [{type, _value} | _rest] = run ->
      {type, Enum.map_join(run, "", fn {_type, value} -> value end)}
    end)
  end
end
|
|
@ -1,34 +0,0 @@
|
|||
defmodule Textile.UrlLexer do
  @moduledoc """
  NimbleParsec combinator for the URLs that may appear in Textile links and
  images.
  """

  import NimbleParsec

  @doc """
  Returns a combinator that matches a URL and reduces it to a single string.

  The URL must begin with `#`, `/`, `data:image/`, or an `https://` /
  `http://` scheme followed by a run of host characters. After that prefix,
  characters are consumed one at a time until `ending_sequence` would match.
  """
  def url_ending_in(ending_sequence) do
    # A single character allowed in a host name.
    host_char =
      choice([
        ascii_char([?a..?z]),
        ascii_char([?A..?Z]),
        ascii_char([?0..?9]),
        string("-")
      ])

    # A dot is only accepted between two host characters.
    dotted_pair =
      host_char
      |> string(".")
      |> concat(host_char)

    host = repeat(choice([dotted_pair, host_char]))

    url_prefix =
      choice([
        string("#"),
        string("/"),
        string("data:image/"),
        concat(string("https://"), host),
        concat(string("http://"), host)
      ])

    # Everything up to, but not including, the ending sequence.
    url_rest = repeat(lookahead_not(ending_sequence) |> utf8_char([]))

    url_prefix
    |> concat(url_rest)
    |> reduce({List, :to_string, []})
  end
end
|
Loading…
Reference in a new issue