diff --git a/lib/fast_textile/lexer.ex b/lib/fast_textile/lexer.ex deleted file mode 100644 index 9c8c4544..00000000 --- a/lib/fast_textile/lexer.ex +++ /dev/null @@ -1,238 +0,0 @@ -defmodule FastTextile.Lexer do - import NimbleParsec - - space = - utf8_char('\f \r\t\u00a0\u1680\u180e\u202f\u205f\u3000' ++ Enum.to_list(0x2000..0x200a)) - - extended_space = - choice([ - space, - string("\n"), - eos() - ]) - - space_token = - space - |> unwrap_and_tag(:space) - - double_newline = - string("\n") - |> repeat(space) - |> string("\n") - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:double_newline) - - newline = - string("\n") - |> unwrap_and_tag(:newline) - - link_ending_characters = - utf8_char('@#$%&(),.:;<=?\\`|\'') - - bracket_link_ending_characters = - utf8_char('" []') - - end_of_link = - choice([ - concat(link_ending_characters, extended_space), - extended_space - ]) - - bracketed_literal = - ignore(string("[==")) - |> repeat(lookahead_not(string("==]")) |> utf8_char([])) - |> ignore(string("==]")) - - unbracketed_literal = - ignore(string("==")) - |> repeat(lookahead_not(string("==")) |> utf8_char([])) - |> ignore(string("==")) - - literal = - choice([ - bracketed_literal, - unbracketed_literal - ]) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:literal) - - bq_cite_start = - string("[bq=\"") - |> unwrap_and_tag(:bq_cite_start) - - bq_cite_open = - string("\"]") - |> unwrap_and_tag(:bq_cite_open) - - bq_open = - string("[bq]") - |> unwrap_and_tag(:bq_open) - - bq_close = - string("[/bq]") - |> unwrap_and_tag(:bq_close) - - spoiler_open = - string("[spoiler]") - |> unwrap_and_tag(:spoiler_open) - - spoiler_close = - string("[/spoiler]") - |> unwrap_and_tag(:spoiler_close) - - image_url_scheme = - choice([ - string("//"), - string("/"), - string("https://"), - string("http://") - ]) - - link_url_scheme = - choice([ - string("#"), - image_url_scheme - ]) - - unbracketed_url = - string(":") - |> concat(link_url_scheme) - |> repeat(lookahead_not(end_of_link) |> utf8_char([])) - - unbracketed_image_url = - unbracketed_url - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:unbracketed_image_url) - - unbracketed_link_url = - string("\"") - |> concat(unbracketed_url) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:unbracketed_link_url) - - unbracketed_image = - ignore(string("!")) - |> concat(image_url_scheme) - |> repeat(utf8_char(not: ?!)) - |> ignore(string("!")) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:unbracketed_image) - |> concat(optional(unbracketed_image_url)) - - bracketed_image = - ignore(string("[!")) - |> concat(image_url_scheme) - |> repeat(lookahead_not(string("!]")) |> utf8_char([])) - |> ignore(string("!]")) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:bracketed_image) - |> concat(optional(unbracketed_image_url)) - - link_delim = - string("\"") - |> unwrap_and_tag(:link_delim) - - bracketed_link_open = - string("[\"") - |> unwrap_and_tag(:bracketed_link_open) - - bracketed_link_url = - string("\":") - |> concat(link_url_scheme) - |> repeat(lookahead_not(bracket_link_ending_characters) |> utf8_char([])) - |> ignore(string("]")) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:bracketed_link_url) - - bracketed_b_open = string("[**") |> unwrap_and_tag(:bracketed_b_open) - bracketed_i_open = string("[__") |> unwrap_and_tag(:bracketed_i_open) - bracketed_strong_open = string("[*") |> unwrap_and_tag(:bracketed_strong_open) - bracketed_em_open = string("[_") |> unwrap_and_tag(:bracketed_em_open) - bracketed_code_open = string("[@") |> unwrap_and_tag(:bracketed_code_open) - bracketed_ins_open = string("[+") |> unwrap_and_tag(:bracketed_ins_open) - bracketed_sup_open = string("[^") |> unwrap_and_tag(:bracketed_sup_open) - bracketed_del_open = string("[-") |> unwrap_and_tag(:bracketed_del_open) - bracketed_sub_open = string("[~") |> unwrap_and_tag(:bracketed_sub_open) - - bracketed_b_close = string("**]") |> unwrap_and_tag(:bracketed_b_close) - bracketed_i_close = string("__]") |> unwrap_and_tag(:bracketed_i_close) - bracketed_strong_close = string("*]") |> unwrap_and_tag(:bracketed_strong_close) - bracketed_em_close = string("_]") |> unwrap_and_tag(:bracketed_em_close) - bracketed_code_close = string("@]") |> unwrap_and_tag(:bracketed_code_close) - bracketed_ins_close = string("+]") |> unwrap_and_tag(:bracketed_ins_close) - bracketed_sup_close = string("^]") |> unwrap_and_tag(:bracketed_sup_close) - bracketed_del_close = string("-]") |> unwrap_and_tag(:bracketed_del_close) - bracketed_sub_close = string("~]") |> unwrap_and_tag(:bracketed_sub_close) - - b_delim = string("**") |> unwrap_and_tag(:b_delim) - i_delim = string("__") |> unwrap_and_tag(:i_delim) - strong_delim = string("*") |> unwrap_and_tag(:strong_delim) - em_delim = string("_") |> unwrap_and_tag(:em_delim) - code_delim = string("@") |> unwrap_and_tag(:code_delim) - ins_delim = string("+") |> unwrap_and_tag(:ins_delim) - sup_delim = string("^") |> unwrap_and_tag(:sup_delim) - sub_delim = string("~") |> unwrap_and_tag(:sub_delim) - - del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:del_delim) - - quicktxt = - utf8_char('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*@_{}') - |> unwrap_and_tag(:quicktxt) - - char = - utf8_char([]) - |> unwrap_and_tag(:char) - - textile = - choice([ - literal, - double_newline, - newline, - space_token, - bq_cite_start, - bq_cite_open, - bq_open, - bq_close, - spoiler_open, - spoiler_close, - unbracketed_image, - bracketed_image, - bracketed_link_open, - bracketed_link_url, - unbracketed_link_url, - link_delim, - bracketed_b_open, - bracketed_i_open, - bracketed_strong_open, - bracketed_em_open, - bracketed_code_open, - bracketed_ins_open, - bracketed_sup_open, - bracketed_del_open, - bracketed_sub_open, - bracketed_b_close, - bracketed_i_close, - bracketed_strong_close, - bracketed_em_close, - bracketed_code_close, - bracketed_ins_close, - bracketed_sup_close, - bracketed_del_close, - bracketed_sub_close, - b_delim, - i_delim, - strong_delim, - em_delim, - code_delim, - ins_delim, - sup_delim, - del_delim, - sub_delim, - quicktxt, - char - ]) - |> repeat() - |> eos() - - defparsec :lex, textile -end diff --git a/lib/fast_textile/parser.ex b/lib/fast_textile/parser.ex deleted file mode 100644 index b003ade4..00000000 --- a/lib/fast_textile/parser.ex +++ /dev/null @@ -1,371 +0,0 @@ -defmodule FastTextile.Parser do - alias FastTextile.Lexer - alias Phoenix.HTML - - def parse(parser, input) do - parser = Map.put(parser, :state, %{}) - - with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(String.trim(input)), - {:ok, tree, []} <- repeat(&textile/2, parser, tokens) - do - partial_flatten(tree) - else - _ -> - [] - end - end - - # Helper to turn a parse tree into a string - def flatten(tree) do - tree - |> List.flatten() - |> Enum.map_join("", fn {_k, v} -> v end) - end - - # Helper to escape HTML - defp escape(text) do - text - |> HTML.html_escape() - |> HTML.safe_to_string() - end - - # Helper to turn a parse tree into a list - def partial_flatten(tree) do - tree - |> List.flatten() - |> Enum.chunk_by(fn {k, _v} -> k end) - |> Enum.map(fn list -> - [{type, _v} | _rest] = list - - value = Enum.map_join(list, "", fn {_k, v} -> v end) - - {type, value} - end) - end - - defp put_state(parser, new_state) do - state = Map.put(parser.state, new_state, true) - Map.put(parser, :state, state) - end - - # Helper corresponding to Kleene star (*) operator - # Match a specificed rule zero or more times - defp repeat(rule, parser, tokens) do - case rule.(parser, tokens) do - {:ok, tree, r_tokens} -> - {:ok, tree2, r2_tokens} = repeat(rule, parser, r_tokens) - {:ok, [tree, tree2], r2_tokens} - - _ -> - {:ok, [], tokens} - end - end - - # Helper to match a simple recursive grammar rule of the following form: - # - # open_token callback* close_token - # - defp simple_recursive(open_token, close_token, open_tag, close_tag, callback, parser, [{open_token, open} | r_tokens]) do - case repeat(callback, parser, r_tokens) do - {:ok, tree, [{^close_token, _} | r2_tokens]} -> - {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape(open)}, tree], r2_tokens} - end - end - defp simple_recursive(_open_token, _close_token, _open_tag, _close_tag, _callback, _parser, _tokens) do - {:error, "Expected a simple recursive rule"} - end - - # Helper to match a simple recursive grammar rule with negative lookahead: - # - # open_token callback* close_token (?!lookahead_not) - # - defp simple_lookahead_not(open_token, close_token, open_tag, close_tag, lookahead_not, callback, state, parser, [{open_token, open} | r_tokens]) do - case parser.state do - %{^state => _} -> - {:error, "End of rule"} - - _ -> - case r_tokens do - [{forbidden_lookahead, _la} | _] when forbidden_lookahead in [:space, :newline] -> - {:ok, [{:text, escape(open)}], r_tokens} - - _ -> - case repeat(callback, put_state(parser, state), r_tokens) do - {:ok, tree, [{^close_token, close}, {^lookahead_not, ln} | r2_tokens]} -> - {:ok, [{:text, escape(open)}, tree, {:text, escape(close)}], [{lookahead_not, ln} | r2_tokens]} - - {:ok, tree, [{^close_token, _} | r2_tokens]} -> - {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape(open)}, tree], r2_tokens} - end - end - end - end - defp simple_lookahead_not(_open_token, _close_token, _open_tag, _close_tag, _lookahead_not, _callback, _state, _parser, _tokens) do - {:error, "Expected a simple lookahead not rule"} - end - - # Helper to efficiently assemble a UTF-8 binary from tokens of the - # given type - defp assemble_binary(token_type, accumulator, [{token_type, t} | stream]) do - assemble_binary(token_type, accumulator <> <>, stream) - end - defp assemble_binary(_token_type, accumulator, tokens), do: {accumulator, tokens} - - # - # inline_textile_element = - # opening_markup inline_textile_element* closing_markup (?!quicktxt) | - # closing_markup (?=quicktxt) | - # link_delim block_textile_element* link_url | - # image url? | - # code_delim inline_textile_element* code_delim | - # inline_textile_element_not_opening_markup; - # - - defp inline_textile_element(parser, tokens) do - [ - {:b_delim, :b, "", ""}, - {:i_delim, :i, "", ""}, - {:strong_delim, :strong, "", ""}, - {:em_delim, :em, "", ""}, - {:ins_delim, :ins, "", ""}, - {:sup_delim, :sup, "", ""}, - {:del_delim, :del, "", ""}, - {:sub_delim, :sub, "", ""} - ] - |> Enum.find_value(fn {delim_token, state, open_tag, close_tag} -> - simple_lookahead_not( - delim_token, - delim_token, - open_tag, - close_tag, - :quicktxt, - &inline_textile_element/2, - state, - parser, - tokens - ) - |> case do - {:ok, tree, r_tokens} -> - {:ok, tree, r_tokens} - - _ -> - nil - end - end) - |> case do - nil -> inner_inline_textile_element(parser, tokens) - value -> value - end - end - - defp inner_inline_textile_element(parser, [{token, t}, {:quicktxt, q} | r_tokens]) - when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim] - do - case inline_textile_element(parser, [{:quicktxt, q} | r_tokens]) do - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape(t)}, tree], r2_tokens} - - _ -> - {:ok, [{:text, escape(t)}], [{:quicktxt, q} | r_tokens]} - end - end - defp inner_inline_textile_element(parser, [{:link_delim, open} | r_tokens]) do - case repeat(&block_textile_element/2, parser, r_tokens) do - {:ok, tree, [{:unbracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} -> - href = escape(url) - - {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape(open)}, tree], r2_tokens} - end - end - defp inner_inline_textile_element(parser, [{:bracketed_link_open, open} | r_tokens]) do - case repeat(&inline_textile_element/2, parser, r_tokens) do - {:ok, tree, [{:bracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} -> - href = escape(url) - - {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape(open)}, tree], r2_tokens} - end - end - defp inner_inline_textile_element(parser, [{token, img}, {:unbracketed_image_url, <<":", url::binary>>} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do - img = parser.image_transform.(img) - - {:ok, [{:markup, ""}], r_tokens} - end - defp inner_inline_textile_element(parser, [{token, img} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do - img = parser.image_transform.(img) - - {:ok, [{:markup, ""}], r_tokens} - end - defp inner_inline_textile_element(parser, [{:code_delim, open} | r_tokens]) do - case parser.state do - %{code: _} -> - {:error, "End of rule"} - - _ -> - case repeat(&inline_textile_element/2, put_state(parser, :code), r_tokens) do - {:ok, tree, [{:code_delim, _} | r2_tokens]} -> - {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape(open)}, tree], r2_tokens} - end - end - end - defp inner_inline_textile_element(parser, tokens) do - inline_textile_element_not_opening_markup(parser, tokens) - end - - # - # bq_cite_text = literal | char | space | quicktxt; - # - - # Note that text is not escaped here because it will be escaped - # when the tree is flattened - defp bq_cite_text(_parser, [{:literal, lit} | r_tokens]) do - {:ok, [{:text, lit}], r_tokens} - end - defp bq_cite_text(_parser, [{:char, lit} | r_tokens]) do - {:ok, [{:text, <>}], r_tokens} - end - defp bq_cite_text(_parser, [{:space, _} | r_tokens]) do - {:ok, [{:text, " "}], r_tokens} - end - defp bq_cite_text(_parser, [{:quicktxt, lit} | r_tokens]) do - {:ok, [{:text, <>}], r_tokens} - end - defp bq_cite_text(_parser, _tokens) do - {:error, "Expected cite tokens"} - end - - # - # inline_textile_element_not_opening_markup = - # literal | space | char | - # quicktxt opening_markup quicktxt | - # quicktxt | - # opening_block_tag block_textile_element* closing_block_tag; - # - - defp inline_textile_element_not_opening_markup(_parser, [{:literal, lit} | r_tokens]) do - {:ok, [{:markup, ""}, {:markup, escape(lit)}, {:markup, ""}], r_tokens} - end - defp inline_textile_element_not_opening_markup(_parser, [{:space, _} | r_tokens]) do - {:ok, [{:text, " "}], r_tokens} - end - defp inline_textile_element_not_opening_markup(_parser, [{:char, lit} | r_tokens]) do - {binary, r2_tokens} = assemble_binary(:char, <>, r_tokens) - - {:ok, [{:text, escape(binary)}], r2_tokens} - end - defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, q1}, {token, t}, {:quicktxt, q2} | r_tokens]) - when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim] - do - {:ok, [{:text, escape(<>)}, {:text, escape(t)}, {:text, escape(<>)}], r_tokens} - end - defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, lit} | r_tokens]) do - {:ok, [{:text, escape(<>)}], r_tokens} - end - defp inline_textile_element_not_opening_markup(parser, [{:bq_cite_start, start} | r_tokens]) do - case repeat(&bq_cite_text/2, parser, r_tokens) do - {:ok, tree, [{:bq_cite_open, open} | r2_tokens]} -> - case repeat(&block_textile_element/2, parser, r2_tokens) do - {:ok, tree2, [{:bq_close, _} | r3_tokens]} -> - cite = escape(flatten(tree)) - - {:ok, [{:markup, "
"}, tree2, {:markup, "
"}], r3_tokens} - - {:ok, tree2, r3_tokens} -> - {:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}, tree2], r3_tokens} - - _ -> - {:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}], r_tokens} - end - - _ -> - {:ok, [{:text, escape(start)}], r_tokens} - end - end - defp inline_textile_element_not_opening_markup(_parser, [{:bq_cite_open, tok} | r_tokens]) do - {:ok, [{:text, escape(tok)}], r_tokens} - end - defp inline_textile_element_not_opening_markup(parser, tokens) do - [ - {:bq_open, :bq_close, "
", "
"}, - {:spoiler_open, :spoiler_close, "", ""}, - {:bracketed_b_open, :bracketed_b_close, "", ""}, - {:bracketed_i_open, :bracketed_i_close, "", ""}, - {:bracketed_strong_open, :bracketed_strong_close, "", ""}, - {:bracketed_em_open, :bracketed_em_close, "", ""}, - {:bracketed_code_open, :bracketed_code_close, "", ""}, - {:bracketed_ins_open, :bracketed_ins_close, "", ""}, - {:bracketed_sup_open, :bracketed_sup_close, "", ""}, - {:bracketed_del_open, :bracketed_del_close, "", ""}, - {:bracketed_sub_open, :bracketed_sub_close, "", ""} - ] - |> Enum.find_value(fn {open_token, close_token, open_tag, close_tag} -> - simple_recursive( - open_token, - close_token, - open_tag, - close_tag, - &block_textile_element/2, - parser, - tokens - ) - |> case do - {:ok, tree, r_tokens} -> - {:ok, tree, r_tokens} - - _ -> - nil - end - end) - |> Kernel.||({:error, "Expected block markup"}) - end - - # - # block_textile_element = - # double_newline | newline | inline_textile_element; - # - - defp block_textile_element(_parser, [{:double_newline, _} | r_tokens]) do - {:ok, [{:markup, "

"}], r_tokens} - end - defp block_textile_element(_parser, [{:newline, _} | r_tokens]) do - {:ok, [{:markup, "
"}], r_tokens} - end - defp block_textile_element(parser, tokens) do - inline_textile_element(parser, tokens) - end - - # - # textile = - # (block_textile_element | TOKEN)* eos; - # - - defp textile(parser, tokens) do - case block_textile_element(parser, tokens) do - {:ok, tree, r_tokens} -> - {:ok, tree, r_tokens} - - _ -> - case tokens do - [{_, string} | r_tokens] -> - {:ok, [{:text, escape(string)}], r_tokens} - - _ -> - {:error, "Expected textile"} - end - end - end -end diff --git a/lib/philomena/textile/renderer.ex b/lib/philomena/textile/renderer.ex index 35686bfc..6644e548 100644 --- a/lib/philomena/textile/renderer.ex +++ b/lib/philomena/textile/renderer.ex @@ -1,17 +1,12 @@ defmodule Philomena.Textile.Renderer do # todo: belongs in PhilomenaWeb - alias Textile.Parser, as: SlowParser - alias FastTextile.Parser, as: FastParser + alias Textile.Parser alias Philomena.Images.Image alias Philomena.Repo import Phoenix.HTML import Phoenix.HTML.Link import Ecto.Query - @parser %SlowParser{ - image_transform: &Camo.Image.image_url/1 - } - # Kill bogus compile time dependency on ImageView @image_view Module.concat(["PhilomenaWeb.ImageView"]) @@ -20,17 +15,8 @@ defmodule Philomena.Textile.Renderer do end def render_collection(posts, conn) do - parser = - case conn.cookies["new_parser"] do - "true" -> FastParser - _ -> SlowParser - end - - parsed = - posts - |> Enum.map(fn post -> - parser.parse(@parser, post.body) - end) + opts = %{image_transform: &Camo.Image.image_url/1} + parsed = Enum.map(posts, &Parser.parse(opts, &1.body)) images = parsed diff --git a/lib/philomena_web/controllers/setting_controller.ex b/lib/philomena_web/controllers/setting_controller.ex index 19dd6518..3627b0e8 100644 --- a/lib/philomena_web/controllers/setting_controller.ex +++ b/lib/philomena_web/controllers/setting_controller.ex @@ -40,7 +40,6 @@ defmodule PhilomenaWeb.SettingController do |> set_cookie(user_params, "webm", "webm") |> set_cookie(user_params, "chan_nsfw", "chan_nsfw") |> set_cookie(user_params, "hide_staff_tools", "hide_staff_tools") - |> set_cookie(user_params, "new_parser", "new_parser") end defp set_cookie(conn, params, param_name, cookie_name) do diff --git a/lib/philomena_web/templates/setting/edit.html.slime b/lib/philomena_web/templates/setting/edit.html.slime index e5c0d80c..26bfa00c 100644 --- a/lib/philomena_web/templates/setting/edit.html.slime +++ b/lib/philomena_web/templates/setting/edit.html.slime @@ -122,10 +122,6 @@ h1 Content Settings => label f, :chan_nsfw, "Show NSFW channels" => checkbox f, :chan_nsfw .fieldlabel: i Show streams marked as NSFW on the channels page. - .field - => label f, :new_parser, "Use experimental parser" - => checkbox f, :new_parser - .fieldlabel: i Use the experimental Textile parser. = if staff?(@conn.assigns.current_user) do .field => label f, :hide_staff_tools diff --git a/lib/philomena_web/views/post_view.ex b/lib/philomena_web/views/post_view.ex index 355dad7c..bc28a461 100644 --- a/lib/philomena_web/views/post_view.ex +++ b/lib/philomena_web/views/post_view.ex @@ -1,6 +1,6 @@ defmodule PhilomenaWeb.PostView do alias Philomena.Attribution - alias FastTextile.Parser + alias Textile.Parser use PhilomenaWeb, :view diff --git a/lib/textile/helpers.ex b/lib/textile/helpers.ex deleted file mode 100644 index e874e4c3..00000000 --- a/lib/textile/helpers.ex +++ /dev/null @@ -1,41 +0,0 @@ -defmodule Textile.Helpers do - import NimbleParsec - - # Helper to "undo" a tokenization and convert it back - # to a string - def unwrap([{_name, value}]), do: value - - # Lots of extra unicode space characters - def space do - choice([ - utf8_char('\n\r\f \t\u00a0\u1680\u180e\u202f\u205f\u3000'), - utf8_char([0x2000..0x200a]) - ]) - end - - # Characters which are valid before and after the main markup characters. - def special_characters do - choice([ - space(), - utf8_char('#$%&(),./:;<=?\\`|\'') - ]) - end - - # Simple tag for a markup element that must - # be succeeded immediately by a non-space character - def markup_open_tag(str, char \\ nil, tag_name) do - char = char || binary_head(str) - - open_stops = - choice([ - space(), - string(char) - ]) - - string(str) - |> lookahead_not(open_stops) - |> unwrap_and_tag(:"#{tag_name}_open") - end - - defp binary_head(<>), do: <> -end diff --git a/lib/textile/lexer.ex b/lib/textile/lexer.ex index ab37a295..f9991bd8 100644 --- a/lib/textile/lexer.ex +++ b/lib/textile/lexer.ex @@ -1,15 +1,43 @@ defmodule Textile.Lexer do import NimbleParsec - import Textile.Helpers - import Textile.MarkupLexer - import Textile.UrlLexer + space = + utf8_char('\f \r\t\u00a0\u1680\u180e\u202f\u205f\u3000' ++ Enum.to_list(0x2000..0x200a)) - # Structural tags + extended_space = + choice([ + space, + string("\n"), + eos() + ]) + space_token = + space + |> unwrap_and_tag(:space) + + double_newline = + string("\n") + |> repeat(space) + |> string("\n") + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:double_newline) + + newline = + string("\n") + |> unwrap_and_tag(:newline) + + link_ending_characters = + utf8_char('@#$%&(),.:;<=?\\`|\'') + + bracket_link_ending_characters = + utf8_char('" []') + + end_of_link = + choice([ + concat(link_ending_characters, extended_space), + extended_space + ]) - # Literals enclosed via [== ==] - # Will never contain any markup bracketed_literal = ignore(string("[==")) |> repeat(lookahead_not(string("==]")) |> utf8_char([])) @@ -28,231 +56,183 @@ defmodule Textile.Lexer do |> reduce({List, :to_string, []}) |> unwrap_and_tag(:literal) - blockquote_cite = - lookahead_not(string("\"")) - |> choice([ - literal |> reduce(:unwrap), - utf8_char([]) - ]) - |> repeat() + bq_cite_start = + string("[bq=\"") + |> unwrap_and_tag(:bq_cite_start) - # Blockquote opening tag with cite: [bq="the author"] - # Cite can contain bracketed literals or text - blockquote_open_cite = - ignore(string("[bq=\"")) - |> concat(blockquote_cite) - |> ignore(string("\"]")) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:blockquote_open_cite) + bq_cite_open = + string("\"]") + |> unwrap_and_tag(:bq_cite_open) - # Blockquote opening tag - blockquote_open = + bq_open = string("[bq]") - |> unwrap_and_tag(:blockquote_open) + |> unwrap_and_tag(:bq_open) - # Blockquote closing tag - blockquote_close = + bq_close = string("[/bq]") - |> unwrap_and_tag(:blockquote_close) + |> unwrap_and_tag(:bq_close) - # Spoiler open tag spoiler_open = string("[spoiler]") |> unwrap_and_tag(:spoiler_open) - # Spoiler close tag spoiler_close = string("[/spoiler]") |> unwrap_and_tag(:spoiler_close) - - # Images - - - image_url_with_title = - url_ending_in(string("(")) - |> unwrap_and_tag(:image_url) - |> concat( - ignore(string("(")) - |> repeat(utf8_char(not: ?))) - |> ignore(string(")")) - |> lookahead(string("!")) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:image_title) - ) - - image_url_without_title = - url_ending_in(string("!")) - |> unwrap_and_tag(:image_url) - - image_url = + image_url_scheme = choice([ - image_url_with_title, - image_url_without_title + string("//"), + string("/"), + string("https://"), + string("http://") ]) - bracketed_image_with_link = - ignore(string("[!")) - |> concat(image_url) - |> ignore(string("!:")) - |> concat( - url_ending_in(string("]")) - |> unwrap_and_tag(:image_link_url) - ) - - bracketed_image_without_link = - ignore(string("[!")) - |> concat(image_url) - |> ignore(string("!]")) - - image_with_link = - ignore(string("!")) - |> concat(image_url) - |> ignore(string("!:")) - |> concat( - url_ending_in(space()) - |> unwrap_and_tag(:image_link_url) - ) - - image_without_link = - ignore(string("!")) - |> concat(image_url) - |> ignore(string("!")) - - image = + link_url_scheme = choice([ - bracketed_image_with_link, - bracketed_image_without_link, - image_with_link, - image_without_link + string("#"), + image_url_scheme ]) + unbracketed_url = + string(":") + |> concat(link_url_scheme) + |> repeat(lookahead_not(end_of_link) |> utf8_char([])) - # Links + unbracketed_image_url = + unbracketed_url + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:unbracketed_image_url) - - {link_markup_start, link_markup_element} = markup_ending_in(string("\"")) - - link_url_stop = - choice([ - string("*"), - string("@"), - string("^"), - string("~"), - string(".") |> concat(choice([space(), eos()])), - string("!") |> concat(choice([space(), eos()])), - string(",") |> concat(choice([space(), eos()])), - string("_") |> concat(choice([space(), eos()])), - string("?") |> concat(choice([space(), eos()])), - string(";") |> concat(choice([space(), eos()])), - space(), - eos() - ]) - - link_contents_start = - choice([ - image, - spoiler_open, - spoiler_close, - blockquote_open, - blockquote_open_cite, - blockquote_close, - literal, - link_markup_start - ]) - - link_contents_element = - choice([ - image, - spoiler_open, - spoiler_close, - blockquote_open, - blockquote_open_cite, - blockquote_close, - literal, - link_markup_element - ]) - - link_contents = - optional(link_contents_start) - |> repeat(link_contents_element) - - bracketed_link_end = - string("\":") - |> unwrap_and_tag(:link_end) - |> concat( - url_ending_in(string("]")) - |> ignore(string("]")) - |> unwrap_and_tag(:link_url) - ) - - bracketed_link = - string("[\"") - |> unwrap_and_tag(:link_start) - |> concat(link_contents) - |> concat(bracketed_link_end) - - unbracketed_link_end = - string("\":") - |> unwrap_and_tag(:link_end) - |> concat( - url_ending_in(link_url_stop) - |> unwrap_and_tag(:link_url) - ) - - unbracketed_link = + unbracketed_link_url = string("\"") - |> unwrap_and_tag(:link_start) - |> concat(link_contents) - |> concat(unbracketed_link_end) + |> concat(unbracketed_url) + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:unbracketed_link_url) - link = - choice([ - bracketed_link, - unbracketed_link - ]) + unbracketed_image = + ignore(string("!")) + |> concat(image_url_scheme) + |> repeat(utf8_char(not: ?!)) + |> ignore(string("!")) + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:unbracketed_image) + |> concat(optional(unbracketed_image_url)) + bracketed_image = + ignore(string("[!")) + |> concat(image_url_scheme) + |> repeat(lookahead_not(string("!]")) |> utf8_char([])) + |> ignore(string("!]")) + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:bracketed_image) + |> concat(optional(unbracketed_image_url)) - # Textile + link_delim = + string("\"") + |> unwrap_and_tag(:link_delim) - markup_ends = - choice([ - spoiler_close, - blockquote_close, - eos() - ]) + bracketed_link_open = + string("[\"") + |> unwrap_and_tag(:bracketed_link_open) - {markup_start, markup_element} = markup_ending_in(markup_ends) + bracketed_link_url = + string("\":") + |> concat(link_url_scheme) + |> repeat(lookahead_not(bracket_link_ending_characters) |> utf8_char([])) + |> ignore(string("]")) + |> reduce({List, :to_string, []}) + |> unwrap_and_tag(:bracketed_link_url) - textile_default = - choice([ - literal, - blockquote_open_cite |> optional(markup_start), - blockquote_open |> optional(markup_start), - blockquote_close, - spoiler_open |> optional(markup_start), - spoiler_close, - link, - image - ]) + bracketed_b_open = string("[**") |> unwrap_and_tag(:bracketed_b_open) + bracketed_i_open = string("[__") |> unwrap_and_tag(:bracketed_i_open) + bracketed_strong_open = string("[*") |> unwrap_and_tag(:bracketed_strong_open) + bracketed_em_open = string("[_") |> unwrap_and_tag(:bracketed_em_open) + bracketed_code_open = string("[@") |> unwrap_and_tag(:bracketed_code_open) + bracketed_ins_open = string("[+") |> unwrap_and_tag(:bracketed_ins_open) + bracketed_sup_open = string("[^") |> unwrap_and_tag(:bracketed_sup_open) + bracketed_del_open = string("[-") |> unwrap_and_tag(:bracketed_del_open) + bracketed_sub_open = string("[~") |> unwrap_and_tag(:bracketed_sub_open) - textile_main = - choice([ - textile_default, - markup_element - ]) + bracketed_b_close = string("**]") |> unwrap_and_tag(:bracketed_b_close) + bracketed_i_close = string("__]") |> unwrap_and_tag(:bracketed_i_close) + bracketed_strong_close = string("*]") |> unwrap_and_tag(:bracketed_strong_close) + bracketed_em_close = string("_]") |> unwrap_and_tag(:bracketed_em_close) + bracketed_code_close = string("@]") |> unwrap_and_tag(:bracketed_code_close) + bracketed_ins_close = string("+]") |> unwrap_and_tag(:bracketed_ins_close) + bracketed_sup_close = string("^]") |> unwrap_and_tag(:bracketed_sup_close) + bracketed_del_close = string("-]") |> unwrap_and_tag(:bracketed_del_close) + bracketed_sub_close = string("~]") |> unwrap_and_tag(:bracketed_sub_close) - textile_start = - choice([ - textile_default, - markup_start - ]) + b_delim = string("**") |> unwrap_and_tag(:b_delim) + i_delim = string("__") |> unwrap_and_tag(:i_delim) + strong_delim = string("*") |> unwrap_and_tag(:strong_delim) + em_delim = string("_") |> unwrap_and_tag(:em_delim) + code_delim = string("@") |> unwrap_and_tag(:code_delim) + ins_delim = string("+") |> unwrap_and_tag(:ins_delim) + sup_delim = string("^") |> unwrap_and_tag(:sup_delim) + sub_delim = string("~") |> unwrap_and_tag(:sub_delim) + + del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:del_delim) + + quicktxt = + utf8_char('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*@_{}') + |> unwrap_and_tag(:quicktxt) + + char = + utf8_char([]) + |> unwrap_and_tag(:char) textile = - optional(textile_start) - |> repeat(textile_main) + choice([ + literal, + double_newline, + newline, + space_token, + bq_cite_start, + bq_cite_open, + bq_open, + bq_close, + spoiler_open, + spoiler_close, + unbracketed_image, + bracketed_image, + bracketed_link_open, + bracketed_link_url, + unbracketed_link_url, + link_delim, + bracketed_b_open, + bracketed_i_open, + bracketed_strong_open, + bracketed_em_open, + bracketed_code_open, + bracketed_ins_open, + bracketed_sup_open, + bracketed_del_open, + bracketed_sub_open, + bracketed_b_close, + bracketed_i_close, + bracketed_strong_close, + bracketed_em_close, + bracketed_code_close, + bracketed_ins_close, + bracketed_sup_close, + bracketed_del_close, + bracketed_sub_close, + b_delim, + i_delim, + strong_delim, + em_delim, + code_delim, + ins_delim, + sup_delim, + del_delim, + sub_delim, + quicktxt, + char + ]) + |> repeat() |> eos() - defparsec :lex, textile end diff --git a/lib/textile/markup_lexer.ex b/lib/textile/markup_lexer.ex deleted file mode 100644 index 8c839011..00000000 --- a/lib/textile/markup_lexer.ex +++ /dev/null @@ -1,171 +0,0 @@ -defmodule Textile.MarkupLexer do - import NimbleParsec - import Textile.Helpers - - # Markup tags - - def markup_ending_in(ending_sequence) do - double_newline = - string("\n\n") - |> unwrap_and_tag(:double_newline) - - newline = - string("\n") - |> unwrap_and_tag(:newline) - - preceding_whitespace = - choice([ - double_newline, - newline, - special_characters() - ]) - - # The literal tag is special, because - # 1. It needs to capture everything inside it as a distinct token. - # 2. It can be surrounded by markup on all sides. - # 3. If it successfully tokenizes, it will always be in the output. - - literal_open_stops = - choice([ - space(), - ending_sequence, - string("=") - ]) - - literal_close_stops = - lookahead_not( - choice([ - ending_sequence, - string("\n\n"), - string("="), - space() |> concat(string("=")) - ]) - ) - |> utf8_char([]) - - literal = - ignore(string("==")) - |> lookahead_not(literal_open_stops) - |> repeat(literal_close_stops) - |> ignore(string("==")) - |> reduce({List, :to_string, []}) - |> unwrap_and_tag(:literal) - - b_open = markup_open_tag("**", "*", :b) - i_open = markup_open_tag("__", "*", :i) - - strong_open = markup_open_tag("*", :strong) - em_open = markup_open_tag("_", :em) - code_open = markup_open_tag("@", :code) - ins_open = markup_open_tag("+", :ins) - sup_open = markup_open_tag("^", :sup) - del_open = markup_open_tag("-", :del) - sub_open = markup_open_tag("~", :sub) - - b_b_open = markup_open_tag("[**", "*", :b_b) - b_i_open = markup_open_tag("[__", "_", :b_i) - - b_strong_open = markup_open_tag("[*", "*", :b_strong) - b_em_open = markup_open_tag("[_", "_", :b_em) - b_code_open = markup_open_tag("[@", "@", :b_code) - b_ins_open = markup_open_tag("[+", "+", :b_ins) - b_sup_open = markup_open_tag("[^", "^", :b_sup) - b_del_open = markup_open_tag("[-", "-", :b_del) - b_sub_open = markup_open_tag("[~", "~", :b_sub) - - b_b_close = string("**]") |> unwrap_and_tag(:b_b_close) - b_i_close = string("__]") |> unwrap_and_tag(:b_i_close) - - b_strong_close = string("*]") |> unwrap_and_tag(:b_strong_close) - b_em_close = string("_]") |> unwrap_and_tag(:b_em_close) - b_code_close = string("@]") |> unwrap_and_tag(:b_code_close) - b_ins_close = string("+]") |> unwrap_and_tag(:b_ins_close) - b_sup_close = string("^]") |> unwrap_and_tag(:b_sup_close) - b_del_close = string("-]") |> unwrap_and_tag(:b_del_close) - b_sub_close = string("~]") |> unwrap_and_tag(:b_sub_close) - - b_close = string("**") |> unwrap_and_tag(:b_close) - i_close = string("__") |> unwrap_and_tag(:i_close) - - strong_close = string("*") |> unwrap_and_tag(:strong_close) - em_close = string("_") |> unwrap_and_tag(:em_close) - code_close = string("@") |> unwrap_and_tag(:code_close) - ins_close = string("+") |> unwrap_and_tag(:ins_close) - sup_close = string("^") |> unwrap_and_tag(:sup_close) - del_close = string("-") |> unwrap_and_tag(:del_close) - sub_close = string("~") |> unwrap_and_tag(:sub_close) - - bracketed_markup_opening_tags = - choice([ - b_b_open, - b_i_open, - b_strong_open, - b_em_open, - b_code_open, - b_ins_open, - b_sup_open, - b_del_open, - b_sub_open - ]) - - markup_opening_tags = - choice([ - b_open, - i_open, - strong_open, - em_open, - code_open, - ins_open, - sup_open, - del_open |> lookahead_not(string(">")), - sub_open - ]) - - bracketed_markup_closing_tags = - choice([ - b_b_close, - b_i_close, - b_strong_close, - b_em_close, - b_code_close, - b_ins_close, - b_sup_close, - b_del_close, - b_sub_close, - ]) - - markup_closing_tags = - choice([ - b_close, - i_close, - strong_close, - em_close, - code_close, - ins_close, - sup_close, - del_close, - sub_close - ]) - - markup_at_start = - choice([ - times(markup_opening_tags, min: 1), - bracketed_markup_opening_tags - ]) - - markup_element = - lookahead_not(ending_sequence) - |> choice([ - literal, - bracketed_markup_closing_tags, - bracketed_markup_opening_tags |> lookahead_not(space()), - preceding_whitespace |> times(markup_opening_tags, min: 1) |> lookahead_not(ending_sequence), - times(markup_closing_tags, min: 1) |> lookahead(choice([special_characters(), ending_sequence])), - double_newline, - newline, - utf8_char([]) - ]) - - {markup_at_start, markup_element} - end -end diff --git a/lib/textile/parser.ex b/lib/textile/parser.ex index 1b1cfa69..241fb02a 100644 --- a/lib/textile/parser.ex +++ b/lib/textile/parser.ex @@ -1,313 +1,371 @@ defmodule Textile.Parser do - import Textile.ParserHelpers + alias Textile.Lexer + alias Phoenix.HTML - alias Textile.{ - Lexer, - Parser, - TokenCoalescer - } + def parse(parser, input) do + parser = Map.put(parser, :state, %{}) - defstruct [ - image_transform: nil - ] - - def parse(%Parser{} = parser, input) do - with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input |> remove_linefeeds()), - tokens <- TokenCoalescer.coalesce_lex(tokens), - {:ok, tree, []} <- textile_top(parser, tokens), - tree <- TokenCoalescer.coalesce_parse(tree) + with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(String.trim(input)), + {:ok, tree, []} <- repeat(&textile/2, parser, tokens) do - tree - else - err -> - err - end - end - - - # - # Backtracking LL packrat parser for simplified Textile grammar - # - - - # - # textile = (well_formed_including_paragraphs | TOKEN)*; - # - defp textile_top(_parser, []), do: {:ok, [], []} - defp textile_top(parser, tokens) do - with {:ok, tree, r_tokens} <- well_formed_including_paragraphs(parser, nil, tokens), - false <- tree == [], - {:ok, next_tree, r2_tokens} <- textile_top(parser, r_tokens) - do - {:ok, [tree, next_tree], r2_tokens} + partial_flatten(tree) else _ -> - [{_token, string} | r_tokens] = tokens - {:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens) - - {:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens} + [] end end - - # - # well_formed_including_paragraphs = (markup | double_newline)*; - # - defp well_formed_including_paragraphs(_parser, _closing_token, []), do: {:ok, [], []} - defp well_formed_including_paragraphs(parser, closing_token, [{:double_newline, _nl} | r_tokens]) do - {:ok, tree, r2_tokens} = well_formed_including_paragraphs(parser, closing_token, r_tokens) - - {:ok, [{:markup, "

"}, tree], r2_tokens} + # Helper to turn a parse tree into a string + def flatten(tree) do + tree + |> List.flatten() + |> Enum.map_join("", fn {_k, v} -> v end) end - defp well_formed_including_paragraphs(parser, closing_token, tokens) do - with {:markup, {:ok, tree, r_tokens}} <- {:markup, markup(parser, tokens)}, - {:ok, next_tree, r2_tokens} <- well_formed_including_paragraphs(parser, closing_token, r_tokens) - do - {:ok, [tree, next_tree], r2_tokens} - else - _ -> - consume_nonclosing(parser, closing_token, tokens) - end + # Helper to escape HTML + defp escape(text) do + text + |> HTML.html_escape() + |> HTML.safe_to_string() end - defp consume_nonclosing(_parser, closing_token, [{closing_token, _string} | _r_tokens] = tokens) do - {:ok, [], tokens} - end - defp consume_nonclosing(parser, closing_token, [{_next_token, string} | r_tokens]) do - {:ok, next_tree, r2_tokens} = well_formed_including_paragraphs(parser, closing_token, r_tokens) + # Helper to turn a parse tree into a list + def partial_flatten(tree) do + tree + |> List.flatten() + |> Enum.chunk_by(fn {k, _v} -> k end) + |> Enum.map(fn list -> + [{type, _v} | _rest] = list - {:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens} - end - defp consume_nonclosing(_parser, _closing_token, []) do - {:ok, [], []} + value = Enum.map_join(list, "", fn {_k, v} -> v end) + + {type, value} + end) end - # - # well_formed = (markup)*; - # - defp well_formed(parser, tokens) do - case markup(parser, tokens) do + defp put_state(parser, new_state) do + state = Map.put(parser.state, new_state, true) + Map.put(parser, :state, state) + end + + # Helper corresponding to Kleene star (*) operator + # Match a specificed rule zero or more times + defp repeat(rule, parser, tokens) do + case rule.(parser, tokens) do {:ok, tree, r_tokens} -> - {:ok, next_tree, r2_tokens} = well_formed(parser, r_tokens) - {:ok, [tree, next_tree], r2_tokens} + {:ok, tree2, r2_tokens} = repeat(rule, parser, r_tokens) + {:ok, [tree, tree2], r2_tokens} _ -> {:ok, [], tokens} end end - + # Helper to match a simple recursive grammar rule of the following form: # - # markup = - # blockquote | spoiler | link | image | bold | italic | strong | emphasis | - # code | inserted | superscript | deleted | subscript | newline | literal | - # literal | text; + # open_token callback* close_token # - defp markup(parser, tokens) do - markups = [ - &blockquote/2, &spoiler/2, &link/2, &image/2, &bold/2, &italic/2, &strong/2, - &emphasis/2, &code/2, &inserted/2, &superscript/2, &deleted/2, &subscript/2, - &newline/2, &literal/2, &literal/2, &text/2 - ] + defp simple_recursive(open_token, close_token, open_tag, close_tag, callback, parser, [{open_token, open} | r_tokens]) do + case repeat(callback, parser, r_tokens) do + {:ok, tree, [{^close_token, _} | r2_tokens]} -> + {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens} - value = - markups - |> Enum.find_value(fn func -> - case func.(parser, tokens) do - {:ok, tree, r_tokens} -> - {:ok, tree, r_tokens} + {:ok, tree, r2_tokens} -> + {:ok, [{:text, escape(open)}, tree], r2_tokens} + end + end + defp simple_recursive(_open_token, _close_token, _open_tag, _close_tag, _callback, _parser, _tokens) do + {:error, "Expected a simple recursive rule"} + end + + # Helper to match a simple recursive grammar rule with negative lookahead: + # + # open_token callback* close_token (?!lookahead_not) + # + defp simple_lookahead_not(open_token, close_token, open_tag, close_tag, lookahead_not, callback, state, parser, [{open_token, open} | r_tokens]) do + case parser.state do + %{^state => _} -> + {:error, "End of rule"} + + _ -> + case r_tokens do + [{forbidden_lookahead, _la} | _] when forbidden_lookahead in [:space, :newline] -> + {:ok, [{:text, escape(open)}], r_tokens} _ -> - nil + case repeat(callback, put_state(parser, state), r_tokens) do + {:ok, tree, [{^close_token, close}, {^lookahead_not, ln} | r2_tokens]} -> + {:ok, [{:text, escape(open)}, tree, {:text, escape(close)}], [{lookahead_not, ln} | r2_tokens]} + + {:ok, tree, [{^close_token, _} | r2_tokens]} -> + {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens} + + {:ok, tree, r2_tokens} -> + {:ok, [{:text, escape(open)}, tree], r2_tokens} + end end - end) - - value || {:error, "Expected markup"} + end + end + defp simple_lookahead_not(_open_token, _close_token, _open_tag, _close_tag, _lookahead_not, _callback, _state, _parser, _tokens) do + {:error, "Expected a simple lookahead not rule"} end + # Helper to efficiently assemble a UTF-8 binary from tokens of the + # given type + defp assemble_binary(token_type, accumulator, [{token_type, t} | stream]) do + assemble_binary(token_type, accumulator <> <>, stream) + end + defp assemble_binary(_token_type, accumulator, tokens), do: {accumulator, tokens} # - # blockquote = - # blockquote_open_cite well_formed_including_paragraphs blockquote_close | - # blockquote_open well_formed_including_paragraphs blockquote_close; + # inline_textile_element = + # opening_markup inline_textile_element* closing_markup (?!quicktxt) | + # closing_markup (?=quicktxt) | + # link_delim block_textile_element* link_url | + # image url? | + # code_delim inline_textile_element* code_delim | + # inline_textile_element_not_opening_markup; # - defp blockquote(parser, [{:blockquote_open_cite, author} | r_tokens]) do - case well_formed_including_paragraphs(parser, :blockquote_close, r_tokens) do - {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} -> - {:ok, [{:markup, ~s|
|}, tree, {:markup, ~s|
|}], r2_tokens} - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape_nl2br(~s|[bq="#{author}"]|)}, tree], r2_tokens} + defp inline_textile_element(parser, tokens) do + [ + {:b_delim, :b, "", ""}, + {:i_delim, :i, "", ""}, + {:strong_delim, :strong, "", ""}, + {:em_delim, :em, "", ""}, + {:ins_delim, :ins, "", ""}, + {:sup_delim, :sup, "", ""}, + {:del_delim, :del, "", ""}, + {:sub_delim, :sub, "", ""} + ] + |> Enum.find_value(fn {delim_token, state, open_tag, close_tag} -> + simple_lookahead_not( + delim_token, + delim_token, + open_tag, + close_tag, + :quicktxt, + &inline_textile_element/2, + state, + parser, + tokens + ) + |> case do + {:ok, tree, r_tokens} -> + {:ok, tree, r_tokens} + + _ -> + nil + end + end) + |> case do + nil -> inner_inline_textile_element(parser, tokens) + value -> value end end - defp blockquote(parser, [{:blockquote_open, open} | r_tokens]) do - case well_formed_including_paragraphs(parser, :blockquote_close, r_tokens) do - {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} -> - {:ok, [{:markup, ~s|
|}, tree, {:markup, ~s|
|}], r2_tokens} - + defp inner_inline_textile_element(parser, [{token, t}, {:quicktxt, q} | r_tokens]) + when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim] + do + case inline_textile_element(parser, [{:quicktxt, q} | r_tokens]) do {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens} + {:ok, [{:text, escape(t)}, tree], r2_tokens} + + _ -> + {:ok, [{:text, escape(t)}], [{:quicktxt, q} | r_tokens]} end end + defp inner_inline_textile_element(parser, [{:link_delim, open} | r_tokens]) do + case repeat(&block_textile_element/2, parser, r_tokens) do + {:ok, tree, [{:unbracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} -> + href = escape(url) - defp blockquote(_parser, _tokens), - do: {:error, "Expected a blockquote tag with optional citation"} - - - # - # spoiler = - # spoiler_open well_formed_including_paragraphs spoiler_close; - # - defp spoiler(parser, [{:spoiler_open, open} | r_tokens]) do - case well_formed_including_paragraphs(parser, :spoiler_close, r_tokens) do - {:ok, tree, [{:spoiler_close, _close} | r2_tokens]} -> - {:ok, [{:markup, ~s||}, tree, {:markup, ~s||}], r2_tokens} + {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens} + {:ok, [{:text, escape(open)}, tree], r2_tokens} end end + defp inner_inline_textile_element(parser, [{:bracketed_link_open, open} | r_tokens]) do + case repeat(&inline_textile_element/2, parser, r_tokens) do + {:ok, tree, [{:bracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} -> + href = escape(url) - defp spoiler(_parser, _tokens), - do: {:error, "Expected a spoiler tag"} - - - # - # link = - # link_start well_formed_including_paragraphs link_end link_url; - # - defp link(parser, [{:link_start, start} | r_tokens]) do - case well_formed_including_paragraphs(parser, :link_end, r_tokens) do - {:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} -> - {:ok, [{:markup, ~s||}, tree, {:markup, ~s||}], r2_tokens} + {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape_nl2br(start)}, tree], r2_tokens} + {:ok, [{:text, escape(open)}, tree], r2_tokens} end end + defp inner_inline_textile_element(parser, [{token, img}, {:unbracketed_image_url, <<":", url::binary>>} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do + img = parser.image_transform.(img) - defp link(_parser, _tokens), - do: {:error, "Expected a link"} + {:ok, [{:markup, ""}], r_tokens} + end + defp inner_inline_textile_element(parser, [{token, img} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do + img = parser.image_transform.(img) + {:ok, [{:markup, ""}], r_tokens} + end + defp inner_inline_textile_element(parser, [{:code_delim, open} | r_tokens]) do + case parser.state do + %{code: _} -> + {:error, "End of rule"} - # - # image = - # image_url image_title? image_link_url?; - # - defp image(parser, [{:image_url, image_url}, {:image_title, title}, {:image_link_url, link_url} | r_tokens]) do - image_url = parser.image_transform.(image_url) + _ -> + case repeat(&inline_textile_element/2, put_state(parser, :code), r_tokens) do + {:ok, tree, [{:code_delim, _} | r2_tokens]} -> + {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} - {:ok, [markup: ~s||], r_tokens} + {:ok, tree, r2_tokens} -> + {:ok, [{:text, escape(open)}, tree], r2_tokens} + end + end + end + defp inner_inline_textile_element(parser, tokens) do + inline_textile_element_not_opening_markup(parser, tokens) end - defp image(parser, [{:image_url, image_url}, {:image_title, title} | r_tokens]) do - image_url = parser.image_transform.(image_url) + # + # bq_cite_text = literal | char | space | quicktxt; + # - {:ok, [markup: ~s||], r_tokens} + # Note that text is not escaped here because it will be escaped + # when the tree is flattened + defp bq_cite_text(_parser, [{:literal, lit} | r_tokens]) do + {:ok, [{:text, lit}], r_tokens} + end + defp bq_cite_text(_parser, [{:char, lit} | r_tokens]) do + {:ok, [{:text, <>}], r_tokens} + end + defp bq_cite_text(_parser, [{:space, _} | r_tokens]) do + {:ok, [{:text, " "}], r_tokens} + end + defp bq_cite_text(_parser, [{:quicktxt, lit} | r_tokens]) do + {:ok, [{:text, <>}], r_tokens} + end + defp bq_cite_text(_parser, _tokens) do + {:error, "Expected cite tokens"} end - defp image(parser, [{:image_url, image_url}, {:image_link_url, link_url} | r_tokens]) do - image_url = parser.image_transform.(image_url) + # + # inline_textile_element_not_opening_markup = + # literal | space | char | + # quicktxt opening_markup quicktxt | + # quicktxt | + # opening_block_tag block_textile_element* closing_block_tag; + # - {:ok, [markup: ~s||], r_tokens} + defp inline_textile_element_not_opening_markup(_parser, [{:literal, lit} | r_tokens]) do + {:ok, [{:markup, ""}, {:markup, escape(lit)}, {:markup, ""}], r_tokens} + end + defp inline_textile_element_not_opening_markup(_parser, [{:space, _} | r_tokens]) do + {:ok, [{:text, " "}], r_tokens} + end + defp inline_textile_element_not_opening_markup(_parser, [{:char, lit} | r_tokens]) do + {binary, r2_tokens} = assemble_binary(:char, <>, r_tokens) + + {:ok, [{:text, escape(binary)}], r2_tokens} + end + defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, q1}, {token, t}, {:quicktxt, q2} | r_tokens]) + when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim] + do + {:ok, [{:text, escape(<>)}, {:text, escape(t)}, {:text, escape(<>)}], r_tokens} + end + defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, lit} | r_tokens]) do + {:ok, [{:text, escape(<>)}], r_tokens} + end + defp inline_textile_element_not_opening_markup(parser, [{:bq_cite_start, start} | r_tokens]) do + case repeat(&bq_cite_text/2, parser, r_tokens) do + {:ok, tree, [{:bq_cite_open, open} | r2_tokens]} -> + case repeat(&block_textile_element/2, parser, r2_tokens) do + {:ok, tree2, [{:bq_close, _} | r3_tokens]} -> + cite = escape(flatten(tree)) + + {:ok, [{:markup, "
"}, tree2, {:markup, "
"}], r3_tokens} + + {:ok, tree2, r3_tokens} -> + {:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}, tree2], r3_tokens} + + _ -> + {:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}], r_tokens} + end + + _ -> + {:ok, [{:text, escape(start)}], r_tokens} + end + end + defp inline_textile_element_not_opening_markup(_parser, [{:bq_cite_open, tok} | r_tokens]) do + {:ok, [{:text, escape(tok)}], r_tokens} + end + defp inline_textile_element_not_opening_markup(parser, tokens) do + [ + {:bq_open, :bq_close, "
", "
"}, + {:spoiler_open, :spoiler_close, "", ""}, + {:bracketed_b_open, :bracketed_b_close, "", ""}, + {:bracketed_i_open, :bracketed_i_close, "", ""}, + {:bracketed_strong_open, :bracketed_strong_close, "", ""}, + {:bracketed_em_open, :bracketed_em_close, "", ""}, + {:bracketed_code_open, :bracketed_code_close, "", ""}, + {:bracketed_ins_open, :bracketed_ins_close, "", ""}, + {:bracketed_sup_open, :bracketed_sup_close, "", ""}, + {:bracketed_del_open, :bracketed_del_close, "", ""}, + {:bracketed_sub_open, :bracketed_sub_close, "", ""} + ] + |> Enum.find_value(fn {open_token, close_token, open_tag, close_tag} -> + simple_recursive( + open_token, + close_token, + open_tag, + close_tag, + &block_textile_element/2, + parser, + tokens + ) + |> case do + {:ok, tree, r_tokens} -> + {:ok, tree, r_tokens} + + _ -> + nil + end + end) + |> Kernel.||({:error, "Expected block markup"}) end - defp image(parser, [{:image_url, image_url} | r_tokens]) do - image_url = parser.image_transform.(image_url) + # + # block_textile_element = + # double_newline | newline | inline_textile_element; + # - {:ok, [markup: ~s||], r_tokens} + defp block_textile_element(_parser, [{:double_newline, _} | r_tokens]) do + {:ok, [{:markup, "

"}], r_tokens} + end + defp block_textile_element(_parser, [{:newline, _} | r_tokens]) do + {:ok, [{:markup, "
"}], r_tokens} + end + defp block_textile_element(parser, tokens) do + inline_textile_element(parser, tokens) end - defp image(_parser, _tokens), - do: {:error, "Expected an image tag"} - # - # bold = - # b_open well_formed b_close | - # b_b_open well_formed b_b_close; - # - attribute_parser(:bold, :b_open, :b_close, "", "") - - # - # italic = - # i_open well_formed i_close | - # b_i_open well_formed b_i_close; - # - attribute_parser(:italic, :i_open, :i_close, "", "") - - # - # strong = - # strong_open well_formed strong_close | - # b_strong_open well_formed b_strong_close; - # - attribute_parser(:strong, :strong_open, :strong_close, "", "") - - # - # emphasis = - # em_open well_formed em_close | - # b_em_open well_formed b_em_close; - # - attribute_parser(:emphasis, :em_open, :em_close, "", "") - - # - # code = - # code_open well_formed code_close | - # b_code_open well_formed b_code_close; - # - attribute_parser(:code, :code_open, :code_close, "", "") - - # - # inserted = - # ins_open well_formed ins_close | - # b_ins_open well_formed b_ins_close; - # - attribute_parser(:inserted, :ins_open, :ins_close, "", "") - - # - # superscript = - # sup_open well_formed sup_close | - # b_sup_open well_formed b_sup_close; - # - attribute_parser(:superscript, :sup_open, :sup_close, "", "") - - # - # deleted = - # del_open well_formed del_close | - # b_del_open well_formed b_del_close; - # - attribute_parser(:deleted, :del_open, :del_close, "", "") - - # - # subscript = - # sub_open well_formed sub_close | - # b_sub_open well_formed b_sub_close; - # - attribute_parser(:subscript, :sub_open, :sub_close, "", "") - - - # - # Terminals + # textile = + # (block_textile_element | TOKEN)* eos; # - defp literal(_parser, [{:literal, text} | r_tokens]), - do: {:ok, [markup: escape_nl2br(text)], r_tokens} + defp textile(parser, tokens) do + case block_textile_element(parser, tokens) do + {:ok, tree, r_tokens} -> + {:ok, tree, r_tokens} - defp literal(_parser, _tokens), - do: {:error, "Expected a literal"} + _ -> + case tokens do + [{_, string} | r_tokens] -> + {:ok, [{:text, escape(string)}], r_tokens} - - defp newline(_parser, [{:newline, _nl} | r_tokens]), - do: {:ok, [markup: "
"], r_tokens} - - defp newline(_parser, _tokens), - do: {:error, "Expected a line break"} - - - defp text(_parser, [{:text, text} | r_tokens]), - do: {:ok, [text: escape_nl2br(text)], r_tokens} - - defp text(_parser, _tokens), - do: {:error, "Expected text"} + _ -> + {:error, "Expected textile"} + end + end + end end diff --git a/lib/textile/parser_helpers.ex b/lib/textile/parser_helpers.ex deleted file mode 100644 index bef206e6..00000000 --- a/lib/textile/parser_helpers.ex +++ /dev/null @@ -1,47 +0,0 @@ -defmodule Textile.ParserHelpers do - import Phoenix.HTML - - defmacro attribute_parser(name, open_token, close_token, open_tag, close_tag) do - quote do - defp unquote(name)(parser, [{unquote(open_token), open} | r_tokens]) do - case well_formed(parser, r_tokens) do - {:ok, tree, [{unquote(close_token), _close} | r2_tokens]} -> - {:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape_html(open)}, tree], r2_tokens} - end - end - - defp unquote(name)(parser, [{unquote(:"b_#{open_token}"), open} | r_tokens]) do - case well_formed(parser, r_tokens) do - {:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} -> - {:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, escape_html(open)}, tree], r2_tokens} - end - end - - defp unquote(name)(_parser, _tokens), - do: {:error, "Expected #{unquote(name)} tag"} - end - end - - def remove_linefeeds(text) do - text - |> to_string() - |> String.replace("\r", "") - end - - def escape_nl2br(text) do - text - |> String.split("\n") - |> Enum.map(&escape_html(&1)) - |> Enum.join("
") - end - - def escape_html(text) do - html_escape(text) |> safe_to_string() - end -end diff --git a/lib/textile/token_coalescer.ex b/lib/textile/token_coalescer.ex deleted file mode 100644 index 65f02a96..00000000 --- a/lib/textile/token_coalescer.ex +++ /dev/null @@ -1,31 +0,0 @@ -defmodule Textile.TokenCoalescer do - # The lexer, as a practical concern, does not coalesce runs of - # character tokens. This fixes that. - def coalesce_lex(tokens) do - tokens - |> Enum.chunk_by(&is_number(&1)) - |> Enum.flat_map(fn - [t | _rest] = str when is_number(t) -> - [text: List.to_string(str)] - - t -> - t - end) - end - - def coalesce_parse(tokens) do - tokens - |> List.flatten() - |> Enum.chunk_by(fn {k, _v} -> k == :text end) - |> Enum.flat_map(fn t -> - [{type, _v} | _rest] = t - - value = - t - |> Enum.map(fn {_k, v} -> v end) - |> Enum.join("") - - [{type, value}] - end) - end -end \ No newline at end of file diff --git a/lib/textile/url_lexer.ex b/lib/textile/url_lexer.ex deleted file mode 100644 index 112df9a1..00000000 --- a/lib/textile/url_lexer.ex +++ /dev/null @@ -1,34 +0,0 @@ -defmodule Textile.UrlLexer do - import NimbleParsec - - def url_ending_in(ending_sequence) do - domain_character = - choice([ - ascii_char([?a..?z]), - ascii_char([?A..?Z]), - ascii_char([?0..?9]), - string("-") - ]) - - domain = - repeat( - choice([ - domain_character |> string(".") |> concat(domain_character), - domain_character - ]) - ) - - scheme_and_domain = - choice([ - string("#"), - string("/"), - string("data:image/"), - string("https://") |> concat(domain), - string("http://") |> concat(domain) - ]) - - scheme_and_domain - |> repeat(lookahead_not(ending_sequence) |> utf8_char([])) - |> reduce({List, :to_string, []}) - end -end \ No newline at end of file