diff --git a/lib/fast_textile/lexer.ex b/lib/fast_textile/lexer.ex index 670f1d76..302081ca 100644 --- a/lib/fast_textile/lexer.ex +++ b/lib/fast_textile/lexer.ex @@ -118,8 +118,7 @@ defmodule FastTextile.Lexer do |> repeat(lookahead_not(end_of_link) |> utf8_char([])) unbracketed_image_url = - string("!") - |> concat(unbracketed_url) + unbracketed_url |> reduce({List, :to_string, []}) |> unwrap_and_tag(:unbracketed_image_url) @@ -129,9 +128,9 @@ defmodule FastTextile.Lexer do |> reduce({List, :to_string, []}) |> unwrap_and_tag(:unbracketed_link_url) - unbracketed_link_delim = + link_delim = string("\"") - |> unwrap_and_tag(:unbracketed_link_delim) + |> unwrap_and_tag(:link_delim) bracketed_link_open = string("[\"") @@ -165,16 +164,24 @@ defmodule FastTextile.Lexer do bracketed_del_close = string("-]") |> unwrap_and_tag(:bracketed_del_close) bracketed_sub_close = string("~]") |> unwrap_and_tag(:bracketed_sub_close) - b_delim = string("**") |> unwrap_and_tag(:unbracketed_b_delim) - i_delim = string("__") |> unwrap_and_tag(:unbracketed_i_delim) - strong_delim = string("*") |> unwrap_and_tag(:unbracketed_strong_delim) - em_delim = string("_") |> unwrap_and_tag(:unbracketed_em_delim) - code_delim = string("@") |> unwrap_and_tag(:unbracketed_code_delim) - ins_delim = string("+") |> unwrap_and_tag(:unbracketed_ins_delim) - sup_delim = string("^") |> unwrap_and_tag(:unbracketed_sup_delim) - sub_delim = string("~") |> unwrap_and_tag(:unbracketed_sub_delim) + b_delim = string("**") |> unwrap_and_tag(:b_delim) + i_delim = string("__") |> unwrap_and_tag(:i_delim) + strong_delim = string("*") |> unwrap_and_tag(:strong_delim) + em_delim = string("_") |> unwrap_and_tag(:em_delim) + code_delim = string("@") |> unwrap_and_tag(:code_delim) + ins_delim = string("+") |> unwrap_and_tag(:ins_delim) + sup_delim = string("^") |> unwrap_and_tag(:sup_delim) + sub_delim = string("~") |> unwrap_and_tag(:sub_delim) - del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:unbracketed_del_delim) + del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:del_delim) + + quicktxt = + utf8_char('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*@_{}') + |> unwrap_and_tag(:quicktxt) + + char = + utf8_char([]) + |> unwrap_and_tag(:char) textile = choice([ @@ -194,7 +201,7 @@ defmodule FastTextile.Lexer do bracketed_link_url, unbracketed_link_url, unbracketed_image_url, - unbracketed_link_delim, + link_delim, bracketed_b_open, bracketed_i_open, bracketed_strong_open, @@ -222,7 +229,8 @@ defmodule FastTextile.Lexer do sup_delim, del_delim, sub_delim, - utf8_char([]) + quicktxt, + char ]) |> repeat() |> eos() diff --git a/lib/fast_textile/parser.ex b/lib/fast_textile/parser.ex index b5cbf95e..cbddd778 100644 --- a/lib/fast_textile/parser.ex +++ b/lib/fast_textile/parser.ex @@ -1,97 +1,12 @@ defmodule FastTextile.Parser do - @moduledoc """ - Textile parser. - - # Block rules - - - bracketed_literal = bracketed_literal(tok); - literal = literal(tok) - - bq_cite = - bq_cite_start (literal newline double_newline char)* bq_cite_open block_markup bq_close; - - bq = - bq_open block_markup bq_close; - - spoiler = - spoiler_open block_markup spoiler_close; - - bracketed_link = - bracketed_link_open block_markup unwrap(bracketed_link_url); - - bracketed_image_with_link = - bracketed_image(tok) unwrap(unbracketed_url); - - bracketed_image = - bracketed_image(tok); - - - # Bracketed markup rules - - - bracketed_bold = bracketed_b_open block_markup bracketed_b_close; - bracketed_italic = bracketed_i_open block_markup bracketed_i_close; - bracketed_strong = bracketed_strong_open block_markup bracketed_strong_close; - bracketed_em = bracketed_em_open block_markup bracketed_em_close; - bracketed_code = bracketed_code_open block_markup bracketed_code_close; - bracketed_ins = bracketed_ins_open block_markup bracketed_ins_close; - bracketed_sup = bracketed_sup_open block_markup bracketed_sup_close; - bracketed_del = bracketed_del_open block_markup bracketed_del_close; - bracketed_sub = bracketed_sub_open block_markup bracketed_sub_close; - - - # Unbracketed markup rules - - - unbracketed_image_with_link = - unbracketed_image(tok) unbracketed_url; - - unbracketed_image = - unbracketed_image(tok); - - - # N.B.: the following rules use a special construction that is not really - # representable in any BNF I'm aware of, but it simply holds the current - # context and prevents rules from recursing into themselves. - - - link = - unbracketed_link_delim block_markup unbracketed_link_delim unbracketed_url; - - bold = b_delim inline_markup b_delim; - italic = i_delim inline_markup i_delim; - strong = strong_delim inline_markup strong_delim; - em = em_delim inline_markup em_delim; - code = code_delim inline_markup code_delim; - ins = ins_delim inline_markup ins_delim; - sup = sup_delim inline_markup sup_delim; - del = del_delim inline_markup del_delim; - sub = sub_delim inline_markup sub_delim; - - - newline = newline(tok); - - - # Top level - inline_markup = - (bracketed_literal | literal | bq_cite | bq | spoiler | bracketed_link | - bracketed_image_with_link | bracketed_image | bracketed_bold | - bracketed_italic | bracketed_strong | bracketed_em | bracketed_code | - bracketed_ins | bracketed_sup | bracketed_del | bracketed_sub | - unbracketed_image_with_link | unbracketed_image | link | bold | italic | - strong | em | code | ins | sup | del | sub | newline | char)*; - - block_markup = - (inline_markup double_newline)*; - """ - alias FastTextile.Lexer alias Phoenix.HTML def parse(parser, input) do + parser = Map.put(parser, :state, %{}) + with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(String.trim(input)), - {:ok, tree, []} <- textile_top(parser, tokens) + {:ok, tree, []} <- repeat(&textile/2, parser, tokens) do partial_flatten(tree) else @@ -100,278 +15,347 @@ defmodule FastTextile.Parser do end end - defp textile_top(_parser, []), do: {:ok, [], []} - defp textile_top(parser, tokens) do - with {:ok, tree, r_tokens} <- block_markup(parser, tokens, %{}), - false <- tree == [], - {:ok, next_tree, r2_tokens} <- textile_top(parser, r_tokens) - do - {:ok, [tree, next_tree], r2_tokens} - else - _ -> - [{_token, string} | r_tokens] = tokens - {:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens) - - {:ok, [{:text, escape(string)}, next_tree], r2_tokens} - end + # Helper to turn a parse tree into a string + def flatten(tree) do + tree + |> List.flatten() + |> Enum.map_join("", fn {_k, v} -> v end) end - defp block_markup(parser, tokens, state) do - case block_markup_element(parser, tokens, state) do - {:ok, tree, r_tokens} when tree != [] -> - {:ok, next_tree, r2_tokens} = block_markup(parser, r_tokens, state) - - {:ok, [tree, next_tree], r2_tokens} - - _ -> - {:ok, [], tokens} - end + # Helper to escape HTML + defp escape(text) do + text + |> HTML.html_escape() + |> HTML.safe_to_string() end - defp block_markup_element(_parser, [{:double_newline, _} | r_tokens], _state), do: {:ok, [{:markup, "

"}], r_tokens} - defp block_markup_element(parser, tokens, state), do: inline_markup(parser, tokens, state) + # Helper to turn a parse tree into a list + def partial_flatten(tree) do + tree + |> List.flatten() + |> Enum.chunk_by(fn {k, _v} -> k end) + |> Enum.flat_map(fn list -> + [{type, _v} | _rest] = list - defp inline_markup(parser, tokens, state) do - case inline_markup_element(parser, tokens, state) do + value = Enum.map_join(list, "", fn {_k, v} -> v end) + + [{type, value}] + end) + end + + defp put_state(parser, new_state) do + state = Map.put(parser.state, new_state, true) + Map.put(parser, :state, state) + end + + # Helper corresponding to Kleene star (*) operator + # Match a specificed rule zero or more times + defp repeat(rule, parser, tokens) do + case rule.(parser, tokens) do {:ok, tree, r_tokens} -> - {:ok, next_tree, r2_tokens} = inline_markup(parser, r_tokens, state) - - {:ok, [tree, next_tree], r2_tokens} + {:ok, tree2, r2_tokens} = repeat(rule, parser, r_tokens) + {:ok, [tree, tree2], r2_tokens} _ -> {:ok, [], tokens} end end - defp inline_markup_element(parser, tokens, state) do - markups = [ - &literal/3, &blockquote_cite/3, &blockquote/3, &spoiler/3, - &bracketed_link/3, &bracketed_image_with_link/3, &bracketed_image/3, - &bracketed_bold/3, &bracketed_italic/3, &bracketed_strong/3, - &bracketed_em/3, &bracketed_code/3, &bracketed_ins/3, &bracketed_sup/3, - &bracketed_del/3, &bracketed_sub/3, &unbracketed_image_with_link/3, - &unbracketed_image/3, &link/3, &bold/3, &italic/3, &strong/3, &em/3, - &code/3, &ins/3, &sup/3, &del/3, &sub/3, &newline/3, &char/3, &space/3 - ] + # Helper to match a simple recursive grammar rule of the following form: + # + # open_token callback* close_token + # + defp simple_recursive(open_token, close_token, open_tag, close_tag, callback, parser, [{open_token, open} | r_tokens]) do + case repeat(callback, parser, r_tokens) do + {:ok, tree, [{^close_token, _} | r2_tokens]} -> + {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens} - value = - markups - |> Enum.find_value(fn func -> - case func.(parser, tokens, state) do - {:ok, tree, r_tokens} -> - {:ok, tree, r_tokens} + {:ok, tree, r2_tokens} -> + {:ok, [{:text, escape(open)}, tree], r2_tokens} + end + end + defp simple_recursive(_open_token, _close_token, _open_tag, _close_tag, _callback, _parser, _tokens) do + {:error, "Expected a simple recursive rule"} + end - _ -> - nil + # Helper to match a simple recursive grammar rule with negative lookahead: + # + # open_token callback* close_token (?!lookahead_not) + # + defp simple_lookahead_not(open_token, close_token, open_tag, close_tag, lookahead_not, callback, state, parser, [{open_token, open} | r_tokens]) do + case parser.state do + %{^state => _} -> + {:error, "End of rule"} + + _ -> + case repeat(callback, put_state(parser, state), r_tokens) do + {:ok, tree, [{^close_token, close}, {^lookahead_not, ln} | r2_tokens]} -> + {:ok, [{:text, escape(open)}, tree, {:text, escape(close)}], [{lookahead_not, ln} | r2_tokens]} + + {:ok, tree, [{^close_token, _} | r2_tokens]} -> + {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens} + + {:ok, tree, r2_tokens} -> + {:ok, [{:text, escape(open)}, tree], r2_tokens} end - end) - - value || {:error, "Expected inline markup"} + end + end + defp simple_lookahead_not(_open_token, _close_token, _open_tag, _close_tag, _lookahead_not, _callback, _state, _parser, _tokens) do + {:error, "Expected a simple lookahead not rule"} end - defp blockquote_cite_text(tokens) do - case blockquote_cite_element(tokens) do - {:ok, tree, r_tokens} -> - {:ok, next_tree, r2_tokens} = blockquote_cite_text(r_tokens) + # Helper to efficiently assemble a UTF-8 binary from tokens of the + # given type + defp assemble_binary(token_type, accumulator, [{token_type, t} | stream]) do + assemble_binary(token_type, accumulator <> <>, stream) + end + defp assemble_binary(_token_type, accumulator, tokens), do: {accumulator, tokens} - {:ok, [tree, next_tree], r2_tokens} + # + # inline_textile_element = + # opening_markup inline_textile_element* closing_markup (?!quicktxt) | + # link_delim inline_textile_element* link_url | + # image url? | + # code_delim inline_textile_element* code_delim | + # inline_textile_element_not_opening_markup; + # - _ -> - {:ok, [], tokens} + defp inline_textile_element(parser, tokens) do + # IO.puts "block_textile_element" + # IO.inspect [parser, tokens] + # IO.inspect Process.info(self(), :current_stacktrace) + # :timer.sleep(:timer.seconds(2)) + + [ + {:b_delim, :b, "", ""}, + {:i_delim, :i, "", ""}, + {:strong_delim, :strong, "", ""}, + {:em_delim, :em, "", ""}, + {:ins_delim, :ins, "", ""}, + {:sup_delim, :sup, "", ""}, + {:del_delim, :del, "", ""}, + {:sub_delim, :sub, "", ""} + ] + |> Enum.find_value(fn {delim_token, state, open_tag, close_tag} -> + simple_lookahead_not( + delim_token, + delim_token, + open_tag, + close_tag, + :quicktxt, + &inline_textile_element/2, + state, + parser, + tokens + ) + |> case do + {:ok, tree, r_tokens} -> + {:ok, tree, r_tokens} + + _ -> + nil + end + end) + |> case do + nil -> inner_inline_textile_element(parser, tokens) + value -> value end end - # Text is not escaped here because it will be escaped when it is read into - # the author attribute of the
. - defp blockquote_cite_element([{:literal, lit} | r_tokens]), do: {:ok, [{:text, lit}], r_tokens} - defp blockquote_cite_element([{:space, _} | r_tokens]), do: {:ok, [{:text, " "}], r_tokens} - defp blockquote_cite_element([tok | r_tokens]) when is_integer(tok) do - {rest, r2_tokens} = extract_string(r_tokens, "") + defp inner_inline_textile_element(parser, [{:link_delim, open} | r_tokens]) do + case repeat(&inline_textile_element/2, parser, r_tokens) do + {:ok, tree, [{:unbracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} -> + href = escape(url) - {:ok, [{:text, <> <> rest}], r2_tokens} + {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} + + {:ok, tree, r2_tokens} -> + {:ok, [{:markup, escape(open)}, tree], r2_tokens} + end end - defp blockquote_cite_element(_tokens), do: {:error, "Expected a blockquote cite token"} + defp inner_inline_textile_element(parser, [{:bracketed_link_open, open} | r_tokens]) do + case repeat(&inline_textile_element/2, parser, r_tokens) do + {:ok, tree, [{:bracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} -> + href = escape(url) - defp literal(_parser, [{:literal, literal} | r_tokens], _state), do: {:ok, [{:markup, ""}, {:markup, escape(literal)}, {:markup, ""}], r_tokens} - defp literal(_parser, _tokens, _state), do: {:error, "Expected a bracketed literal"} + {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} - defp blockquote_cite(parser, [{:bq_cite_start, start} | r_tokens], state) do - case blockquote_cite_text(r_tokens) do - {:ok, tree, [{:bq_cite_open, open} | r2_tokens]} -> - case block_markup(parser, r2_tokens, state) do + {:ok, tree, r2_tokens} -> + {:ok, [{:markup, escape(open)}, tree], r2_tokens} + end + end + defp inner_inline_textile_element(parser, [{token, img}, {:unbracketed_image_url, <<":", url::binary>>} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do + img = parser.image_transform.(img) + + {:ok, [{:markup, ""}], r_tokens} + end + defp inner_inline_textile_element(parser, [{token, img} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do + img = parser.image_transform.(img) + + {:ok, [{:markup, ""}], r_tokens} + end + defp inner_inline_textile_element(_parser, [{:unbracketed_image_url, img} | r_tokens]) do + {:ok, [{:text, escape(img)}], r_tokens} + end + defp inner_inline_textile_element(parser, [{:code_delim, open} | r_tokens]) do + case parser.state do + %{code: _} -> + {:error, "End of rule"} + + _ -> + case repeat(&inline_textile_element/2, put_state(parser, :code), r_tokens) do + {:ok, tree, [{:code_delim, _} | r2_tokens]} -> + {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} + + {:ok, tree, r2_tokens} -> + {:ok, [{:markup, escape(open)}, tree], r2_tokens} + end + end + end + defp inner_inline_textile_element(parser, tokens) do + inline_textile_element_not_opening_markup(parser, tokens) + end + + # + # inline_textile_element_not_opening_markup = + # literal inline_textile_element | + # newline inline_textile_element | + # space inline_textile_element | + # char inline_textile_element | + # quicktxt inline_textile_element_not_opening_markup | + # block_textile_element; + # + + defp inline_textile_element_not_opening_markup(_parser, [{:literal, lit} | r_tokens]) do + {:ok, [{:markup, escape(lit)}], r_tokens} + end + defp inline_textile_element_not_opening_markup(_parser, [{:newline, _} | r_tokens]) do + {:ok, [{:markup, "
"}], r_tokens} + end + defp inline_textile_element_not_opening_markup(_parser, [{:space, _} | r_tokens]) do + {:ok, [{:text, " "}], r_tokens} + end + defp inline_textile_element_not_opening_markup(_parser, [{:char, lit} | r_tokens]) do + {binary, r2_tokens} = assemble_binary(:char, <>, r_tokens) + + {:ok, [{:text, escape(binary)}], r2_tokens} + end + defp inline_textile_element_not_opening_markup(parser, [{:quicktxt, lit} | r_tokens]) do + {binary, r2_tokens} = assemble_binary(:quicktxt, <>, r_tokens) + + case inline_textile_element_not_opening_markup(parser, r2_tokens) do + {:ok, tree, r3_tokens} -> + {:ok, [{:text, escape(binary)}, tree], r3_tokens} + + _ -> + {:ok, [{:text, escape(binary)}], r2_tokens} + end + end + defp inline_textile_element_not_opening_markup(parser, tokens) do + block_textile_element(parser, tokens) + end + + # + # bq_cite_text = literal | char | quicktxt; + # + + # Note that text is not escaped here because it will be escaped + # when the tree is flattened + defp bq_cite_text(_parser, [{:literal, lit} | r_tokens]) do + {:ok, [{:text, lit}], r_tokens} + end + defp bq_cite_text(_parser, [{:char, lit} | r_tokens]) do + {:ok, [{:text, <>}], r_tokens} + end + defp bq_cite_text(_parser, [{:quicktxt, lit} | r_tokens]) do + {:ok, [{:text, <>}], r_tokens} + end + defp bq_cite_text(_parser, _tokens) do + {:error, "Expected cite tokens"} + end + + # + # block_textile_element = + # double_newline | + # opening_block_tag inline_textile_element* closing_block_tag; + # + + defp block_textile_element(_parser, [{:double_newline, _} | r_tokens]) do + {:ok, [{:markup, "

"}], r_tokens} + end + defp block_textile_element(parser, [{:bq_cite_start, start} | r_tokens]) do + case repeat(&bq_cite_text/2, parser, r_tokens) do + {:ok, tree, [{:bq_cite_open, _} | r2_tokens]} -> + case repeat(&inline_textile_element/2, parser, r2_tokens) do {:ok, tree2, [{:bq_close, _} | r3_tokens]} -> cite = escape(flatten(tree)) {:ok, [{:markup, "
"}, tree2, {:markup, "
"}], r3_tokens} {:ok, tree2, r3_tokens} -> - {:ok, [{:markup, escape(start)}, tree, {:markup, escape(open)}, tree2], r3_tokens} + {:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, tree2], r3_tokens} + + _ -> + {:ok, [{:text, escape(start)}], r_tokens} end - {:ok, tree, r2_tokens} -> - {:ok, [{:markup, escape(start)}, tree], r2_tokens} - end - end - defp blockquote_cite(_parser, _tokens, _state), do: {:error, "Expected a blockquote with cite"} - - defp blockquote(parser, tokens, state), do: simple_bracketed_attr(:bq_open, :bq_close, "
", "
", &block_markup/3, parser, tokens, state) - defp spoiler(parser, tokens, state), do: simple_bracketed_attr(:spoiler_open, :spoiler_close, "", "", &block_markup/3, parser, tokens, state) - - defp bracketed_link(parser, [{:bracketed_link_open, blopen} | r_tokens], state) do - case block_markup(parser, r_tokens, state) do - {:ok, tree, [{:bracketed_link_url, link_url} | r2_tokens]} -> - href = escape(unwrap_bracketed(link_url)) - - {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:markup, escape(blopen)}, tree], r2_tokens} - end - end - defp bracketed_link(_parser, _tokens, _state), do: {:error, "Expected a bracketed link"} - - defp bracketed_image_with_link(parser, [{:bracketed_image, img}, {:unbracketed_url, link_url} | r_tokens], _state) do - href = escape(unwrap_bracketed(link_url)) - src = escape(parser.image_transform.(img)) - - {:ok, [{:markup, ""}], r_tokens} - end - defp bracketed_image_with_link(_parser, _tokens, _state), do: {:error, "Expected an bracketed image with link"} - - defp bracketed_image(parser, [{:bracketed_image, img} | r_tokens], _state) do - src = escape(parser.image_transform.(img)) - - {:ok, [{:markup, ""}], r_tokens} - end - defp bracketed_image(_parser, _tokens, _state), do: {:error, "Expected a bracketed image"} - - defp bracketed_bold(parser, tokens, state), do: simple_bracketed_attr(:bracketed_b_open, :bracketed_b_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_italic(parser, tokens, state), do: simple_bracketed_attr(:bracketed_i_open, :bracketed_i_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_strong(parser, tokens, state), do: simple_bracketed_attr(:bracketed_strong_open, :bracketed_strong_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_em(parser, tokens, state), do: simple_bracketed_attr(:bracketed_em_open, :bracketed_em_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_code(parser, tokens, state), do: simple_bracketed_attr(:bracketed_code_open, :bracketed_code_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_ins(parser, tokens, state), do: simple_bracketed_attr(:bracketed_ins_open, :bracketed_ins_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_sup(parser, tokens, state), do: simple_bracketed_attr(:bracketed_sup_open, :bracketed_sup_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_del(parser, tokens, state), do: simple_bracketed_attr(:bracketed_del_open, :bracketed_del_close, "", "", &block_markup/3, parser, tokens, state) - defp bracketed_sub(parser, tokens, state), do: simple_bracketed_attr(:bracketed_sub_open, :bracketed_sub_close, "", "", &block_markup/3, parser, tokens, state) - - defp unbracketed_image_with_link(parser, [{:unbracketed_image, img}, {:unbracketed_image_url, link_url} | r_tokens], _state) do - href = escape(unwrap_unbracketed(link_url)) - src = escape(parser.image_transform.(img)) - - {:ok, [{:markup, ""}], r_tokens} - end - defp unbracketed_image_with_link(_parser, _tokens, _state), - do: {:error, "Expected an unbracketed image with link"} - - defp unbracketed_image(parser, [{:unbracketed_image, img} | r_tokens], _state) do - src = escape(parser.image_transform.(img)) - - {:ok, [{:markup, ""}], r_tokens} - end - defp unbracketed_image(_parser, _tokens, _state), - do: {:error, "Expected an unbracketed image"} - - defp link(parser, [{:unbracketed_link_delim, delim} | r_tokens], state) do - case state do - %{link: _value} -> - # Done, error out - {:error, "End of rule"} - _ -> - case block_markup(parser, r_tokens, Map.put(state, :link, true)) do - {:ok, tree, [{:unbracketed_link_url, url} | r2_tokens]} -> - href = escape(unwrap_unbracketed(url)) - - {:ok, [{:markup, ""}, tree, {:markup, ""}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:unbracketed_link_delim, delim}, tree], r2_tokens} - end + {:ok, [{:text, escape(start)}], r_tokens} end end - defp link(_parser, _tokens, _state), do: {:error, "Expected a link"} + defp block_textile_element(parser, tokens) do + [ + {:bq_open, :bq_close, "
", "
"}, + {:spoiler_open, :spoiler_close, "", ""}, + {:bracketed_b_open, :bracketed_b_close, "", ""}, + {:bracketed_i_open, :bracketed_i_close, "", ""}, + {:bracketed_strong_open, :bracketed_strong_close, "", ""}, + {:bracketed_em_open, :bracketed_em_close, "", ""}, + {:bracketed_code_open, :bracketed_code_close, "", ""}, + {:bracketed_ins_open, :bracketed_ins_close, "", ""}, + {:bracketed_sup_open, :bracketed_sup_close, "", ""}, + {:bracketed_del_open, :bracketed_del_close, "", ""}, + {:bracketed_sub_open, :bracketed_sub_close, "", ""} + ] + |> Enum.find_value(fn {open_token, close_token, open_tag, close_tag} -> + simple_recursive( + open_token, + close_token, + open_tag, + close_tag, + &inline_textile_element/2, + parser, + tokens + ) + |> case do + {:ok, tree, r_tokens} -> + {:ok, tree, r_tokens} - defp bold(parser, tokens, state), do: simple_unbracketed_attr(:bold, :unbracketed_b_delim, "", "", &inline_markup/3, parser, tokens, state) - defp italic(parser, tokens, state), do: simple_unbracketed_attr(:italic, :unbracketed_i_delim, "", "", &inline_markup/3, parser, tokens, state) - defp strong(parser, tokens, state), do: simple_unbracketed_attr(:strong, :unbracketed_strong_delim, "", "", &inline_markup/3, parser, tokens, state) - defp em(parser, tokens, state), do: simple_unbracketed_attr(:em, :unbracketed_em_delim, "", "", &inline_markup/3, parser, tokens, state) - defp code(parser, tokens, state), do: simple_unbracketed_attr(:code, :unbracketed_code_delim, "", "", &inline_markup/3, parser, tokens, state) - defp ins(parser, tokens, state), do: simple_unbracketed_attr(:ins, :unbracketed_ins_delim, "", "", &inline_markup/3, parser, tokens, state) - defp sup(parser, tokens, state), do: simple_unbracketed_attr(:sup, :unbracketed_sup_delim, "", "", &inline_markup/3, parser, tokens, state) - defp del(parser, tokens, state), do: simple_unbracketed_attr(:del, :unbracketed_del_delim, "", "", &inline_markup/3, parser, tokens, state) - defp sub(parser, tokens, state), do: simple_unbracketed_attr(:sub, :unbracketed_sub_delim, "", "", &inline_markup/3, parser, tokens, state) - - defp newline(_parser, [{:newline, _tok} | r_tokens], _state), do: {:ok, [{:markup, "
"}], r_tokens} - defp newline(_parser, _tokens, _state), do: {:error, "Expected a newline"} - - defp space(_parser, [{:space, _} | r_tokens], _state), do: {:ok, [{:text, " "}], r_tokens} - defp space(_parser, _tokens, _state), do: {:error, "Expected whitespace"} - - # Various substitutions - defp char(_parser, '->' ++ r_tokens, _state), do: {:ok, [{:markup, "→"}], r_tokens} - defp char(_parser, '--' ++ r_tokens, _state), do: {:ok, [{:markup, "—"}], r_tokens} - defp char(_parser, '...' ++ r_tokens, _state), do: {:ok, [{:markup, "…"}], r_tokens} - defp char(_parser, '(tm)' ++ r_tokens, _state), do: {:ok, [{:markup, "&tm;"}], r_tokens} - defp char(_parser, '(c)' ++ r_tokens, _state), do: {:ok, [{:markup, "©"}], r_tokens} - defp char(_parser, '(r)' ++ r_tokens, _state), do: {:ok, [{:markup, "®"}], r_tokens} - defp char(_parser, '\'' ++ r_tokens, _state), do: {:ok, [{:markup, "’"}], r_tokens} - defp char(_parser, [tok | r_tokens], _state) when is_integer(tok) do - {rest, r2_tokens} = extract_string(r_tokens, "") - - {:ok, [{:text, escape(<> <> rest)}], r2_tokens} + _ -> + nil + end + end) + |> Kernel.||({:error, "Expected block markup"}) end - defp char(_parser, _tokens, _state), do: {:error, "Expected a char"} - defp extract_string([top | r_tokens], acc) when is_integer(top), do: extract_string(r_tokens, acc <> <>) - defp extract_string(tokens, acc), do: {acc, tokens} + # + # textile = + # (inline_textile_element TOKEN)* eos; + # - defp unwrap_unbracketed(<<"\":", rest::binary>>), do: rest - defp unwrap_unbracketed(<<"!:", rest::binary>>), do: rest - defp unwrap_bracketed(<<"\":", rest::binary>>), do: rest - defp unwrap_bracketed(<<":", rest::binary>>), do: binary_part(rest, 0, byte_size(rest) - 1) - - defp simple_bracketed_attr(open_token, close_token, open_attr, close_attr, callback, parser, [{open_token, token_str} | r_tokens], state) do - case callback.(parser, r_tokens, state) do - {:ok, tree, [{^close_token, _} | r2_tokens]} -> - {:ok, [{:markup, open_attr}, tree, {:markup, close_attr}], r2_tokens} - - {:ok, tree, r2_tokens} -> - {:ok, [{:text, token_str}, tree], r2_tokens} - end - end - defp simple_bracketed_attr(_open_token, _close_token, _open_attr, _close_attr, _callback, _parser, _tokens, _state), - do: {:error, "Expected a simple bracketed attribute"} - - defp simple_unbracketed_attr(this_state, delim_token, open_attr, close_attr, callback, parser, [{delim_token, token_str} | r_tokens], state) do - case state do - %{^this_state => _value} -> - # Exit state: No other rule will match so we can just error out here - {:error, "End of rule"} + defp textile(parser, tokens) do + case inline_textile_element(parser, tokens) do + {:ok, tree, r_tokens} -> + {:ok, tree, r_tokens} _ -> - # Enter state - case callback.(parser, r_tokens, Map.put(state, this_state, true)) do - {:ok, tree, [{^delim_token, _} | r2_tokens]} -> - {:ok, [{:markup, open_attr}, tree, {:markup, close_attr}], r2_tokens} + case tokens do + [{_, string} | r_tokens] -> + {:ok, [{:text, escape(string)}], r_tokens} - {:ok, tree, r2_tokens} -> - {:ok, [{:text, token_str}, tree], r2_tokens} + _ -> + {:error, "Expected textile"} end end end - defp simple_unbracketed_attr(_this_state, _delim_token, _open_attr, _close_attr, _callback, _parser, _tokens, _state), - do: {:error, "Expected a simple unbracketed attribute"} - - def flatten(tree) do - tree - |> List.flatten() - |> Enum.map(fn {_k, v} -> v end) - |> Enum.join() - end - - defp escape(text), do: HTML.html_escape(text) |> HTML.safe_to_string() - defp partial_flatten(tree) do - List.flatten(tree) - end end