diff --git a/lib/textile/parser.ex b/lib/textile/parser.ex index a157b2bd..807c42ef 100644 --- a/lib/textile/parser.ex +++ b/lib/textile/parser.ex @@ -1,3 +1,322 @@ defmodule Textile.Parser do - alias Textile.Lexer + import Textile.ParserHelpers + import Phoenix.HTML + require IEx + + alias Textile.{ + Lexer, + Parser, + TokenCoalescer + } + + defstruct [ + reductions: [], + image_transform: nil + ] + + def parse(%Parser{} = parser, input) do + with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input), + tokens <- TokenCoalescer.coalesce(tokens), + {:ok, tree, []} <- textile_top(parser, tokens) + do + tree + else + err -> + err + end + end + + + # + # Backtracking LL parser for simplified Textile grammar + # + + + # + # textile = (well_formed_including_paragraphs | TOKEN)*; + # + defp textile_top(_parser, []), do: {:ok, [], []} + defp textile_top(parser, tokens) do + with {:ok, tree, r_tokens} <- well_formed_including_paragraphs(parser, tokens), + {:ok, next_tree, r2_tokens} <- textile_top(parser, r_tokens) + do + {:ok, tree ++ next_tree, r2_tokens} + else + _ -> + [{_token, string} | r_tokens] = tokens + {:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens) + + {:ok, [text: escape_nl2br(string)] ++ next_tree, r2_tokens} + end + end + + + # + # well_formed_including_paragraphs = (markup | double_newline)*; + # + defp well_formed_including_paragraphs(_parser, []), do: {:ok, [], []} + defp well_formed_including_paragraphs(parser, [{:double_newline, _nl} | r_tokens]) do + with {:ok, tree, r2_tokens} <- well_formed_including_paragraphs(parser, r_tokens) do + {:ok, [markup: "

"] ++ tree, r2_tokens} + else + _ -> + {:ok, [], r_tokens} + end + end + + defp well_formed_including_paragraphs(parser, tokens) do + with {:ok, tree, r_tokens} <- markup(parser, tokens), + {:ok, next_tree, r2_tokens} <- well_formed_including_paragraphs(parser, r_tokens) + do + {:ok, tree ++ next_tree, r2_tokens} + else + _ -> + {:ok, [], tokens} + end + end + + # + # well_formed = (markup)*; + # + defp well_formed(parser, tokens) do + case markup(parser, tokens) do + {:ok, tree, r_tokens} -> + {:ok, next_tree, r2_tokens} = well_formed(parser, r_tokens) + {:ok, tree ++ next_tree, r2_tokens} + + _ -> + {:ok, [], tokens} + end + end + + + # + # markup = + # blockquote | spoiler | link | image | bold | italic | strong | emphasis | + # code | inserted | superscript | deleted | subscript | newline | literal | + # bracketed_literal | text; + # + defp markup(parser, tokens) do + IEx.pry + + markups = [ + &blockquote/2, &spoiler/2, &link/2, &image/2, &bold/2, &italic/2, &strong/2, + &emphasis/2, &code/2, &inserted/2, &superscript/2, &deleted/2, &subscript/2, + &newline/2, &literal/2, &bracketed_literal/2, &text/2 + ] + + value = + markups + |> Enum.find_value(fn func -> + case func.(parser, tokens) do + {:ok, tree, r_tokens} -> + {:ok, tree, r_tokens} + + _ -> + nil + end + end) + + value || {:error, "Expected markup"} + end + + + # + # blockquote = + # blockquote_open_cite well_formed_including_paragraphs blockquote_close | + # blockquote_open well_formed_including_paragraphs blockquote_close; + # + defp blockquote(parser, [{:blockquote_open_cite, author} | r_tokens]) do + with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do + {:ok, [markup: ~s|
|] ++ tree ++ [markup: ~s|
|], r2_tokens} + else + _ -> + {:ok, [text: ~s|[bq="#{author}"]|], r_tokens} + end + end + + defp blockquote(parser, [{:blockquote_open, open} | r_tokens]) do + with {:ok, tree, [{:blockquote_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do + {:ok, [markup: ~s|
|] ++ tree ++ [markup: ~s|
|], r2_tokens} + else + _ -> + {:ok, [text: escape_html(open)], r_tokens} + end + end + + defp blockquote(_parser, _tokens), + do: {:error, "Expected a blockquote tag with optional citation"} + + + # + # spoiler = + # spoiler_open well_formed_including_paragraphs spoiler_close; + # + defp spoiler(parser, [{:spoiler_open, open} | r_tokens]) do + with {:ok, tree, [{:spoiler_close, _close} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do + {:ok, [markup: ~s||] ++ tree ++ [markup: ~s||], r2_tokens} + else + _ -> + {:ok, [text: escape_html(open)], r_tokens} + end + end + + defp spoiler(_parser, _tokens), + do: {:error, "Expected a spoiler tag"} + + + # + # link = + # link_start well_formed_including_paragraphs link_end link_url; + # + defp link(parser, [{:link_start, start} | r_tokens]) do + with {:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} <- well_formed_including_paragraphs(parser, r_tokens) do + {:ok, [markup: ~s||] ++ tree ++ [markup: ~s||], r2_tokens} + else + _ -> + {:ok, [text: escape_html(start)], r_tokens} + end + end + + defp link(_parser, _tokens), + do: {:error, "Expected a link"} + + + # + # image = + # image_url image_title? image_link_url?; + # + defp image(parser, [{:image_url, image_url}, {:image_title, title}, {:image_link_url, link_url} | r_tokens]) do + image_url = parser.image_transform.(image_url) + + {:ok, [markup: ~s||], r_tokens} + end + + defp image(parser, [{:image_url, image_url}, {:image_title, title} | r_tokens]) do + image_url = parser.image_transform.(image_url) + + {:ok, [markup: ~s||], r_tokens} + end + + defp image(parser, [{:image_url, image_url}, {:image_link_url, link_url} | r_tokens]) do + image_url = parser.image_transform.(image_url) + + {:ok, [markup: ~s||], r_tokens} + end + + defp image(parser, [{:image_url, image_url} | r_tokens]) do + image_url = parser.image_transform.(image_url) + + {:ok, [markup: ~s||], r_tokens} + end + + defp image(_parser, _tokens), + do: {:error, "Expected an image tag"} + + # + # bold = + # b_open well_formed b_close | + # b_b_open well_formed b_b_close; + # + attribute_parser(:bold, :b_open, :b_close, "", "") + + # + # italic = + # i_open well_formed i_close | + # b_i_open well_formed b_i_close; + # + attribute_parser(:italic, :i_open, :i_close, "", "") + + # + # strong = + # strong_open well_formed strong_close | + # b_strong_open well_formed b_strong_close; + # + attribute_parser(:strong, :strong_open, :strong_close, "", "") + + # + # emphasis = + # em_open well_formed em_close | + # b_em_open well_formed b_em_close; + # + attribute_parser(:emphasis, :em_open, :em_close, "", "") + + # + # code = + # code_open well_formed code_close | + # b_code_open well_formed b_code_close; + # + attribute_parser(:code, :code_open, :code_close, "", "") + + # + # inserted = + # ins_open well_formed ins_close | + # b_ins_open well_formed b_ins_close; + # + attribute_parser(:inserted, :ins_open, :ins_close, "", "") + + # + # superscript = + # sup_open well_formed sup_close | + # b_sup_open well_formed b_sup_close; + # + attribute_parser(:superscript, :sup_open, :sup_close, "", "") + + # + # deleted = + # del_open well_formed del_close | + # b_del_open well_formed b_del_close; + # + attribute_parser(:deleted, :del_open, :del_close, "", "") + + # + # subscript = + # sub_open well_formed sub_close | + # b_sub_open well_formed b_sub_close; + # + attribute_parser(:subscript, :sub_open, :sub_close, "", "") + + + # + # Terminals + # + + defp literal(_parser, [{:literal, text} | r_tokens]), + do: {:ok, [markup: escape_nl2br(text)], r_tokens} + + defp literal(_parser, _tokens), + do: {:error, "Expected a literal"} + + + defp newline(_parser, [{:newline, _nl} | r_tokens]), + do: {:ok, [markup: "
"], r_tokens} + + defp newline(_parser, _tokens), + do: {:error, "Expected a line break"} + + + defp bracketed_literal(_parser, [{:bracketed_literal, text} | r_tokens]), + do: {:ok, [markup: escape_nl2br(text)], r_tokens} + + defp bracketed_literal(_parser, _tokens), + do: {:error, "Expected a bracketed literal"} + + + defp text(_parser, [{:text, text} | r_tokens]), + do: {:ok, [text: escape_nl2br(text)], r_tokens} + + defp text(_parser, _tokens), + do: {:error, "Expected text"} + + + defp escape_nl2br(text) do + text + |> String.split("\n", trim: true) + |> Enum.map(&escape_html(&1)) + |> Enum.join("
") + end + + defp escape_html(text) do + html_escape(text) |> safe_to_string() + end end \ No newline at end of file diff --git a/lib/textile/parser_helpers.ex b/lib/textile/parser_helpers.ex new file mode 100644 index 00000000..13a95d97 --- /dev/null +++ b/lib/textile/parser_helpers.ex @@ -0,0 +1,26 @@ +defmodule Textile.ParserHelpers do + defmacro attribute_parser(name, open_token, close_token, open_tag, close_tag) do + quote do + defp unquote(name)(parser, [{unquote(open_token), open} | r_tokens]) do + with {:ok, tree, [{unquote(close_token), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do + {:ok, [markup: unquote(open_tag)] ++ tree ++ [markup: unquote(close_tag)], r2_tokens} + else + _ -> + {:ok, [text: escape_html(open)], r_tokens} + end + end + + defp unquote(name)(parser, [{unquote(:"b_#{open_token}"), open} | r_tokens]) do + with {:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} <- well_formed(parser, r_tokens) do + {:ok, [markup: unquote(open_tag)] ++ tree ++ [markup: unquote(close_tag)], r2_tokens} + else + _ -> + {:ok, [text: escape_html(open)], r_tokens} + end + end + + defp unquote(name)(_parser, _tokens), + do: {:error, "Expected #{unquote(name)} tag"} + end + end +end \ No newline at end of file diff --git a/lib/textile/token_coalescer.ex b/lib/textile/token_coalescer.ex index 8ef47e59..526545b1 100644 --- a/lib/textile/token_coalescer.ex +++ b/lib/textile/token_coalescer.ex @@ -10,6 +10,6 @@ defmodule Textile.TokenCoalescer do t -> t - ) + end) end end \ No newline at end of file