discard old parser

This commit is contained in:
byte[] 2019-12-31 14:56:38 -05:00
parent 6ccb3f645a
commit 4c82c90cea
13 changed files with 480 additions and 1394 deletions

View file

@@ -1,238 +0,0 @@
defmodule FastTextile.Lexer do
  @moduledoc """
  Flat tokenizer for the "fast" Textile dialect, built with NimbleParsec.

  `lex/1` turns an input binary into a flat list of tagged tokens
  (e.g. `{:b_delim, "**"}`, `{:literal, "..."}`, `{:char, ?x}`); all
  structural decisions (nesting, matching open/close pairs) are left to
  the parser. On success NimbleParsec returns
  `{:ok, tokens, rest, context, line, byte_offset}`.
  """
  import NimbleParsec

  # Horizontal whitespace only — '\n' is deliberately excluded so the
  # newline / double_newline combinators below can claim it. Includes
  # NBSP, ogham space, Mongolian vowel separator, narrow NBSP, medium
  # mathematical space, ideographic space, and U+2000..U+200A.
  space =
    utf8_char('\f \r\t\u00a0\u1680\u180e\u202f\u205f\u3000' ++ Enum.to_list(0x2000..0x200a))

  # Whitespace, a newline, or end of input — anything that can follow a link.
  extended_space =
    choice([
      space,
      string("\n"),
      eos()
    ])

  space_token =
    space
    |> unwrap_and_tag(:space)

  # Two newlines separated by optional horizontal whitespace: a paragraph
  # break. Reduced to a single binary before tagging.
  double_newline =
    string("\n")
    |> repeat(space)
    |> string("\n")
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:double_newline)

  newline =
    string("\n")
    |> unwrap_and_tag(:newline)

  # Punctuation that may legally terminate an unbracketed URL.
  link_ending_characters =
    utf8_char('@#$%&(),.:;<=?\\`|\'')

  # Characters that terminate a bracketed URL (quote, space, brackets).
  bracket_link_ending_characters =
    utf8_char('" []')

  # A link ends at punctuation-then-whitespace, or at bare whitespace/EOS.
  end_of_link =
    choice([
      concat(link_ending_characters, extended_space),
      extended_space
    ])

  # [== ... ==] — verbatim span; the delimiters are dropped.
  bracketed_literal =
    ignore(string("[=="))
    |> repeat(lookahead_not(string("==]")) |> utf8_char([]))
    |> ignore(string("==]"))

  # == ... == — verbatim span without brackets.
  unbracketed_literal =
    ignore(string("=="))
    |> repeat(lookahead_not(string("==")) |> utf8_char([]))
    |> ignore(string("=="))

  # Bracketed form first: "[==" must win over "==" on the same input.
  literal =
    choice([
      bracketed_literal,
      unbracketed_literal
    ])
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:literal)

  # [bq=" — start of a blockquote-with-citation opening tag.
  bq_cite_start =
    string("[bq=\"")
    |> unwrap_and_tag(:bq_cite_start)

  # "] — end of the citation, start of the quoted body.
  bq_cite_open =
    string("\"]")
    |> unwrap_and_tag(:bq_cite_open)

  bq_open =
    string("[bq]")
    |> unwrap_and_tag(:bq_open)

  bq_close =
    string("[/bq]")
    |> unwrap_and_tag(:bq_close)

  spoiler_open =
    string("[spoiler]")
    |> unwrap_and_tag(:spoiler_open)

  spoiler_close =
    string("[/spoiler]")
    |> unwrap_and_tag(:spoiler_close)

  # Accepted URL schemes/prefixes for embedded images. "//" before "/"
  # so protocol-relative URLs are not truncated to a single slash.
  image_url_scheme =
    choice([
      string("//"),
      string("/"),
      string("https://"),
      string("http://")
    ])

  # Links additionally allow fragment-only ("#...") targets.
  link_url_scheme =
    choice([
      string("#"),
      image_url_scheme
    ])

  # ":" followed by a URL, consumed up to (not including) end_of_link.
  unbracketed_url =
    string(":")
    |> concat(link_url_scheme)
    |> repeat(lookahead_not(end_of_link) |> utf8_char([]))

  unbracketed_image_url =
    unbracketed_url
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:unbracketed_image_url)

  # "\":" URL — a link target immediately following quoted link text.
  unbracketed_link_url =
    string("\"")
    |> concat(unbracketed_url)
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:unbracketed_link_url)

  # !url! with an optional trailing :link-target.
  unbracketed_image =
    ignore(string("!"))
    |> concat(image_url_scheme)
    |> repeat(utf8_char(not: ?!))
    |> ignore(string("!"))
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:unbracketed_image)
    |> concat(optional(unbracketed_image_url))

  # [!url!] with an optional trailing :link-target.
  bracketed_image =
    ignore(string("[!"))
    |> concat(image_url_scheme)
    |> repeat(lookahead_not(string("!]")) |> utf8_char([]))
    |> ignore(string("!]"))
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:bracketed_image)
    |> concat(optional(unbracketed_image_url))

  # Bare quote: either side of unbracketed link text.
  link_delim =
    string("\"")
    |> unwrap_and_tag(:link_delim)

  bracketed_link_open =
    string("[\"")
    |> unwrap_and_tag(:bracketed_link_open)

  # "\":" URL "]" — the closing half of a bracketed link.
  bracketed_link_url =
    string("\":")
    |> concat(link_url_scheme)
    |> repeat(lookahead_not(bracket_link_ending_characters) |> utf8_char([]))
    |> ignore(string("]"))
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:bracketed_link_url)

  # Bracketed inline markup delimiters, e.g. [**bold**]. Two-character
  # forms ([**, [__) are declared before their one-character prefixes.
  bracketed_b_open = string("[**") |> unwrap_and_tag(:bracketed_b_open)
  bracketed_i_open = string("[__") |> unwrap_and_tag(:bracketed_i_open)
  bracketed_strong_open = string("[*") |> unwrap_and_tag(:bracketed_strong_open)
  bracketed_em_open = string("[_") |> unwrap_and_tag(:bracketed_em_open)
  bracketed_code_open = string("[@") |> unwrap_and_tag(:bracketed_code_open)
  bracketed_ins_open = string("[+") |> unwrap_and_tag(:bracketed_ins_open)
  bracketed_sup_open = string("[^") |> unwrap_and_tag(:bracketed_sup_open)
  bracketed_del_open = string("[-") |> unwrap_and_tag(:bracketed_del_open)
  bracketed_sub_open = string("[~") |> unwrap_and_tag(:bracketed_sub_open)
  bracketed_b_close = string("**]") |> unwrap_and_tag(:bracketed_b_close)
  bracketed_i_close = string("__]") |> unwrap_and_tag(:bracketed_i_close)
  bracketed_strong_close = string("*]") |> unwrap_and_tag(:bracketed_strong_close)
  bracketed_em_close = string("_]") |> unwrap_and_tag(:bracketed_em_close)
  bracketed_code_close = string("@]") |> unwrap_and_tag(:bracketed_code_close)
  bracketed_ins_close = string("+]") |> unwrap_and_tag(:bracketed_ins_close)
  bracketed_sup_close = string("^]") |> unwrap_and_tag(:bracketed_sup_close)
  bracketed_del_close = string("-]") |> unwrap_and_tag(:bracketed_del_close)
  bracketed_sub_close = string("~]") |> unwrap_and_tag(:bracketed_sub_close)

  # Unbracketed delimiters serve as both open and close markers; the
  # parser decides which role each occurrence plays.
  b_delim = string("**") |> unwrap_and_tag(:b_delim)
  i_delim = string("__") |> unwrap_and_tag(:i_delim)
  strong_delim = string("*") |> unwrap_and_tag(:strong_delim)
  em_delim = string("_") |> unwrap_and_tag(:em_delim)
  code_delim = string("@") |> unwrap_and_tag(:code_delim)
  ins_delim = string("+") |> unwrap_and_tag(:ins_delim)
  sup_delim = string("^") |> unwrap_and_tag(:sup_delim)
  sub_delim = string("~") |> unwrap_and_tag(:sub_delim)
  # "-" is a del delimiter only when NOT followed by ">" (keeps "->" intact).
  del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:del_delim)

  # Alphanumerics plus a few markup characters, tagged so the parser can
  # special-case delimiters adjacent to ordinary text (e.g. snake_case).
  quicktxt =
    utf8_char('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*@_{}')
    |> unwrap_and_tag(:quicktxt)

  # Catch-all: any single codepoint.
  char =
    utf8_char([])
    |> unwrap_and_tag(:char)

  # Top level: choice/1 is ordered, so longer / more specific alternatives
  # must precede their prefixes; `char` is the fallback that guarantees
  # progress. The whole input must be consumed (eos).
  textile =
    choice([
      literal,
      double_newline,
      newline,
      space_token,
      bq_cite_start,
      bq_cite_open,
      bq_open,
      bq_close,
      spoiler_open,
      spoiler_close,
      unbracketed_image,
      bracketed_image,
      bracketed_link_open,
      bracketed_link_url,
      unbracketed_link_url,
      link_delim,
      bracketed_b_open,
      bracketed_i_open,
      bracketed_strong_open,
      bracketed_em_open,
      bracketed_code_open,
      bracketed_ins_open,
      bracketed_sup_open,
      bracketed_del_open,
      bracketed_sub_open,
      bracketed_b_close,
      bracketed_i_close,
      bracketed_strong_close,
      bracketed_em_close,
      bracketed_code_close,
      bracketed_ins_close,
      bracketed_sup_close,
      bracketed_del_close,
      bracketed_sub_close,
      b_delim,
      i_delim,
      strong_delim,
      em_delim,
      code_delim,
      ins_delim,
      sup_delim,
      del_delim,
      sub_delim,
      quicktxt,
      char
    ])
    |> repeat()
    |> eos()

  defparsec :lex, textile
end

View file

@@ -1,371 +0,0 @@
defmodule FastTextile.Parser do
  @moduledoc """
  Backtracking recursive-descent parser over `FastTextile.Lexer` tokens.

  `parse/2` produces a list of `{:text | :markup, binary}` pairs: `:markup`
  entries are generated HTML; `:text` entries are already HTML-escaped
  input text. `parser` is a map carrying at least an `:image_transform`
  1-arity function; a `:state` map is added to track which non-nestable
  delimiters are currently open.
  """
  alias FastTextile.Lexer
  alias Phoenix.HTML

  # Entry point: trim, lex, parse zero-or-more top-level productions,
  # then coalesce adjacent same-kind nodes.
  def parse(parser, input) do
    parser = Map.put(parser, :state, %{})
    with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(String.trim(input)),
         {:ok, tree, []} <- repeat(&textile/2, parser, tokens)
    do
      partial_flatten(tree)
    else
      # Any lex/parse failure renders as empty output rather than raising.
      _ ->
        []
    end
  end

  # Helper to turn a parse tree into a string
  def flatten(tree) do
    tree
    |> List.flatten()
    |> Enum.map_join("", fn {_k, v} -> v end)
  end

  # Helper to escape HTML
  defp escape(text) do
    text
    |> HTML.html_escape()
    |> HTML.safe_to_string()
  end

  # Helper to turn a parse tree into a list
  # Adjacent nodes with the same tag are merged into one
  # {tag, concatenated_value} pair.
  def partial_flatten(tree) do
    tree
    |> List.flatten()
    |> Enum.chunk_by(fn {k, _v} -> k end)
    |> Enum.map(fn list ->
      [{type, _v} | _rest] = list
      value = Enum.map_join(list, "", fn {_k, v} -> v end)
      {type, value}
    end)
  end

  # Record that the delimiter `new_state` is currently open, preventing
  # the same delimiter from being reopened recursively.
  defp put_state(parser, new_state) do
    state = Map.put(parser.state, new_state, true)
    Map.put(parser, :state, state)
  end

  # Helper corresponding to Kleene star (*) operator
  # Match a specified rule zero or more times
  # Always succeeds; on first rule failure returns what matched so far.
  defp repeat(rule, parser, tokens) do
    case rule.(parser, tokens) do
      {:ok, tree, r_tokens} ->
        {:ok, tree2, r2_tokens} = repeat(rule, parser, r_tokens)
        {:ok, [tree, tree2], r2_tokens}
      _ ->
        {:ok, [], tokens}
    end
  end

  # Helper to match a simple recursive grammar rule of the following form:
  #
  #   open_token callback* close_token
  #
  # If the close token never arrives, the open token is downgraded to
  # escaped plain text and the inner tree is kept.
  defp simple_recursive(open_token, close_token, open_tag, close_tag, callback, parser, [{open_token, open} | r_tokens]) do
    case repeat(callback, parser, r_tokens) do
      {:ok, tree, [{^close_token, _} | r2_tokens]} ->
        {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
      {:ok, tree, r2_tokens} ->
        {:ok, [{:text, escape(open)}, tree], r2_tokens}
    end
  end
  defp simple_recursive(_open_token, _close_token, _open_tag, _close_tag, _callback, _parser, _tokens) do
    {:error, "Expected a simple recursive rule"}
  end

  # Helper to match a simple recursive grammar rule with negative lookahead:
  #
  #   open_token callback* close_token (?!lookahead_not)
  #
  # `state` marks the delimiter as open so it cannot reopen inside itself.
  defp simple_lookahead_not(open_token, close_token, open_tag, close_tag, lookahead_not, callback, state, parser, [{open_token, open} | r_tokens]) do
    case parser.state do
      # Already inside this delimiter: refuse so the outer occurrence closes.
      %{^state => _} ->
        {:error, "End of rule"}
      _ ->
        case r_tokens do
          # Delimiter immediately followed by whitespace cannot open markup;
          # emit it as escaped literal text.
          [{forbidden_lookahead, _la} | _] when forbidden_lookahead in [:space, :newline] ->
            {:ok, [{:text, escape(open)}], r_tokens}
          _ ->
            case repeat(callback, put_state(parser, state), r_tokens) do
              # Close token followed by the forbidden lookahead: both
              # delimiters become plain text; the lookahead token is pushed back.
              {:ok, tree, [{^close_token, close}, {^lookahead_not, ln} | r2_tokens]} ->
                {:ok, [{:text, escape(open)}, tree, {:text, escape(close)}], [{lookahead_not, ln} | r2_tokens]}
              {:ok, tree, [{^close_token, _} | r2_tokens]} ->
                {:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
              # Unclosed: open delimiter degrades to text.
              {:ok, tree, r2_tokens} ->
                {:ok, [{:text, escape(open)}, tree], r2_tokens}
            end
        end
    end
  end
  defp simple_lookahead_not(_open_token, _close_token, _open_tag, _close_tag, _lookahead_not, _callback, _state, _parser, _tokens) do
    {:error, "Expected a simple lookahead not rule"}
  end

  # Helper to efficiently assemble a UTF-8 binary from tokens of the
  # given type
  defp assemble_binary(token_type, accumulator, [{token_type, t} | stream]) do
    assemble_binary(token_type, accumulator <> <<t::utf8>>, stream)
  end
  defp assemble_binary(_token_type, accumulator, tokens), do: {accumulator, tokens}

  #
  # inline_textile_element =
  #   opening_markup inline_textile_element* closing_markup (?!quicktxt) |
  #   closing_markup (?=quicktxt) |
  #   link_delim block_textile_element* link_url |
  #   image url? |
  #   code_delim inline_textile_element* code_delim |
  #   inline_textile_element_not_opening_markup;
  #
  defp inline_textile_element(parser, tokens) do
    # Try each symmetric delimiter pair; first one that parses wins.
    [
      {:b_delim, :b, "<b>", "</b>"},
      {:i_delim, :i, "<i>", "</i>"},
      {:strong_delim, :strong, "<strong>", "</strong>"},
      {:em_delim, :em, "<em>", "</em>"},
      {:ins_delim, :ins, "<ins>", "</ins>"},
      {:sup_delim, :sup, "<sup>", "</sup>"},
      {:del_delim, :del, "<del>", "</del>"},
      {:sub_delim, :sub, "<sub>", "</sub>"}
    ]
    |> Enum.find_value(fn {delim_token, state, open_tag, close_tag} ->
      simple_lookahead_not(
        delim_token,
        delim_token,
        open_tag,
        close_tag,
        :quicktxt,
        &inline_textile_element/2,
        state,
        parser,
        tokens
      )
      |> case do
        {:ok, tree, r_tokens} ->
          {:ok, tree, r_tokens}
        _ ->
          nil
      end
    end)
    |> case do
      nil -> inner_inline_textile_element(parser, tokens)
      value -> value
    end
  end

  # Delimiter directly followed by quicktxt (e.g. "_word"): the delimiter
  # is literal text; keep parsing from the quicktxt.
  defp inner_inline_textile_element(parser, [{token, t}, {:quicktxt, q} | r_tokens])
       when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
  do
    case inline_textile_element(parser, [{:quicktxt, q} | r_tokens]) do
      {:ok, tree, r2_tokens} ->
        {:ok, [{:text, escape(t)}, tree], r2_tokens}
      _ ->
        {:ok, [{:text, escape(t)}], [{:quicktxt, q} | r_tokens]}
    end
  end
  # Unbracketed link: "text":url — URL token carries the "\":" prefix.
  defp inner_inline_textile_element(parser, [{:link_delim, open} | r_tokens]) do
    case repeat(&block_textile_element/2, parser, r_tokens) do
      {:ok, tree, [{:unbracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
        href = escape(url)
        {:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
      {:ok, tree, r2_tokens} ->
        {:ok, [{:text, escape(open)}, tree], r2_tokens}
    end
  end
  # Bracketed link: ["text":url]
  defp inner_inline_textile_element(parser, [{:bracketed_link_open, open} | r_tokens]) do
    case repeat(&inline_textile_element/2, parser, r_tokens) do
      {:ok, tree, [{:bracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
        href = escape(url)
        {:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
      {:ok, tree, r2_tokens} ->
        {:ok, [{:text, escape(open)}, tree], r2_tokens}
    end
  end
  # Image followed by a ":url" target → linked, spoiler-wrapped <img>.
  # The image URL is rewritten through the configured image_transform.
  defp inner_inline_textile_element(parser, [{token, img}, {:unbracketed_image_url, <<":", url::binary>>} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
    img = parser.image_transform.(img)
    {:ok, [{:markup, "<a href=\""}, {:markup, escape(url)}, {:markup, "\"><span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span></a>"}], r_tokens}
  end
  # Image without a link target.
  defp inner_inline_textile_element(parser, [{token, img} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
    img = parser.image_transform.(img)
    {:ok, [{:markup, "<span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span>"}], r_tokens}
  end
  # @code@ spans; the :code state blocks nested code delimiters.
  defp inner_inline_textile_element(parser, [{:code_delim, open} | r_tokens]) do
    case parser.state do
      %{code: _} ->
        {:error, "End of rule"}
      _ ->
        case repeat(&inline_textile_element/2, put_state(parser, :code), r_tokens) do
          {:ok, tree, [{:code_delim, _} | r2_tokens]} ->
            {:ok, [{:markup, "<code>"}, tree, {:markup, "</code>"}], r2_tokens}
          {:ok, tree, r2_tokens} ->
            {:ok, [{:text, escape(open)}, tree], r2_tokens}
        end
    end
  end
  defp inner_inline_textile_element(parser, tokens) do
    inline_textile_element_not_opening_markup(parser, tokens)
  end

  #
  # bq_cite_text = literal | char | space | quicktxt;
  #
  # Note that text is not escaped here because it will be escaped
  # when the tree is flattened
  defp bq_cite_text(_parser, [{:literal, lit} | r_tokens]) do
    {:ok, [{:text, lit}], r_tokens}
  end
  defp bq_cite_text(_parser, [{:char, lit} | r_tokens]) do
    {:ok, [{:text, <<lit::utf8>>}], r_tokens}
  end
  defp bq_cite_text(_parser, [{:space, _} | r_tokens]) do
    {:ok, [{:text, " "}], r_tokens}
  end
  defp bq_cite_text(_parser, [{:quicktxt, lit} | r_tokens]) do
    {:ok, [{:text, <<lit::utf8>>}], r_tokens}
  end
  defp bq_cite_text(_parser, _tokens) do
    {:error, "Expected cite tokens"}
  end

  #
  # inline_textile_element_not_opening_markup =
  #   literal | space | char |
  #   quicktxt opening_markup quicktxt |
  #   quicktxt |
  #   opening_block_tag block_textile_element* closing_block_tag;
  #
  defp inline_textile_element_not_opening_markup(_parser, [{:literal, lit} | r_tokens]) do
    {:ok, [{:markup, "<span class=\"literal\">"}, {:markup, escape(lit)}, {:markup, "</span>"}], r_tokens}
  end
  defp inline_textile_element_not_opening_markup(_parser, [{:space, _} | r_tokens]) do
    {:ok, [{:text, " "}], r_tokens}
  end
  # Greedily collapse a run of :char tokens into one escaped text node.
  defp inline_textile_element_not_opening_markup(_parser, [{:char, lit} | r_tokens]) do
    {binary, r2_tokens} = assemble_binary(:char, <<lit::utf8>>, r_tokens)
    {:ok, [{:text, escape(binary)}], r2_tokens}
  end
  # quicktxt DELIM quicktxt (e.g. "a_b"): the delimiter is plain text.
  defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, q1}, {token, t}, {:quicktxt, q2} | r_tokens])
       when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
  do
    {:ok, [{:text, escape(<<q1::utf8>>)}, {:text, escape(t)}, {:text, escape(<<q2::utf8>>)}], r_tokens}
  end
  defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, lit} | r_tokens]) do
    {:ok, [{:text, escape(<<lit::utf8>>)}], r_tokens}
  end
  # [bq="cite"]...[/bq] — cite text, then body, each degrading to escaped
  # text if the expected closing token never appears.
  defp inline_textile_element_not_opening_markup(parser, [{:bq_cite_start, start} | r_tokens]) do
    case repeat(&bq_cite_text/2, parser, r_tokens) do
      {:ok, tree, [{:bq_cite_open, open} | r2_tokens]} ->
        case repeat(&block_textile_element/2, parser, r2_tokens) do
          {:ok, tree2, [{:bq_close, _} | r3_tokens]} ->
            cite = escape(flatten(tree))
            {:ok, [{:markup, "<blockquote author=\""}, {:markup, cite}, {:markup, "\">"}, tree2, {:markup, "</blockquote>"}], r3_tokens}
          {:ok, tree2, r3_tokens} ->
            {:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}, tree2], r3_tokens}
          _ ->
            {:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}], r_tokens}
        end
      _ ->
        {:ok, [{:text, escape(start)}], r_tokens}
    end
  end
  # Stray "] with no matching [bq=" — plain text.
  defp inline_textile_element_not_opening_markup(_parser, [{:bq_cite_open, tok} | r_tokens]) do
    {:ok, [{:text, escape(tok)}], r_tokens}
  end
  # All remaining bracketed/block pairs share the simple_recursive shape.
  defp inline_textile_element_not_opening_markup(parser, tokens) do
    [
      {:bq_open, :bq_close, "<blockquote>", "</blockquote>"},
      {:spoiler_open, :spoiler_close, "<span class=\"spoiler\">", "</span>"},
      {:bracketed_b_open, :bracketed_b_close, "<b>", "</b>"},
      {:bracketed_i_open, :bracketed_i_close, "<i>", "</i>"},
      {:bracketed_strong_open, :bracketed_strong_close, "<strong>", "</strong>"},
      {:bracketed_em_open, :bracketed_em_close, "<em>", "</em>"},
      {:bracketed_code_open, :bracketed_code_close, "<code>", "</code>"},
      {:bracketed_ins_open, :bracketed_ins_close, "<ins>", "</ins>"},
      {:bracketed_sup_open, :bracketed_sup_close, "<sup>", "</sup>"},
      {:bracketed_del_open, :bracketed_del_close, "<del>", "</del>"},
      {:bracketed_sub_open, :bracketed_sub_close, "<sub>", "</sub>"}
    ]
    |> Enum.find_value(fn {open_token, close_token, open_tag, close_tag} ->
      simple_recursive(
        open_token,
        close_token,
        open_tag,
        close_tag,
        &block_textile_element/2,
        parser,
        tokens
      )
      |> case do
        {:ok, tree, r_tokens} ->
          {:ok, tree, r_tokens}
        _ ->
          nil
      end
    end)
    |> Kernel.||({:error, "Expected block markup"})
  end

  #
  # block_textile_element =
  #   double_newline | newline | inline_textile_element;
  #
  defp block_textile_element(_parser, [{:double_newline, _} | r_tokens]) do
    {:ok, [{:markup, "<br/><br/>"}], r_tokens}
  end
  defp block_textile_element(_parser, [{:newline, _} | r_tokens]) do
    {:ok, [{:markup, "<br/>"}], r_tokens}
  end
  defp block_textile_element(parser, tokens) do
    inline_textile_element(parser, tokens)
  end

  #
  # textile =
  #   (block_textile_element | TOKEN)* eos;
  #
  # Fallback: any single unconsumed token becomes escaped text,
  # guaranteeing the parser always makes progress.
  defp textile(parser, tokens) do
    case block_textile_element(parser, tokens) do
      {:ok, tree, r_tokens} ->
        {:ok, tree, r_tokens}
      _ ->
        case tokens do
          [{_, string} | r_tokens] ->
            {:ok, [{:text, escape(string)}], r_tokens}
          _ ->
            {:error, "Expected textile"}
        end
    end
  end
end

View file

@@ -1,17 +1,12 @@
defmodule Philomena.Textile.Renderer do
# todo: belongs in PhilomenaWeb
alias Textile.Parser, as: SlowParser
alias FastTextile.Parser, as: FastParser
alias Textile.Parser
alias Philomena.Images.Image
alias Philomena.Repo
import Phoenix.HTML
import Phoenix.HTML.Link
import Ecto.Query
@parser %SlowParser{
image_transform: &Camo.Image.image_url/1
}
# Kill bogus compile time dependency on ImageView
@image_view Module.concat(["PhilomenaWeb.ImageView"])
@@ -20,17 +15,8 @@ defmodule Philomena.Textile.Renderer do
end
def render_collection(posts, conn) do
parser =
case conn.cookies["new_parser"] do
"true" -> FastParser
_ -> SlowParser
end
parsed =
posts
|> Enum.map(fn post ->
parser.parse(@parser, post.body)
end)
opts = %{image_transform: &Camo.Image.image_url/1}
parsed = Enum.map(posts, &Parser.parse(opts, &1.body))
images =
parsed

View file

@@ -40,7 +40,6 @@ defmodule PhilomenaWeb.SettingController do
|> set_cookie(user_params, "webm", "webm")
|> set_cookie(user_params, "chan_nsfw", "chan_nsfw")
|> set_cookie(user_params, "hide_staff_tools", "hide_staff_tools")
|> set_cookie(user_params, "new_parser", "new_parser")
end
defp set_cookie(conn, params, param_name, cookie_name) do

View file

@@ -122,10 +122,6 @@ h1 Content Settings
=> label f, :chan_nsfw, "Show NSFW channels"
=> checkbox f, :chan_nsfw
.fieldlabel: i Show streams marked as NSFW on the channels page.
.field
=> label f, :new_parser, "Use experimental parser"
=> checkbox f, :new_parser
.fieldlabel: i Use the experimental Textile parser.
= if staff?(@conn.assigns.current_user) do
.field
=> label f, :hide_staff_tools

View file

@@ -1,6 +1,6 @@
defmodule PhilomenaWeb.PostView do
alias Philomena.Attribution
alias FastTextile.Parser
alias Textile.Parser
use PhilomenaWeb, :view

View file

@@ -1,41 +0,0 @@
defmodule Textile.Helpers do
  @moduledoc """
  Shared NimbleParsec combinator helpers for the Textile lexers.
  """
  import NimbleParsec

  # Helper to "undo" a tokenization and convert it back
  # to a string
  def unwrap([{_name, value}]), do: value

  # Lots of extra unicode space characters
  # Includes '\n' and '\r', unlike FastTextile.Lexer's space set.
  def space do
    choice([
      utf8_char('\n\r\f \t\u00a0\u1680\u180e\u202f\u205f\u3000'),
      utf8_char([0x2000..0x200a])
    ])
  end

  # Characters which are valid before and after the main markup characters.
  def special_characters do
    choice([
      space(),
      utf8_char('#$%&(),./:;<=?\\`|\'')
    ])
  end

  # Simple tag for a markup element that must
  # be succeeded immediately by a non-space character
  #
  # `str` is the literal opening sequence; `char` (default: first codepoint
  # of `str`) is additionally forbidden right after the opener, so e.g.
  # "**" does not open when followed by another "*". Tagged as
  # :"<tag_name>_open". Note the optional parameter sits in the middle —
  # legal in Elixir, so markup_open_tag("*", :strong) works.
  def markup_open_tag(str, char \\ nil, tag_name) do
    char = char || binary_head(str)
    open_stops =
      choice([
        space(),
        string(char)
      ])
    string(str)
    |> lookahead_not(open_stops)
    |> unwrap_and_tag(:"#{tag_name}_open")
  end

  # First UTF-8 codepoint of a binary, as a one-codepoint binary.
  defp binary_head(<<c::utf8, _rest::binary>>), do: <<c::utf8>>
end

View file

@@ -1,15 +1,43 @@
defmodule Textile.Lexer do
import NimbleParsec
import Textile.Helpers
import Textile.MarkupLexer
import Textile.UrlLexer
space =
utf8_char('\f \r\t\u00a0\u1680\u180e\u202f\u205f\u3000' ++ Enum.to_list(0x2000..0x200a))
# Structural tags
extended_space =
choice([
space,
string("\n"),
eos()
])
space_token =
space
|> unwrap_and_tag(:space)
double_newline =
string("\n")
|> repeat(space)
|> string("\n")
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:double_newline)
newline =
string("\n")
|> unwrap_and_tag(:newline)
link_ending_characters =
utf8_char('@#$%&(),.:;<=?\\`|\'')
bracket_link_ending_characters =
utf8_char('" []')
end_of_link =
choice([
concat(link_ending_characters, extended_space),
extended_space
])
# Literals enclosed via [== ==]
# Will never contain any markup
bracketed_literal =
ignore(string("[=="))
|> repeat(lookahead_not(string("==]")) |> utf8_char([]))
@@ -28,231 +56,183 @@ defmodule Textile.Lexer do
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:literal)
blockquote_cite =
lookahead_not(string("\""))
|> choice([
literal |> reduce(:unwrap),
utf8_char([])
])
|> repeat()
bq_cite_start =
string("[bq=\"")
|> unwrap_and_tag(:bq_cite_start)
# Blockquote opening tag with cite: [bq="the author"]
# Cite can contain bracketed literals or text
blockquote_open_cite =
ignore(string("[bq=\""))
|> concat(blockquote_cite)
|> ignore(string("\"]"))
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:blockquote_open_cite)
bq_cite_open =
string("\"]")
|> unwrap_and_tag(:bq_cite_open)
# Blockquote opening tag
blockquote_open =
bq_open =
string("[bq]")
|> unwrap_and_tag(:blockquote_open)
|> unwrap_and_tag(:bq_open)
# Blockquote closing tag
blockquote_close =
bq_close =
string("[/bq]")
|> unwrap_and_tag(:blockquote_close)
|> unwrap_and_tag(:bq_close)
# Spoiler open tag
spoiler_open =
string("[spoiler]")
|> unwrap_and_tag(:spoiler_open)
# Spoiler close tag
spoiler_close =
string("[/spoiler]")
|> unwrap_and_tag(:spoiler_close)
# Images
image_url_with_title =
url_ending_in(string("("))
|> unwrap_and_tag(:image_url)
|> concat(
ignore(string("("))
|> repeat(utf8_char(not: ?)))
|> ignore(string(")"))
|> lookahead(string("!"))
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:image_title)
)
image_url_without_title =
url_ending_in(string("!"))
|> unwrap_and_tag(:image_url)
image_url =
image_url_scheme =
choice([
image_url_with_title,
image_url_without_title
string("//"),
string("/"),
string("https://"),
string("http://")
])
bracketed_image_with_link =
ignore(string("[!"))
|> concat(image_url)
|> ignore(string("!:"))
|> concat(
url_ending_in(string("]"))
|> unwrap_and_tag(:image_link_url)
)
bracketed_image_without_link =
ignore(string("[!"))
|> concat(image_url)
|> ignore(string("!]"))
image_with_link =
ignore(string("!"))
|> concat(image_url)
|> ignore(string("!:"))
|> concat(
url_ending_in(space())
|> unwrap_and_tag(:image_link_url)
)
image_without_link =
ignore(string("!"))
|> concat(image_url)
|> ignore(string("!"))
image =
link_url_scheme =
choice([
bracketed_image_with_link,
bracketed_image_without_link,
image_with_link,
image_without_link
string("#"),
image_url_scheme
])
unbracketed_url =
string(":")
|> concat(link_url_scheme)
|> repeat(lookahead_not(end_of_link) |> utf8_char([]))
# Links
unbracketed_image_url =
unbracketed_url
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:unbracketed_image_url)
{link_markup_start, link_markup_element} = markup_ending_in(string("\""))
link_url_stop =
choice([
string("*"),
string("@"),
string("^"),
string("~"),
string(".") |> concat(choice([space(), eos()])),
string("!") |> concat(choice([space(), eos()])),
string(",") |> concat(choice([space(), eos()])),
string("_") |> concat(choice([space(), eos()])),
string("?") |> concat(choice([space(), eos()])),
string(";") |> concat(choice([space(), eos()])),
space(),
eos()
])
link_contents_start =
choice([
image,
spoiler_open,
spoiler_close,
blockquote_open,
blockquote_open_cite,
blockquote_close,
literal,
link_markup_start
])
link_contents_element =
choice([
image,
spoiler_open,
spoiler_close,
blockquote_open,
blockquote_open_cite,
blockquote_close,
literal,
link_markup_element
])
link_contents =
optional(link_contents_start)
|> repeat(link_contents_element)
bracketed_link_end =
string("\":")
|> unwrap_and_tag(:link_end)
|> concat(
url_ending_in(string("]"))
|> ignore(string("]"))
|> unwrap_and_tag(:link_url)
)
bracketed_link =
string("[\"")
|> unwrap_and_tag(:link_start)
|> concat(link_contents)
|> concat(bracketed_link_end)
unbracketed_link_end =
string("\":")
|> unwrap_and_tag(:link_end)
|> concat(
url_ending_in(link_url_stop)
|> unwrap_and_tag(:link_url)
)
unbracketed_link =
unbracketed_link_url =
string("\"")
|> unwrap_and_tag(:link_start)
|> concat(link_contents)
|> concat(unbracketed_link_end)
|> concat(unbracketed_url)
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:unbracketed_link_url)
link =
choice([
bracketed_link,
unbracketed_link
])
unbracketed_image =
ignore(string("!"))
|> concat(image_url_scheme)
|> repeat(utf8_char(not: ?!))
|> ignore(string("!"))
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:unbracketed_image)
|> concat(optional(unbracketed_image_url))
bracketed_image =
ignore(string("[!"))
|> concat(image_url_scheme)
|> repeat(lookahead_not(string("!]")) |> utf8_char([]))
|> ignore(string("!]"))
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:bracketed_image)
|> concat(optional(unbracketed_image_url))
# Textile
link_delim =
string("\"")
|> unwrap_and_tag(:link_delim)
markup_ends =
choice([
spoiler_close,
blockquote_close,
eos()
])
bracketed_link_open =
string("[\"")
|> unwrap_and_tag(:bracketed_link_open)
{markup_start, markup_element} = markup_ending_in(markup_ends)
bracketed_link_url =
string("\":")
|> concat(link_url_scheme)
|> repeat(lookahead_not(bracket_link_ending_characters) |> utf8_char([]))
|> ignore(string("]"))
|> reduce({List, :to_string, []})
|> unwrap_and_tag(:bracketed_link_url)
textile_default =
choice([
literal,
blockquote_open_cite |> optional(markup_start),
blockquote_open |> optional(markup_start),
blockquote_close,
spoiler_open |> optional(markup_start),
spoiler_close,
link,
image
])
bracketed_b_open = string("[**") |> unwrap_and_tag(:bracketed_b_open)
bracketed_i_open = string("[__") |> unwrap_and_tag(:bracketed_i_open)
bracketed_strong_open = string("[*") |> unwrap_and_tag(:bracketed_strong_open)
bracketed_em_open = string("[_") |> unwrap_and_tag(:bracketed_em_open)
bracketed_code_open = string("[@") |> unwrap_and_tag(:bracketed_code_open)
bracketed_ins_open = string("[+") |> unwrap_and_tag(:bracketed_ins_open)
bracketed_sup_open = string("[^") |> unwrap_and_tag(:bracketed_sup_open)
bracketed_del_open = string("[-") |> unwrap_and_tag(:bracketed_del_open)
bracketed_sub_open = string("[~") |> unwrap_and_tag(:bracketed_sub_open)
textile_main =
choice([
textile_default,
markup_element
])
bracketed_b_close = string("**]") |> unwrap_and_tag(:bracketed_b_close)
bracketed_i_close = string("__]") |> unwrap_and_tag(:bracketed_i_close)
bracketed_strong_close = string("*]") |> unwrap_and_tag(:bracketed_strong_close)
bracketed_em_close = string("_]") |> unwrap_and_tag(:bracketed_em_close)
bracketed_code_close = string("@]") |> unwrap_and_tag(:bracketed_code_close)
bracketed_ins_close = string("+]") |> unwrap_and_tag(:bracketed_ins_close)
bracketed_sup_close = string("^]") |> unwrap_and_tag(:bracketed_sup_close)
bracketed_del_close = string("-]") |> unwrap_and_tag(:bracketed_del_close)
bracketed_sub_close = string("~]") |> unwrap_and_tag(:bracketed_sub_close)
textile_start =
choice([
textile_default,
markup_start
])
b_delim = string("**") |> unwrap_and_tag(:b_delim)
i_delim = string("__") |> unwrap_and_tag(:i_delim)
strong_delim = string("*") |> unwrap_and_tag(:strong_delim)
em_delim = string("_") |> unwrap_and_tag(:em_delim)
code_delim = string("@") |> unwrap_and_tag(:code_delim)
ins_delim = string("+") |> unwrap_and_tag(:ins_delim)
sup_delim = string("^") |> unwrap_and_tag(:sup_delim)
sub_delim = string("~") |> unwrap_and_tag(:sub_delim)
del_delim = lookahead_not(string("-"), string(">")) |> unwrap_and_tag(:del_delim)
quicktxt =
utf8_char('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*@_{}')
|> unwrap_and_tag(:quicktxt)
char =
utf8_char([])
|> unwrap_and_tag(:char)
textile =
optional(textile_start)
|> repeat(textile_main)
choice([
literal,
double_newline,
newline,
space_token,
bq_cite_start,
bq_cite_open,
bq_open,
bq_close,
spoiler_open,
spoiler_close,
unbracketed_image,
bracketed_image,
bracketed_link_open,
bracketed_link_url,
unbracketed_link_url,
link_delim,
bracketed_b_open,
bracketed_i_open,
bracketed_strong_open,
bracketed_em_open,
bracketed_code_open,
bracketed_ins_open,
bracketed_sup_open,
bracketed_del_open,
bracketed_sub_open,
bracketed_b_close,
bracketed_i_close,
bracketed_strong_close,
bracketed_em_close,
bracketed_code_close,
bracketed_ins_close,
bracketed_sup_close,
bracketed_del_close,
bracketed_sub_close,
b_delim,
i_delim,
strong_delim,
em_delim,
code_delim,
ins_delim,
sup_delim,
del_delim,
sub_delim,
quicktxt,
char
])
|> repeat()
|> eos()
defparsec :lex, textile
end

View file

@@ -1,171 +0,0 @@
defmodule Textile.MarkupLexer do
  @moduledoc """
  Builds the inline-markup combinators for the Textile lexer,
  parameterized on the sequence that terminates the enclosing context.

  `markup_ending_in/1` returns `{markup_at_start, markup_element}`:
  the combinator valid at the very start of a markup run, and the
  combinator for each subsequent element.
  """
  import NimbleParsec
  import Textile.Helpers

  # Markup tags
  def markup_ending_in(ending_sequence) do
    double_newline =
      string("\n\n")
      |> unwrap_and_tag(:double_newline)

    newline =
      string("\n")
      |> unwrap_and_tag(:newline)

    # Whitespace/punctuation that may legally precede an opening tag.
    preceding_whitespace =
      choice([
        double_newline,
        newline,
        special_characters()
      ])

    # The literal tag is special, because
    # 1. It needs to capture everything inside it as a distinct token.
    # 2. It can be surrounded by markup on all sides.
    # 3. If it successfully tokenizes, it will always be in the output.
    literal_open_stops =
      choice([
        space(),
        ending_sequence,
        string("=")
      ])

    literal_close_stops =
      lookahead_not(
        choice([
          ending_sequence,
          string("\n\n"),
          string("="),
          space() |> concat(string("="))
        ])
      )
      |> utf8_char([])

    literal =
      ignore(string("=="))
      |> lookahead_not(literal_open_stops)
      |> repeat(literal_close_stops)
      |> ignore(string("=="))
      |> reduce({List, :to_string, []})
      |> unwrap_and_tag(:literal)

    # Unbracketed opening tags. The second argument is the character that
    # must NOT follow the opener (see Textile.Helpers.markup_open_tag/3).
    b_open = markup_open_tag("**", "*", :b)
    # NOTE(review): "__" paired with stop char "*" looks inconsistent —
    # the bracketed form below uses ("[__", "_", :b_i). Presumably this
    # should be ("__", "_", :i); left as-is pending confirmation.
    i_open = markup_open_tag("__", "*", :i)
    strong_open = markup_open_tag("*", :strong)
    em_open = markup_open_tag("_", :em)
    code_open = markup_open_tag("@", :code)
    ins_open = markup_open_tag("+", :ins)
    sup_open = markup_open_tag("^", :sup)
    del_open = markup_open_tag("-", :del)
    sub_open = markup_open_tag("~", :sub)

    # Bracketed opening tags, e.g. [**bold**].
    b_b_open = markup_open_tag("[**", "*", :b_b)
    b_i_open = markup_open_tag("[__", "_", :b_i)
    b_strong_open = markup_open_tag("[*", "*", :b_strong)
    b_em_open = markup_open_tag("[_", "_", :b_em)
    b_code_open = markup_open_tag("[@", "@", :b_code)
    b_ins_open = markup_open_tag("[+", "+", :b_ins)
    b_sup_open = markup_open_tag("[^", "^", :b_sup)
    b_del_open = markup_open_tag("[-", "-", :b_del)
    b_sub_open = markup_open_tag("[~", "~", :b_sub)

    # Bracketed closing tags.
    b_b_close = string("**]") |> unwrap_and_tag(:b_b_close)
    b_i_close = string("__]") |> unwrap_and_tag(:b_i_close)
    b_strong_close = string("*]") |> unwrap_and_tag(:b_strong_close)
    b_em_close = string("_]") |> unwrap_and_tag(:b_em_close)
    b_code_close = string("@]") |> unwrap_and_tag(:b_code_close)
    b_ins_close = string("+]") |> unwrap_and_tag(:b_ins_close)
    b_sup_close = string("^]") |> unwrap_and_tag(:b_sup_close)
    b_del_close = string("-]") |> unwrap_and_tag(:b_del_close)
    b_sub_close = string("~]") |> unwrap_and_tag(:b_sub_close)

    # Unbracketed closing tags.
    b_close = string("**") |> unwrap_and_tag(:b_close)
    i_close = string("__") |> unwrap_and_tag(:i_close)
    strong_close = string("*") |> unwrap_and_tag(:strong_close)
    em_close = string("_") |> unwrap_and_tag(:em_close)
    code_close = string("@") |> unwrap_and_tag(:code_close)
    ins_close = string("+") |> unwrap_and_tag(:ins_close)
    sup_close = string("^") |> unwrap_and_tag(:sup_close)
    del_close = string("-") |> unwrap_and_tag(:del_close)
    sub_close = string("~") |> unwrap_and_tag(:sub_close)

    bracketed_markup_opening_tags =
      choice([
        b_b_open,
        b_i_open,
        b_strong_open,
        b_em_open,
        b_code_open,
        b_ins_open,
        b_sup_open,
        b_del_open,
        b_sub_open
      ])

    markup_opening_tags =
      choice([
        b_open,
        i_open,
        strong_open,
        em_open,
        code_open,
        ins_open,
        sup_open,
        # "-" opens del only when not part of an arrow "->".
        del_open |> lookahead_not(string(">")),
        sub_open
      ])

    # BUGFIX: removed trailing comma after the final list element —
    # Elixir does not permit trailing commas in list literals, so the
    # original `b_sub_close,` immediately before `])` was a syntax error.
    bracketed_markup_closing_tags =
      choice([
        b_b_close,
        b_i_close,
        b_strong_close,
        b_em_close,
        b_code_close,
        b_ins_close,
        b_sup_close,
        b_del_close,
        b_sub_close
      ])

    markup_closing_tags =
      choice([
        b_close,
        i_close,
        strong_close,
        em_close,
        code_close,
        ins_close,
        sup_close,
        del_close,
        sub_close
      ])

    # At the very start of a run, one or more unbracketed openers or a
    # single bracketed opener may appear with no preceding whitespace.
    markup_at_start =
      choice([
        times(markup_opening_tags, min: 1),
        bracketed_markup_opening_tags
      ])

    # One element of a markup run; never consumes the ending sequence.
    markup_element =
      lookahead_not(ending_sequence)
      |> choice([
        literal,
        bracketed_markup_closing_tags,
        bracketed_markup_opening_tags |> lookahead_not(space()),
        preceding_whitespace |> times(markup_opening_tags, min: 1) |> lookahead_not(ending_sequence),
        times(markup_closing_tags, min: 1) |> lookahead(choice([special_characters(), ending_sequence])),
        double_newline,
        newline,
        utf8_char([])
      ])

    {markup_at_start, markup_element}
  end
end

View file

@ -1,313 +1,371 @@
defmodule Textile.Parser do
import Textile.ParserHelpers
alias Textile.Lexer
alias Phoenix.HTML
alias Textile.{
Lexer,
Parser,
TokenCoalescer
}
def parse(parser, input) do
parser = Map.put(parser, :state, %{})
defstruct [
image_transform: nil
]
def parse(%Parser{} = parser, input) do
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(input |> remove_linefeeds()),
tokens <- TokenCoalescer.coalesce_lex(tokens),
{:ok, tree, []} <- textile_top(parser, tokens),
tree <- TokenCoalescer.coalesce_parse(tree)
with {:ok, tokens, _1, _2, _3, _4} <- Lexer.lex(String.trim(input)),
{:ok, tree, []} <- repeat(&textile/2, parser, tokens)
do
tree
else
err ->
err
end
end
#
# Backtracking LL packrat parser for simplified Textile grammar
#
#
# textile = (well_formed_including_paragraphs | TOKEN)*;
#
defp textile_top(_parser, []), do: {:ok, [], []}
defp textile_top(parser, tokens) do
with {:ok, tree, r_tokens} <- well_formed_including_paragraphs(parser, nil, tokens),
false <- tree == [],
{:ok, next_tree, r2_tokens} <- textile_top(parser, r_tokens)
do
{:ok, [tree, next_tree], r2_tokens}
partial_flatten(tree)
else
_ ->
[{_token, string} | r_tokens] = tokens
{:ok, next_tree, r2_tokens} = textile_top(parser, r_tokens)
{:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens}
[]
end
end
#
# well_formed_including_paragraphs = (markup | double_newline)*;
#
defp well_formed_including_paragraphs(_parser, _closing_token, []), do: {:ok, [], []}
defp well_formed_including_paragraphs(parser, closing_token, [{:double_newline, _nl} | r_tokens]) do
{:ok, tree, r2_tokens} = well_formed_including_paragraphs(parser, closing_token, r_tokens)
{:ok, [{:markup, "<br/><br/>"}, tree], r2_tokens}
# Helper to turn a parse tree into a string
def flatten(tree) do
tree
|> List.flatten()
|> Enum.map_join("", fn {_k, v} -> v end)
end
defp well_formed_including_paragraphs(parser, closing_token, tokens) do
with {:markup, {:ok, tree, r_tokens}} <- {:markup, markup(parser, tokens)},
{:ok, next_tree, r2_tokens} <- well_formed_including_paragraphs(parser, closing_token, r_tokens)
do
{:ok, [tree, next_tree], r2_tokens}
else
_ ->
consume_nonclosing(parser, closing_token, tokens)
end
# Helper to escape HTML
defp escape(text) do
text
|> HTML.html_escape()
|> HTML.safe_to_string()
end
defp consume_nonclosing(_parser, closing_token, [{closing_token, _string} | _r_tokens] = tokens) do
{:ok, [], tokens}
end
defp consume_nonclosing(parser, closing_token, [{_next_token, string} | r_tokens]) do
{:ok, next_tree, r2_tokens} = well_formed_including_paragraphs(parser, closing_token, r_tokens)
# Helper to turn a parse tree into a list
def partial_flatten(tree) do
tree
|> List.flatten()
|> Enum.chunk_by(fn {k, _v} -> k end)
|> Enum.map(fn list ->
[{type, _v} | _rest] = list
{:ok, [{:text, escape_nl2br(string)}, next_tree], r2_tokens}
end
defp consume_nonclosing(_parser, _closing_token, []) do
{:ok, [], []}
value = Enum.map_join(list, "", fn {_k, v} -> v end)
{type, value}
end)
end
#
# well_formed = (markup)*;
#
defp well_formed(parser, tokens) do
case markup(parser, tokens) do
defp put_state(parser, new_state) do
state = Map.put(parser.state, new_state, true)
Map.put(parser, :state, state)
end
# Helper corresponding to Kleene star (*) operator
# Match a specificed rule zero or more times
defp repeat(rule, parser, tokens) do
case rule.(parser, tokens) do
{:ok, tree, r_tokens} ->
{:ok, next_tree, r2_tokens} = well_formed(parser, r_tokens)
{:ok, [tree, next_tree], r2_tokens}
{:ok, tree2, r2_tokens} = repeat(rule, parser, r_tokens)
{:ok, [tree, tree2], r2_tokens}
_ ->
{:ok, [], tokens}
end
end
# Helper to match a simple recursive grammar rule of the following form:
#
# markup =
# blockquote | spoiler | link | image | bold | italic | strong | emphasis |
# code | inserted | superscript | deleted | subscript | newline | literal |
# literal | text;
# open_token callback* close_token
#
defp markup(parser, tokens) do
markups = [
&blockquote/2, &spoiler/2, &link/2, &image/2, &bold/2, &italic/2, &strong/2,
&emphasis/2, &code/2, &inserted/2, &superscript/2, &deleted/2, &subscript/2,
&newline/2, &literal/2, &literal/2, &text/2
]
defp simple_recursive(open_token, close_token, open_tag, close_tag, callback, parser, [{open_token, open} | r_tokens]) do
case repeat(callback, parser, r_tokens) do
{:ok, tree, [{^close_token, _} | r2_tokens]} ->
{:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
value =
markups
|> Enum.find_value(fn func ->
case func.(parser, tokens) do
{:ok, tree, r_tokens} ->
{:ok, tree, r_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape(open)}, tree], r2_tokens}
end
end
defp simple_recursive(_open_token, _close_token, _open_tag, _close_tag, _callback, _parser, _tokens) do
{:error, "Expected a simple recursive rule"}
end
# Helper to match a simple recursive grammar rule with negative lookahead:
#
# open_token callback* close_token (?!lookahead_not)
#
defp simple_lookahead_not(open_token, close_token, open_tag, close_tag, lookahead_not, callback, state, parser, [{open_token, open} | r_tokens]) do
case parser.state do
%{^state => _} ->
{:error, "End of rule"}
_ ->
case r_tokens do
[{forbidden_lookahead, _la} | _] when forbidden_lookahead in [:space, :newline] ->
{:ok, [{:text, escape(open)}], r_tokens}
_ ->
nil
case repeat(callback, put_state(parser, state), r_tokens) do
{:ok, tree, [{^close_token, close}, {^lookahead_not, ln} | r2_tokens]} ->
{:ok, [{:text, escape(open)}, tree, {:text, escape(close)}], [{lookahead_not, ln} | r2_tokens]}
{:ok, tree, [{^close_token, _} | r2_tokens]} ->
{:ok, [{:markup, open_tag}, tree, {:markup, close_tag}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape(open)}, tree], r2_tokens}
end
end
end)
value || {:error, "Expected markup"}
end
end
defp simple_lookahead_not(_open_token, _close_token, _open_tag, _close_tag, _lookahead_not, _callback, _state, _parser, _tokens) do
{:error, "Expected a simple lookahead not rule"}
end
# Helper to efficiently assemble a UTF-8 binary from tokens of the
# given type
defp assemble_binary(token_type, accumulator, [{token_type, t} | stream]) do
assemble_binary(token_type, accumulator <> <<t::utf8>>, stream)
end
defp assemble_binary(_token_type, accumulator, tokens), do: {accumulator, tokens}
#
# blockquote =
# blockquote_open_cite well_formed_including_paragraphs blockquote_close |
# blockquote_open well_formed_including_paragraphs blockquote_close;
# inline_textile_element =
# opening_markup inline_textile_element* closing_markup (?!quicktxt) |
# closing_markup (?=quicktxt) |
# link_delim block_textile_element* link_url |
# image url? |
# code_delim inline_textile_element* code_delim |
# inline_textile_element_not_opening_markup;
#
defp blockquote(parser, [{:blockquote_open_cite, author} | r_tokens]) do
case well_formed_including_paragraphs(parser, :blockquote_close, r_tokens) do
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<blockquote author="#{escape_html(author)}">|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(~s|[bq="#{author}"]|)}, tree], r2_tokens}
defp inline_textile_element(parser, tokens) do
[
{:b_delim, :b, "<b>", "</b>"},
{:i_delim, :i, "<i>", "</i>"},
{:strong_delim, :strong, "<strong>", "</strong>"},
{:em_delim, :em, "<em>", "</em>"},
{:ins_delim, :ins, "<ins>", "</ins>"},
{:sup_delim, :sup, "<sup>", "</sup>"},
{:del_delim, :del, "<del>", "</del>"},
{:sub_delim, :sub, "<sub>", "</sub>"}
]
|> Enum.find_value(fn {delim_token, state, open_tag, close_tag} ->
simple_lookahead_not(
delim_token,
delim_token,
open_tag,
close_tag,
:quicktxt,
&inline_textile_element/2,
state,
parser,
tokens
)
|> case do
{:ok, tree, r_tokens} ->
{:ok, tree, r_tokens}
_ ->
nil
end
end)
|> case do
nil -> inner_inline_textile_element(parser, tokens)
value -> value
end
end
defp blockquote(parser, [{:blockquote_open, open} | r_tokens]) do
case well_formed_including_paragraphs(parser, :blockquote_close, r_tokens) do
{:ok, tree, [{:blockquote_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<blockquote>|}, tree, {:markup, ~s|</blockquote>|}], r2_tokens}
defp inner_inline_textile_element(parser, [{token, t}, {:quicktxt, q} | r_tokens])
when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
do
case inline_textile_element(parser, [{:quicktxt, q} | r_tokens]) do
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
{:ok, [{:text, escape(t)}, tree], r2_tokens}
_ ->
{:ok, [{:text, escape(t)}], [{:quicktxt, q} | r_tokens]}
end
end
defp inner_inline_textile_element(parser, [{:link_delim, open} | r_tokens]) do
case repeat(&block_textile_element/2, parser, r_tokens) do
{:ok, tree, [{:unbracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
href = escape(url)
defp blockquote(_parser, _tokens),
do: {:error, "Expected a blockquote tag with optional citation"}
#
# spoiler =
# spoiler_open well_formed_including_paragraphs spoiler_close;
#
defp spoiler(parser, [{:spoiler_open, open} | r_tokens]) do
case well_formed_including_paragraphs(parser, :spoiler_close, r_tokens) do
{:ok, tree, [{:spoiler_close, _close} | r2_tokens]} ->
{:ok, [{:markup, ~s|<span class="spoiler">|}, tree, {:markup, ~s|</span>|}], r2_tokens}
{:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(open)}, tree], r2_tokens}
{:ok, [{:text, escape(open)}, tree], r2_tokens}
end
end
defp inner_inline_textile_element(parser, [{:bracketed_link_open, open} | r_tokens]) do
case repeat(&inline_textile_element/2, parser, r_tokens) do
{:ok, tree, [{:bracketed_link_url, <<"\":", url::binary>>} | r2_tokens]} ->
href = escape(url)
defp spoiler(_parser, _tokens),
do: {:error, "Expected a spoiler tag"}
#
# link =
# link_start well_formed_including_paragraphs link_end link_url;
#
defp link(parser, [{:link_start, start} | r_tokens]) do
case well_formed_including_paragraphs(parser, :link_end, r_tokens) do
{:ok, tree, [{:link_end, _end}, {:link_url, url} | r2_tokens]} ->
{:ok, [{:markup, ~s|<a href="#{escape_html(url)}">|}, tree, {:markup, ~s|</a>|}], r2_tokens}
{:ok, [{:markup, "<a href=\""}, {:markup, href}, {:markup, "\">"}, tree, {:markup, "</a>"}], r2_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape_nl2br(start)}, tree], r2_tokens}
{:ok, [{:text, escape(open)}, tree], r2_tokens}
end
end
defp inner_inline_textile_element(parser, [{token, img}, {:unbracketed_image_url, <<":", url::binary>>} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
img = parser.image_transform.(img)
defp link(_parser, _tokens),
do: {:error, "Expected a link"}
{:ok, [{:markup, "<a href=\""}, {:markup, escape(url)}, {:markup, "\"><span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span></a>"}], r_tokens}
end
defp inner_inline_textile_element(parser, [{token, img} | r_tokens]) when token in [:unbracketed_image, :bracketed_image] do
img = parser.image_transform.(img)
{:ok, [{:markup, "<span class=\"imgspoiler\"><img src=\""}, {:markup, escape(img)}, {:markup, "\"/></span>"}], r_tokens}
end
defp inner_inline_textile_element(parser, [{:code_delim, open} | r_tokens]) do
case parser.state do
%{code: _} ->
{:error, "End of rule"}
#
# image =
# image_url image_title? image_link_url?;
#
defp image(parser, [{:image_url, image_url}, {:image_title, title}, {:image_link_url, link_url} | r_tokens]) do
image_url = parser.image_transform.(image_url)
_ ->
case repeat(&inline_textile_element/2, put_state(parser, :code), r_tokens) do
{:ok, tree, [{:code_delim, _} | r2_tokens]} ->
{:ok, [{:markup, "<code>"}, tree, {:markup, "</code>"}], r2_tokens}
{:ok, [markup: ~s|<a href="#{escape_html(link_url)}"><span class="imgspoiler"><img src="#{escape_html(image_url)}" title="#{escape_html(title)}"/></span></a>|], r_tokens}
{:ok, tree, r2_tokens} ->
{:ok, [{:text, escape(open)}, tree], r2_tokens}
end
end
end
defp inner_inline_textile_element(parser, tokens) do
inline_textile_element_not_opening_markup(parser, tokens)
end
defp image(parser, [{:image_url, image_url}, {:image_title, title} | r_tokens]) do
image_url = parser.image_transform.(image_url)
#
# bq_cite_text = literal | char | space | quicktxt;
#
{:ok, [markup: ~s|<span class="imgspoiler"><img src="#{escape_html(image_url)}" title="#{escape_html(title)}"/></span>|], r_tokens}
# Note that text is not escaped here because it will be escaped
# when the tree is flattened
defp bq_cite_text(_parser, [{:literal, lit} | r_tokens]) do
{:ok, [{:text, lit}], r_tokens}
end
defp bq_cite_text(_parser, [{:char, lit} | r_tokens]) do
{:ok, [{:text, <<lit::utf8>>}], r_tokens}
end
defp bq_cite_text(_parser, [{:space, _} | r_tokens]) do
{:ok, [{:text, " "}], r_tokens}
end
defp bq_cite_text(_parser, [{:quicktxt, lit} | r_tokens]) do
{:ok, [{:text, <<lit::utf8>>}], r_tokens}
end
defp bq_cite_text(_parser, _tokens) do
{:error, "Expected cite tokens"}
end
defp image(parser, [{:image_url, image_url}, {:image_link_url, link_url} | r_tokens]) do
image_url = parser.image_transform.(image_url)
#
# inline_textile_element_not_opening_markup =
# literal | space | char |
# quicktxt opening_markup quicktxt |
# quicktxt |
# opening_block_tag block_textile_element* closing_block_tag;
#
{:ok, [markup: ~s|<a href="#{escape_html(link_url)}"><span class="imgspoiler"><img src="#{escape_html(image_url)}"/></span></a>|], r_tokens}
defp inline_textile_element_not_opening_markup(_parser, [{:literal, lit} | r_tokens]) do
{:ok, [{:markup, "<span class=\"literal\">"}, {:markup, escape(lit)}, {:markup, "</span>"}], r_tokens}
end
defp inline_textile_element_not_opening_markup(_parser, [{:space, _} | r_tokens]) do
{:ok, [{:text, " "}], r_tokens}
end
defp inline_textile_element_not_opening_markup(_parser, [{:char, lit} | r_tokens]) do
{binary, r2_tokens} = assemble_binary(:char, <<lit::utf8>>, r_tokens)
{:ok, [{:text, escape(binary)}], r2_tokens}
end
defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, q1}, {token, t}, {:quicktxt, q2} | r_tokens])
when token in [:b_delim, :i_delim, :strong_delim, :em_delim, :ins_delim, :sup_delim, :del_delim, :sub_delim]
do
{:ok, [{:text, escape(<<q1::utf8>>)}, {:text, escape(t)}, {:text, escape(<<q2::utf8>>)}], r_tokens}
end
defp inline_textile_element_not_opening_markup(_parser, [{:quicktxt, lit} | r_tokens]) do
{:ok, [{:text, escape(<<lit::utf8>>)}], r_tokens}
end
defp inline_textile_element_not_opening_markup(parser, [{:bq_cite_start, start} | r_tokens]) do
case repeat(&bq_cite_text/2, parser, r_tokens) do
{:ok, tree, [{:bq_cite_open, open} | r2_tokens]} ->
case repeat(&block_textile_element/2, parser, r2_tokens) do
{:ok, tree2, [{:bq_close, _} | r3_tokens]} ->
cite = escape(flatten(tree))
{:ok, [{:markup, "<blockquote author=\""}, {:markup, cite}, {:markup, "\">"}, tree2, {:markup, "</blockquote>"}], r3_tokens}
{:ok, tree2, r3_tokens} ->
{:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}, tree2], r3_tokens}
_ ->
{:ok, [{:text, escape(start)}, {:text, escape(flatten(tree))}, {:text, escape(open)}], r_tokens}
end
_ ->
{:ok, [{:text, escape(start)}], r_tokens}
end
end
defp inline_textile_element_not_opening_markup(_parser, [{:bq_cite_open, tok} | r_tokens]) do
{:ok, [{:text, escape(tok)}], r_tokens}
end
defp inline_textile_element_not_opening_markup(parser, tokens) do
[
{:bq_open, :bq_close, "<blockquote>", "</blockquote>"},
{:spoiler_open, :spoiler_close, "<span class=\"spoiler\">", "</span>"},
{:bracketed_b_open, :bracketed_b_close, "<b>", "</b>"},
{:bracketed_i_open, :bracketed_i_close, "<i>", "</i>"},
{:bracketed_strong_open, :bracketed_strong_close, "<strong>", "</strong>"},
{:bracketed_em_open, :bracketed_em_close, "<em>", "</em>"},
{:bracketed_code_open, :bracketed_code_close, "<code>", "</code>"},
{:bracketed_ins_open, :bracketed_ins_close, "<ins>", "</ins>"},
{:bracketed_sup_open, :bracketed_sup_close, "<sup>", "</sup>"},
{:bracketed_del_open, :bracketed_del_close, "<del>", "</del>"},
{:bracketed_sub_open, :bracketed_sub_close, "<sub>", "</sub>"}
]
|> Enum.find_value(fn {open_token, close_token, open_tag, close_tag} ->
simple_recursive(
open_token,
close_token,
open_tag,
close_tag,
&block_textile_element/2,
parser,
tokens
)
|> case do
{:ok, tree, r_tokens} ->
{:ok, tree, r_tokens}
_ ->
nil
end
end)
|> Kernel.||({:error, "Expected block markup"})
end
defp image(parser, [{:image_url, image_url} | r_tokens]) do
image_url = parser.image_transform.(image_url)
#
# block_textile_element =
# double_newline | newline | inline_textile_element;
#
{:ok, [markup: ~s|<span class="imgspoiler"><img src="#{escape_html(image_url)}"/></span>|], r_tokens}
defp block_textile_element(_parser, [{:double_newline, _} | r_tokens]) do
{:ok, [{:markup, "<br/><br/>"}], r_tokens}
end
defp block_textile_element(_parser, [{:newline, _} | r_tokens]) do
{:ok, [{:markup, "<br/>"}], r_tokens}
end
defp block_textile_element(parser, tokens) do
inline_textile_element(parser, tokens)
end
defp image(_parser, _tokens),
do: {:error, "Expected an image tag"}
#
# bold =
# b_open well_formed b_close |
# b_b_open well_formed b_b_close;
#
attribute_parser(:bold, :b_open, :b_close, "<b>", "</b>")
#
# italic =
# i_open well_formed i_close |
# b_i_open well_formed b_i_close;
#
attribute_parser(:italic, :i_open, :i_close, "<i>", "</i>")
#
# strong =
# strong_open well_formed strong_close |
# b_strong_open well_formed b_strong_close;
#
attribute_parser(:strong, :strong_open, :strong_close, "<strong>", "</strong>")
#
# emphasis =
# em_open well_formed em_close |
# b_em_open well_formed b_em_close;
#
attribute_parser(:emphasis, :em_open, :em_close, "<em>", "</em>")
#
# code =
# code_open well_formed code_close |
# b_code_open well_formed b_code_close;
#
attribute_parser(:code, :code_open, :code_close, "<code>", "</code>")
#
# inserted =
# ins_open well_formed ins_close |
# b_ins_open well_formed b_ins_close;
#
attribute_parser(:inserted, :ins_open, :ins_close, "<ins>", "</ins>")
#
# superscript =
# sup_open well_formed sup_close |
# b_sup_open well_formed b_sup_close;
#
attribute_parser(:superscript, :sup_open, :sup_close, "<sup>", "</sup>")
#
# deleted =
# del_open well_formed del_close |
# b_del_open well_formed b_del_close;
#
attribute_parser(:deleted, :del_open, :del_close, "<del>", "</del>")
#
# subscript =
# sub_open well_formed sub_close |
# b_sub_open well_formed b_sub_close;
#
attribute_parser(:subscript, :sub_open, :sub_close, "<sub>", "</sub>")
#
# Terminals
# textile =
# (block_textile_element | TOKEN)* eos;
#
defp literal(_parser, [{:literal, text} | r_tokens]),
do: {:ok, [markup: escape_nl2br(text)], r_tokens}
defp textile(parser, tokens) do
case block_textile_element(parser, tokens) do
{:ok, tree, r_tokens} ->
{:ok, tree, r_tokens}
defp literal(_parser, _tokens),
do: {:error, "Expected a literal"}
_ ->
case tokens do
[{_, string} | r_tokens] ->
{:ok, [{:text, escape(string)}], r_tokens}
defp newline(_parser, [{:newline, _nl} | r_tokens]),
do: {:ok, [markup: "<br/>"], r_tokens}
defp newline(_parser, _tokens),
do: {:error, "Expected a line break"}
defp text(_parser, [{:text, text} | r_tokens]),
do: {:ok, [text: escape_nl2br(text)], r_tokens}
defp text(_parser, _tokens),
do: {:error, "Expected text"}
_ ->
{:error, "Expected textile"}
end
end
end
end

View file

@ -1,47 +0,0 @@
defmodule Textile.ParserHelpers do
  @moduledoc """
  Shared helpers for the Textile parser: a macro generating the paired
  open/close attribute rules, plus small text-escaping utilities.
  """

  import Phoenix.HTML

  @doc """
  Generates three parser clauses for a markup attribute (e.g. bold):

    * `open_token` ... `close_token` -> content wrapped in `open_tag`/`close_tag`
    * the bracketed variants (`:"b_<open_token>"` / `:"b_<close_token>"`) -> same wrapping
    * any other token stream -> `{:error, ...}`

  An open token with no matching close degrades gracefully to escaped
  literal text followed by the parsed remainder.
  """
  defmacro attribute_parser(name, open_token, close_token, open_tag, close_tag) do
    quote do
      defp unquote(name)(parser, [{unquote(open_token), open} | r_tokens]) do
        case well_formed(parser, r_tokens) do
          {:ok, tree, [{unquote(close_token), _close} | r2_tokens]} ->
            {:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}

          {:ok, tree, r2_tokens} ->
            {:ok, [{:text, escape_html(open)}, tree], r2_tokens}
        end
      end

      # Bracketed variant: the lexer emits e.g. :b_strong_open for "[*".
      # The :"b_#{...}" atoms are built at compile time from macro
      # arguments, so this is not dynamic atom creation from input.
      defp unquote(name)(parser, [{unquote(:"b_#{open_token}"), open} | r_tokens]) do
        case well_formed(parser, r_tokens) do
          {:ok, tree, [{unquote(:"b_#{close_token}"), _close} | r2_tokens]} ->
            {:ok, [{:markup, unquote(open_tag)}, tree, {:markup, unquote(close_tag)}], r2_tokens}

          {:ok, tree, r2_tokens} ->
            {:ok, [{:text, escape_html(open)}, tree], r2_tokens}
        end
      end

      defp unquote(name)(_parser, _tokens),
        do: {:error, "Expected #{unquote(name)} tag"}
    end
  end

  @doc "Strips carriage returns, leaving only newline characters."
  def remove_linefeeds(text) do
    text
    |> to_string()
    |> String.replace("\r", "")
  end

  @doc "HTML-escapes `text`, converting each newline to a `<br/>` tag."
  def escape_nl2br(text) do
    text
    |> String.split("\n")
    |> Enum.map_join("<br/>", &escape_html/1)
  end

  @doc "HTML-escapes `text` and returns it as a plain string."
  def escape_html(text) do
    html_escape(text) |> safe_to_string()
  end
end

View file

@ -1,31 +0,0 @@
defmodule Textile.TokenCoalescer do
  @moduledoc """
  Post-processing passes that merge adjacent tokens of the same kind.

  The lexer, as a practical concern, does not coalesce runs of
  character tokens; `coalesce_lex/1` fixes that, and
  `coalesce_parse/1` does the analogous merge on the parse tree.
  """

  @doc """
  Merges runs of bare character (integer) tokens into `{:text, binary}`
  tokens; all other (tagged-tuple) tokens pass through unchanged.
  """
  def coalesce_lex(tokens) do
    tokens
    |> Enum.chunk_by(&is_number/1)
    |> Enum.flat_map(fn
      [t | _rest] = chars when is_number(t) ->
        [text: List.to_string(chars)]

      other ->
        other
    end)
  end

  @doc """
  Flattens a parse tree and joins each run of consecutive `:text`
  nodes (and, separately, each run of consecutive non-`:text` nodes)
  into a single tagged tuple labelled with the run's first tag.
  """
  def coalesce_parse(tokens) do
    tokens
    |> List.flatten()
    |> Enum.chunk_by(fn {k, _v} -> k == :text end)
    |> Enum.map(fn [{type, _v} | _rest] = chunk ->
      {type, Enum.map_join(chunk, "", fn {_k, v} -> v end)}
    end)
  end
end

View file

@ -1,34 +0,0 @@
defmodule Textile.UrlLexer do
  import NimbleParsec

  # Builds a NimbleParsec combinator that consumes a URL-like run of
  # characters, stopping just before `ending_sequence`, and reduces the
  # whole match to a single string.
  def url_ending_in(ending_sequence) do
    # A single character permitted inside a hostname label.
    label_char =
      choice([
        ascii_char([?a..?z]),
        ascii_char([?A..?Z]),
        ascii_char([?0..?9]),
        string("-")
      ])

    # Hostname: label characters, optionally dot-separated.
    hostname =
      repeat(
        choice([
          label_char |> string(".") |> concat(label_char),
          label_char
        ])
      )

    # Accepted URL prefixes: fragment, absolute path, data image URI,
    # or an http(s) scheme followed by a hostname.
    url_prefix =
      choice([
        string("#"),
        string("/"),
        string("data:image/"),
        string("https://") |> concat(hostname),
        string("http://") |> concat(hostname)
      ])

    url_prefix
    |> repeat(lookahead_not(ending_sequence) |> utf8_char([]))
    |> reduce({List, :to_string, []})
  end
end