diff --git a/lib/textile/markup_lexer.ex b/lib/textile/markup_lexer.ex
index 6464c0cb..385c301c 100644
--- a/lib/textile/markup_lexer.ex
+++ b/lib/textile/markup_lexer.ex
@@ -5,6 +5,13 @@ defmodule Textile.MarkupLexer do
   # Markup tags
 
   def markup_ending_in(ending_sequence) do
+    double_newline =
+      string("\n\n")
+      |> unwrap_and_tag(:double_newline)
+
+    newline =
+      string("\n")
+      |> unwrap_and_tag(:newline)
 
     # The literal tag is special, because
     # 1. It needs to capture everything inside it as a distinct token.
@@ -147,6 +154,8 @@ defmodule Textile.MarkupLexer do
         bracketed_markup_opening_tags |> lookahead_not(space()),
         special_characters() |> concat(markup_opening_tags),
         markup_closing_tags |> choice([special_characters(), ending_sequence]),
+        double_newline,
+        newline,
         utf8_char([])
       ])
 
diff --git a/lib/textile/token_coalescer.ex b/lib/textile/token_coalescer.ex
new file mode 100644
index 00000000..8ef47e59
--- /dev/null
+++ b/lib/textile/token_coalescer.ex
@@ -0,0 +1,27 @@
+defmodule Textile.TokenCoalescer do
+  @moduledoc """
+  Merges runs of single-character tokens into `:text` tokens.
+
+  The lexer, as a practical concern, does not coalesce runs of
+  character tokens. This fixes that.
+  """
+
+  @doc """
+  Collapses each run of consecutive numeric (character codepoint) tokens in
+  `tokens` into a single `{:text, string}` token. All other tokens pass
+  through unchanged and in their original order.
+  """
+  def coalesce(tokens) do
+    tokens
+    |> Enum.chunk_by(&is_number/1)
+    |> Enum.flat_map(fn
+      # chunk_by groups on is_number, so a chunk whose head is a number is all numbers.
+      [t | _rest] = str when is_number(t) ->
+        [text: List.to_string(str)]
+
+      # Any other chunk is already a list of tokens; flat_map splices it back as-is.
+      t ->
+        t
+    end)
+  end
+end
\ No newline at end of file