philomena/lib/textile/lexer.ex

245 lines
5 KiB
Elixir
Raw Normal View History

2019-11-03 02:47:54 +01:00
defmodule Textile.Lexer do
  @moduledoc """
  NimbleParsec lexer for Textile-style markup.

  The combinators below are composed at compile time and handed to `defparsec/2`
  at the bottom of the module, which defines the `lex` parser. The top-level
  combinator ends in `eos()`, so a successful lex always covers the entire input.

  Tokens produced directly by this module are tagged tuples:

    * `{:bracketed_literal, text}` - text enclosed in `[== ==]`
    * `{:blockquote_open_cite, cite}`, `{:blockquote_open, "[bq]"}`,
      `{:blockquote_close, "[/bq]"}`
    * `{:spoiler_open, "[spoiler]"}`, `{:spoiler_close, "[/spoiler]"}`
    * `{:image_url, url}`, optionally followed by `{:image_title, title}` and/or
      `{:image_link_url, url}`
    * `{:link_start, delimiter}`, `{:link_end, "\\":"}`, `{:link_url, url}`

  Inline markup tokens come from the combinators returned by
  `markup_ending_in/1`, which is imported and not defined in this file.
  """

  import NimbleParsec
  import Textile.Helpers
  import Textile.MarkupLexer
  import Textile.UrlLexer

  # Structural tags

  # Literals enclosed via [== ==]
  # Will never contain any markup
  bracketed_literal =
    ignore(string("[=="))
    |> repeat(lookahead_not(string("==]")) |> utf8_char([]))
    |> ignore(string("==]"))
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:bracketed_literal)

  # Body of a blockquote cite: everything up to (not including) the next `"`.
  # A bracketed literal inside the cite is passed through `unwrap` (a local or
  # imported function, not defined in this file) so that the enclosing
  # `List.to_string/1` reduction can join it with the surrounding characters.
  blockquote_cite =
    lookahead_not(string("\""))
    |> choice([
      bracketed_literal |> reduce(:unwrap),
      utf8_char([])
    ])
    |> repeat()

  # Blockquote opening tag with cite: [bq="the author"]
  # Cite can contain bracketed literals or text
  blockquote_open_cite =
    ignore(string("[bq=\""))
    |> concat(blockquote_cite)
    |> ignore(string("\"]"))
    |> reduce({List, :to_string, []})
    |> unwrap_and_tag(:blockquote_open_cite)

  # Blockquote opening tag
  blockquote_open =
    string("[bq]")
    |> unwrap_and_tag(:blockquote_open)

  # Blockquote closing tag
  blockquote_close =
    string("[/bq]")
    |> unwrap_and_tag(:blockquote_close)

  # Spoiler open tag
  spoiler_open =
    string("[spoiler]")
    |> unwrap_and_tag(:spoiler_open)

  # Spoiler close tag
  spoiler_close =
    string("[/spoiler]")
    |> unwrap_and_tag(:spoiler_close)

  # Images

  # Image URL followed by a parenthesised title: url(title)
  # The title may not contain `)` and must be directly followed by the `!`
  # that closes the image (checked with a lookahead, not consumed here).
  image_url_with_title =
    url_ending_in(string("("))
    |> unwrap_and_tag(:image_url)
    |> concat(
      ignore(string("("))
      |> repeat(utf8_char(not: ?)))
      |> ignore(string(")"))
      |> lookahead(string("!"))
      |> reduce({List, :to_string, []})
      |> unwrap_and_tag(:image_title)
    )

  # Image URL with no title; runs up to the closing `!`.
  image_url_without_title =
    url_ending_in(string("!"))
    |> unwrap_and_tag(:image_url)

  # The titled form is tried first so that `(title)` is not swallowed by the URL.
  image_url =
    choice([
      image_url_with_title,
      image_url_without_title
    ])

  # [!image!:link]
  #
  # The trailing `]` is consumed explicitly. The sibling combinators that use
  # `url_ending_in/1` (`bracketed_link_end`, `image_url_with_title`) each
  # consume the terminator themselves, which indicates the helper stops in
  # front of it; without this `ignore` the closing bracket is left in the
  # input and lexed as ordinary text.
  bracketed_image_with_link =
    ignore(string("[!"))
    |> concat(image_url)
    |> ignore(string("!:"))
    |> concat(
      url_ending_in(string("]"))
      |> ignore(string("]"))
      |> unwrap_and_tag(:image_link_url)
    )

  # [!image!]
  bracketed_image_without_link =
    ignore(string("[!"))
    |> concat(image_url)
    |> ignore(string("!]"))

  # !image!:link - the link URL runs up to the next `space()` match.
  image_with_link =
    ignore(string("!"))
    |> concat(image_url)
    |> ignore(string("!:"))
    |> concat(
      url_ending_in(space())
      |> unwrap_and_tag(:image_link_url)
    )

  # !image!
  image_without_link =
    ignore(string("!"))
    |> concat(image_url)
    |> ignore(string("!"))

  # Bracketed forms and linked forms are tried before their shorter variants.
  image =
    choice([
      bracketed_image_with_link,
      bracketed_image_without_link,
      image_with_link,
      image_without_link
    ])

  # Links

  # NOTE(review): `markup_ending_in/1` is imported (presumably from
  # Textile.MarkupLexer); it is used here as returning a `{start, element}`
  # pair of combinators for inline markup that stops at the given terminator.
  {link_markup_start, link_markup_element} = markup_ending_in(string("\""))

  # Punctuation that ends an unbracketed link URL only when it is itself
  # followed by whitespace or the end of input.
  trailing_punctuation =
    for mark <- [".", "!", ",", "_", "?", ";"] do
      string(mark) |> concat(choice([space(), eos()]))
    end

  # Everything that terminates an unbracketed link URL. The order of the
  # alternatives matches the original hand-written list.
  link_url_stop =
    choice(
      [string("*"), string("@"), string("^"), string("~")] ++
        trailing_punctuation ++
        [space(), eos()]
    )

  # Structural tokens that may appear inside link text; shared by the "start"
  # and "element" variants below, which differ only in the markup combinator.
  link_structural = [
    image,
    spoiler_open,
    spoiler_close,
    blockquote_open,
    blockquote_open_cite,
    blockquote_close
  ]

  link_contents_start = choice(link_structural ++ [link_markup_start])

  link_contents_element = choice(link_structural ++ [link_markup_element])

  link_contents =
    optional(link_contents_start)
    |> repeat(link_contents_element)

  # `":url]` - closes a bracketed link; the `]` is consumed but not emitted.
  bracketed_link_end =
    string("\":")
    |> unwrap_and_tag(:link_end)
    |> concat(
      url_ending_in(string("]"))
      |> ignore(string("]"))
      |> unwrap_and_tag(:link_url)
    )

  # ["text":url]
  bracketed_link =
    string("[\"")
    |> unwrap_and_tag(:link_start)
    |> concat(link_contents)
    |> concat(bracketed_link_end)

  # `":url` - closes an unbracketed link; the URL stops at `link_url_stop`.
  unbracketed_link_end =
    string("\":")
    |> unwrap_and_tag(:link_end)
    |> concat(
      url_ending_in(link_url_stop)
      |> unwrap_and_tag(:link_url)
    )

  # "text":url
  unbracketed_link =
    string("\"")
    |> unwrap_and_tag(:link_start)
    |> concat(link_contents)
    |> concat(unbracketed_link_end)

  link =
    choice([
      bracketed_link,
      unbracketed_link
    ])

  # Textile

  # Terminators handed to `markup_ending_in/1` for top-level inline markup.
  markup_ends =
    choice([
      spoiler_close,
      blockquote_close,
      eos()
    ])

  {markup_start, markup_element} = markup_ending_in(markup_ends)

  # Structural constructs recognised at any position. Opening tags are
  # optionally followed by `markup_start`, i.e. the content right after an
  # opening tag is lexed with the "start" variant of the markup combinator.
  textile_default =
    choice([
      bracketed_literal,
      blockquote_open_cite |> optional(markup_start),
      blockquote_open |> optional(markup_start),
      blockquote_close,
      spoiler_open |> optional(markup_start),
      spoiler_close,
      link,
      image
    ])

  # Any position after the first token.
  textile_main =
    choice([
      textile_default,
      markup_element
    ])

  # Very first token of the input.
  textile_start =
    choice([
      textile_default,
      markup_start
    ])

  # Whole document: optional start token, then main tokens until end of input.
  textile =
    optional(textile_start)
    |> repeat(textile_main)
    |> eos()

  defparsec :lex, textile
end