From a21189ae10634823d3ed91f3044bfacde14d3b32 Mon Sep 17 00:00:00 2001 From: Liam Date: Sat, 9 Nov 2024 00:11:08 -0500 Subject: [PATCH] Update to Philomena comrak v0.29.0 --- .github/workflows/elixir.yml | 3 + native/philomena/Cargo.lock | 90 ++++++----- native/philomena/Cargo.toml | 2 +- native/philomena/src/domains.rs | 20 +++ native/philomena/src/lib.rs | 2 + native/philomena/src/markdown.rs | 31 +++- native/philomena/src/tests.rs | 257 +++++++++++++++++++++++++++++++ 7 files changed, 356 insertions(+), 49 deletions(-) create mode 100644 native/philomena/src/tests.rs diff --git a/.github/workflows/elixir.yml b/.github/workflows/elixir.yml index 734c0b68..38b201d4 100644 --- a/.github/workflows/elixir.yml +++ b/.github/workflows/elixir.yml @@ -68,6 +68,9 @@ jobs: - name: cargo clippy run: (cd native/philomena && cargo clippy -- -D warnings) + - name: cargo test + run: (cd native/philomena && cargo test) + lint-and-test: name: 'JavaScript Linting and Unit Tests' runs-on: ubuntu-latest diff --git a/native/philomena/Cargo.lock b/native/philomena/Cargo.lock index 623227a1..bc4af0b4 100644 --- a/native/philomena/Cargo.lock +++ b/native/philomena/Cargo.lock @@ -32,6 +32,29 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "bon" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97493a391b4b18ee918675fb8663e53646fd09321c58b46afa04e8ce2499c869" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2af3eac944c12cdf4423eab70d310da0a8e5851a18ffb192c0a5e3f7ae1663" +dependencies = [ + "darling", + "ident_case", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -44,6 +67,16 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +[[package]] +name = "caseless" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808dab3318747be122cb31d36de18d4d1c81277a76f8332a02b81a3d73463d7f" +dependencies = [ + "regex", + "unicode-normalization", +] + [[package]] name = "cc" version = "1.0.94" @@ -58,12 +91,12 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "comrak" -version = "0.24.1" -source = "git+https://github.com/philomena-dev/comrak?branch=main#6a03dabfc80033b24070dc5826c9225686e3a98a" +version = "0.29.0" +source = "git+https://github.com/philomena-dev/comrak?branch=philomena-0.29.0#0c6fb51a55dddfc1835ed2bedfe3bcb20fb9627e" dependencies = [ - "derive_builder", + "bon", + "caseless", "entities", - "http", "memchr", "once_cell", "regex", @@ -89,9 +122,9 @@ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "darling" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ "darling_core", "darling_macro", @@ -99,9 +132,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" dependencies = [ "fnv", "ident_case", @@ -113,9 +146,9 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.9" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", @@ -133,37 +166,6 @@ dependencies = [ "syn", ] -[[package]] -name = "derive_builder" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" -dependencies = [ - "derive_builder_macro", -] - -[[package]] -name = "derive_builder_core" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "derive_builder_macro" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" -dependencies = [ - "derive_builder_core", - "syn", -] - [[package]] name = "deunicode" version = "1.4.4" @@ -463,6 +465,12 @@ dependencies = [ "unreachable", ] +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + [[package]] name = "simd-adler32" version = "0.3.7" diff --git a/native/philomena/Cargo.toml b/native/philomena/Cargo.toml index 9ef7caac..19d68342 100644 --- a/native/philomena/Cargo.toml +++ b/native/philomena/Cargo.toml @@ -11,7 +11,7 @@ crate-type = ["dylib"] [dependencies] base64 = "0.21" -comrak = { git = "https://github.com/philomena-dev/comrak", branch = "main", default-features = false } +comrak = { git = "https://github.com/philomena-dev/comrak", branch = "philomena-0.29.0", default-features = false } http = "0.2" jemallocator = { version = "0.5.0", features = ["disable_initial_exec_tls"] } regex = "1" diff --git a/native/philomena/src/domains.rs b/native/philomena/src/domains.rs index c90ddb6f..c5626c12 100644 --- a/native/philomena/src/domains.rs +++ b/native/philomena/src/domains.rs @@ -1,3 +1,5 @@ +use http::Uri; +use regex::Regex; use std::env; pub fn get() -> Option> { @@ -12,3 +14,21 @@ pub fn get() -> Option> { None } + +pub fn relativize(domains: &[String], url: &str) -> Option { + let uri = url.parse::().ok()?; + + if let Some(a) = uri.authority() { + if domains.contains(&a.host().to_string()) { + if let Ok(re) = Regex::new(&format!(r#"^http(s)?://({})"#, regex::escape(a.host()))) { + return Some(re.replace(url, "").into()); + } + } + } + + Some(url.into()) +} + +pub fn relativize_careful(domains: &[String], url: &str) -> String { + relativize(domains, url).unwrap_or_else(|| url.into()) +} diff --git a/native/philomena/src/lib.rs b/native/philomena/src/lib.rs index 6c8f96f7..ccca12a0 100644 --- a/native/philomena/src/lib.rs +++ b/native/philomena/src/lib.rs @@ -5,6 +5,8 @@ use std::collections::HashMap; mod camo; mod domains; mod markdown; +#[cfg(test)] +mod tests; mod zip; #[global_allocator] diff --git a/native/philomena/src/markdown.rs b/native/philomena/src/markdown.rs index a927a6e5..778deb95 100644 --- a/native/philomena/src/markdown.rs +++ b/native/philomena/src/markdown.rs @@ -1,37 +1,54 @@ use crate::{camo, domains}; use comrak::Options; use std::collections::HashMap; +use std::sync::Arc; -fn common_options() -> Options { +pub fn common_options() -> Options { let mut options = Options::default(); + + // Upstream options options.extension.autolink = true; options.extension.table = true; options.extension.description_lists = true; options.extension.superscript = true; options.extension.strikethrough = true; - options.extension.philomena = true; options.parse.smart = true; options.render.hardbreaks = true; options.render.github_pre_lang = true; + options.render.escape = true; - options.extension.camoifier = Some(|s| camo::image_url_careful(&s)); - options.extension.philomena_domains = domains::get(); + // Philomena options + options.extension.underline = true; + options.extension.spoiler = true; + options.extension.greentext = true; + options.extension.subscript = true; + options.extension.philomena = true; + options.render.ignore_empty_links = true; + options.render.ignore_setext = true; + + options.extension.image_url_rewriter = Some(Arc::new(|url: &str| camo::image_url_careful(url))); + + if let Some(domains) = domains::get() { + options.extension.link_url_rewriter = Some(Arc::new(move |url: &str| { + domains::relativize_careful(&domains, url) + })); + } options } pub fn to_html(input: &str, reps: HashMap) -> String { let mut options = common_options(); - options.render.escape = true; - options.extension.philomena_replacements = Some(reps); + options.extension.replacements = Some(reps); comrak::markdown_to_html(input, &options) } pub fn to_html_unsafe(input: &str, reps: HashMap) -> String { let mut options = common_options(); + options.render.escape = false; options.render.unsafe_ = true; - options.extension.philomena_replacements = Some(reps); + options.extension.replacements = Some(reps); comrak::markdown_to_html(input, &options) } diff --git a/native/philomena/src/tests.rs b/native/philomena/src/tests.rs new file mode 100644 index 00000000..9f1f963f --- /dev/null +++ b/native/philomena/src/tests.rs @@ -0,0 +1,257 @@ +use std::{collections::HashMap, sync::Arc}; + +use crate::{domains, markdown::*}; + +fn test_options() -> comrak::Options { + let mut options = common_options(); + options.extension.image_url_rewriter = None; + options.extension.link_url_rewriter = None; + options +} + +fn html(input: &str, expected: &str) { + html_opts_w(input, expected, &test_options()); +} + +fn html_opts_i(input: &str, expected: &str, opts: F) +where + F: Fn(&mut comrak::Options), +{ + let mut options = test_options(); + opts(&mut options); + + html_opts_w(input, expected, &options); +} + +fn html_opts_w(input: &str, expected: &str, options: &comrak::Options) { + let output = comrak::markdown_to_html(input, options); + + if output != expected { + println!("Input:"); + println!("========================"); + println!("{}", input); + println!("========================"); + println!("Expected:"); + println!("========================"); + println!("{}", expected); + println!("========================"); + println!("Output:"); + println!("========================"); + println!("{}", output); + println!("========================"); + } + assert_eq!(output, expected); +} + +#[test] +fn subscript() { + html("H%2%O\n", "
H2O
\n"); +} + +#[test] +fn spoiler() { + html( + "The ||dog dies at the end of Marley and Me||.\n", + "
The dog dies at the end of Marley and Me.
\n", + ); +} + +#[test] +fn spoiler_in_table() { + html( + "Text | Result\n--- | ---\n`||some clever text||` | ||some clever text||\n", + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
TextResult
||some clever text||some clever text
\n" + ), + ); +} + +#[test] +fn spoiler_regressions() { + html( + "|should not be spoiler|\n||should be spoiler||\n|||should be spoiler surrounded by pipes|||", + concat!( + "
|should not be spoiler|
\n", + "should be spoiler
\n", + "|should be spoiler surrounded by pipes|
\n" + ), + ); +} + +#[test] +fn mismatched_spoilers() { + html( + "|||this is a spoiler with pipe in front||\n||this is not a spoiler|\n||this is a spoiler with pipe after|||", + concat!( + "
|this is a spoiler with pipe in front
\n", + "||this is not a spoiler|
\n", + "this is a spoiler with pipe after|
\n" + ), + ); +} + +#[test] +fn underline() { + html( + "__underlined__\n", + "
underlined
\n", + ); +} + +#[test] +fn no_setext_headings_in_philomena() { + html( + "text text\n---", + "
text text
\n
\n", + ); +} + +#[test] +fn greentext_preserved() { + html( + ">implying\n>>implying", + "
>implying
\n>>implying
\n", + ); +} + +#[test] +fn separate_quotes_on_line_end() { + html( + "> 1\n>\n> 2", + "
\n
1
\n
\n
>
\n
\n
2
\n
\n" + ); +} + +#[test] +fn unnest_quotes_on_line_end() { + html( + "> 1\n> > 2\n> 1", + "
\n
1
\n
\n
2
\n
\n
1
\n
\n", + ); +} + +#[test] +fn unnest_quotes_on_line_end_commonmark() { + html( + "> 1\n> > 2\n> \n> 1", + "
\n
1
\n
\n
2
\n
\n
1
\n
\n", + ); +} + +#[test] +fn philomena_images() { + html( + "![full](http://example.com/image.png)", + "
\"full\"
\n", + ); +} + +#[test] +fn no_empty_link() { + html_opts_i( + "[](https://example.com/evil.domain.for.seo.spam)", + "
[](https://example.com/evil.domain.for.seo.spam)
\n", + |opts| opts.extension.autolink = false, + ); + + html_opts_i( + "[ ](https://example.com/evil.domain.for.seo.spam)", + "
[ ](https://example.com/evil.domain.for.seo.spam)
\n", + |opts| opts.extension.autolink = false, + ); +} + +#[test] +fn empty_image_allowed() { + html( + "![ ](https://example.com/evil.domain.for.seo.spam)", + "
\"
\n", + ); +} + +#[test] +fn image_inside_link_allowed() { + html( + "[![](https://example.com/image.png)](https://example.com/)", + "
\"\"
\n", + ); +} + +#[test] +fn image_mention() { + html_opts_i( + "hello world >>1234p >>1337", + "
hello world
p
>>1337
\n", + |opts| { + let mut replacements = HashMap::new(); + replacements.insert("1234p".to_string(), "
p
".to_string()); + + opts.extension.replacements = Some(replacements); + }, + ); +} + +#[test] +fn image_mention_line_start() { + html_opts_i( + ">>1234p", + "
p
\n", + |opts| { + let mut replacements = HashMap::new(); + replacements.insert("1234p".to_string(), "
p
".to_string()); + + opts.extension.replacements = Some(replacements); + }, + ); +} + +#[test] +fn auto_relative_links() { + let domains: Vec = vec!["example.com".into()]; + let f = Arc::new(move |url: &str| domains::relativize_careful(&domains, url)); + + html_opts_i( + "[some link text](https://example.com/some/path)", + "\n", + |opts| { + opts.extension.link_url_rewriter = Some(f.clone()); + }, + ); + + html_opts_i( + "https://example.com/some/path", + "\n", + |opts| { + opts.extension.link_url_rewriter = Some(f.clone()); + }, + ); + + html_opts_i( + "[some link text](https://example.com/some/path?parameter=aaaaaa&other_parameter=bbbbbb#id12345)", + "\n", + |opts| { + opts.extension.link_url_rewriter = Some(f.clone()); + }, + ); + + html_opts_i( + "https://example.com/some/path?parameter=aaaaaa&other_parameter=bbbbbb#id12345", + "\n", + |opts| { + opts.extension.link_url_rewriter = Some(f.clone()); + }, + ); +}