From 75e760961041c3d3ef17d9ceffb69687b18d41f2 Mon Sep 17 00:00:00 2001 From: sgoudham Date: Sat, 30 Apr 2022 17:28:22 +0100 Subject: [PATCH] [v0.1.0] - Improve regex to capture newlines & add tests --- src/links.rs | 262 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 235 insertions(+), 27 deletions(-) diff --git a/src/links.rs b/src/links.rs index 7f1acfa..cfaff9a 100644 --- a/src/links.rs +++ b/src/links.rs @@ -1,5 +1,7 @@ +use std::collections::VecDeque; use std::fs; use std::path::{Path, PathBuf}; +use std::str::SplitN; use aho_corasick::AhoCorasick; use anyhow::Context; @@ -8,20 +10,21 @@ use lazy_static::lazy_static; use mdbook::errors::Result; const ESCAPE_CHAR: char = '\\'; +const LINE_BREAKS: &[char] = &['\n', '\r']; lazy_static! { - // r"(?x)\\\{\{\#.*\}\}|\{\{\s*\#(template)\s+([a-zA-Z0-9_.\/-]+)\s*([^}]+)\}\}") + // r"(?x)\\\{\{\#.*\}\}|\{\{\s*\#(template)\s+([a-zA-Z0-9_^'<>().:*+|\\\/?-]+)\s+([^}]+)\}\}") static ref WHOLE_TEMPLATE: Regex = Regex::new( - r"(?x) # insignificant whitespace mode - \\\{\{\#.*\}\} # match escaped link - | # or - \{\{\s* # link opening parens and whitespace - \#(template) # link type - template - \s+ # separating whitespace - ([a-zA-Z0-9_.\/-]+) # relative path to template file - \s+ # separating whitespace - ([^}]+) # get all template arguments - \}\} # link closing parens" + r"(?x) # insignificant whitespace mode + \\\{\{\#.*\}\} # match escaped link + | # or + \{\{\s* # link opening parens and whitespace + \#(template) # link type - template + \s+ # separating whitespace + ([a-zA-Z0-9_^'<>().:*+|\\\/?-]+) # relative path to template file + \s+ # separating whitespace + ([^}]+) # get all template arguments + \}\} # link closing parens" ) .unwrap(); // https://stackoverflow.com/questions/22871602/optimizing-regex-to-fine-key-value-pairs-space-delimited @@ -46,17 +49,35 @@ impl<'a> Link<'a> { let mut values: Vec = vec![]; let link_type = match (cap.get(0), cap.get(1), cap.get(2), cap.get(3)) { + (Some(mat), _, _, _) if mat.as_str().contains(LINE_BREAKS) => { + let mut args = mat + .as_str() + .lines() + .map(|line| { + let end_trimmed = line.trim_end_matches(LINE_BREAKS); + end_trimmed.trim_start_matches(LINE_BREAKS) + }) + .collect::>(); + + // Remove {{#template + args.pop_front(); + // Remove ending }} + args.pop_back(); + // Store relative path of template file + let file = args.pop_front().unwrap(); + + for arg in args { + let capture = arg.splitn(2, '='); + populate_key_values(&mut keys, &mut values, capture); + } + + Some(LinkType::Template(PathBuf::from(file.trim()))) + } (_, _, Some(file), Some(args)) => { let matches = ARGS.captures_iter(args.as_str()); for mat in matches { let capture = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); - for (i, capt) in capture.enumerate() { - if i % 2 == 0 { - keys.push(format!("{{{}}}", capt)); - } else { - values.push(capt.to_string()); - } - } + populate_key_values(&mut keys, &mut values, capture); } Some(LinkType::Template(PathBuf::from(file.as_str()))) } @@ -86,7 +107,7 @@ impl<'a> Link<'a> { fs::read_to_string(&target) .with_context(|| { format!( - "Could not read file for link {} ({})", + "Could not read template file {} ({})", self.link_text, target.display(), ) @@ -140,6 +161,20 @@ pub(crate) fn extract_template_links(contents: &str) -> LinkIter<'_> { LinkIter(WHOLE_TEMPLATE.captures_iter(contents)) } +fn populate_key_values<'a>( + keys: &mut Vec, + values: &mut Vec, + split_str: SplitN<'a, char>, +) { + for (i, capt) in split_str.enumerate() { + if i % 2 == 0 { + keys.push(format!("{{{}}}", capt.trim())); + } else { + values.push(capt.to_string()); + } + } +} + #[cfg(test)] mod link_tests { use std::path::PathBuf; @@ -164,20 +199,44 @@ mod link_tests { #[test] fn test_extract_zero_template_links() { - let string = "This is some text without any template links"; - assert_eq!(extract_template_links(string).collect::>(), vec![]) + let s = "This is some text without any template links"; + assert_eq!(extract_template_links(s).collect::>(), vec![]) } #[test] fn test_extract_zero_template_links_without_args() { - let string = "{{#template templates/footer.md}}"; - assert_eq!(extract_template_links(string).collect::>(), vec![]) + let s = "{{#template templates/footer.md}}"; + assert_eq!(extract_template_links(s).collect::>(), vec![]) } #[test] - fn test_extract_template_links_simple_link() { + fn test_extract_template_links_partial_match() { + let s = "Some random text with {{#template..."; + assert_eq!(extract_template_links(s).collect::>(), vec![]); + let s = "Some random text with {{#template footer.md..."; + assert_eq!(extract_template_links(s).collect::>(), vec![]); + let s = "Some random text with {{#template footer.md path=../images..."; + assert_eq!(extract_template_links(s).collect::>(), vec![]); + let s = "Some random text with \\{{#template..."; + assert_eq!(extract_template_links(s).collect::>(), vec![]); + } + + #[test] + fn test_extract_template_links_empty() { + let s = "Some random text with {{#template}} and {{#template }} {{}} {{#}}..."; + assert_eq!(extract_template_links(s).collect::>(), vec![]); + } + + #[test] + fn test_extract_template_links_unknown() { + let s = "Some random text with {{#templatee file.rs}} and {{#include}} {{#playground}} {{#tempate}}..."; + assert!(extract_template_links(s).collect::>() == vec![]); + } + + #[test] + fn test_extract_template_links_simple() { let s = - "Some random text with {{#template file.rs}} and {{#template test.rs test=nice}}..."; + "Some random text with {{#template file.rs}} and {{#template test.rs lang=rust}}..."; let res = extract_template_links(s).collect::>(); @@ -187,8 +246,157 @@ mod link_tests { start_index: 48, end_index: 79, link_type: LinkType::Template(PathBuf::from("test.rs")), - link_text: "{{#template test.rs test=nice}}", - args: VecPair(vec!["{test}".to_string()], vec!["nice".to_string()]) + link_text: "{{#template test.rs lang=rust}}", + args: VecPair(vec!["{lang}".to_string()], vec!["rust".to_string()]) + },] + ); + } + + #[test] + fn test_extract_template_links_simple_with_equals_sign() { + let s = "Some random text with{{#template test.rs lang=rust math=2+2=4}}..."; + + let res = extract_template_links(s).collect::>(); + + assert_eq!( + res, + vec![Link { + start_index: 21, + end_index: 63, + link_type: LinkType::Template(PathBuf::from("test.rs")), + link_text: "{{#template test.rs lang=rust math=2+2=4}}", + args: VecPair( + vec!["{lang}".to_string(), "{math}".to_string()], + vec!["rust".to_string(), "2+2=4".to_string()], + ) + },] + ); + } + + #[test] + fn test_extract_template_links_simple_with_whitespace() { + let s = "Some random text with {{#template test.rs lang=rust authors=Goudham & Hazel}}..."; + + let res = extract_template_links(s).collect::>(); + + assert_eq!( + res, + vec![Link { + start_index: 22, + end_index: 77, + link_type: LinkType::Template(PathBuf::from("test.rs")), + link_text: "{{#template test.rs lang=rust authors=Goudham & Hazel}}", + args: VecPair( + vec!["{lang}".to_string(), "{authors}".to_string()], + vec!["rust".to_string(), "Goudham & Hazel".to_string()] + ) + },] + ); + } + + #[test] + fn test_extract_template_links_simple_with_tabs() { + let s = "Some random text with {{#template test.rs lang=rust authors=Goudham & Hazel}}..."; + + let res = extract_template_links(s).collect::>(); + + assert_eq!( + res, + vec![Link { + start_index: 22, + end_index: 87, + link_type: LinkType::Template(PathBuf::from("test.rs")), + link_text: "{{#template test.rs lang=rust authors=Goudham & Hazel}}", + args: VecPair( + vec!["{lang}".to_string(), "{authors}".to_string()], + vec!["rust".to_string(), "Goudham & Hazel".to_string()] + ) + },] + ); + } + + #[test] + fn test_extract_template_links_with_special_characters() { + let s = "Some random text with {{#template foo-bar\\-baz/_c++.rs path=images}}..."; + + let res = extract_template_links(s).collect::>(); + + assert_eq!( + res, + vec![Link { + start_index: 22, + end_index: 68, + link_type: LinkType::Template(PathBuf::from("foo-bar\\-baz/_c++.rs")), + link_text: "{{#template foo-bar\\-baz/_c++.rs path=images}}", + args: VecPair(vec!["{path}".to_string()], vec!["images".to_string()]) + },] + ); + } + + #[test] + fn test_extract_template_links_newlines() { + let s = "{{#template + test.rs + lang=rust + authors=Goudham & Hazel + year=2022 + }}"; + + let res = extract_template_links(s).collect::>(); + + assert_eq!( + res, + vec![Link { + start_index: 0, + end_index: 122, + link_type: LinkType::Template(PathBuf::from("test.rs")), + link_text: "{{#template\n test.rs\n lang=rust\n authors=Goudham & Hazel\n year=2022\n }}", + args: VecPair( + vec![ + "{lang}".to_string(), + "{authors}".to_string(), + "{year}".to_string() + ], + vec![ + "rust".to_string(), + "Goudham & Hazel".to_string(), + "2022".to_string() + ] + ) + },] + ); + } + + #[test] + fn test_extract_template_links_with_newlines_tabs() { + let s = "{{#template + test.rs +lang=rust + authors=Goudham & Hazel +year=2022 +}}"; + + let res = extract_template_links(s).collect::>(); + + assert_eq!( + res, + vec![Link { + start_index: 0, + end_index: 78, + link_type: LinkType::Template(PathBuf::from("test.rs")), + link_text: "{{#template\n test.rs\nlang=rust\n authors=Goudham & Hazel\nyear=2022\n}}", + args: VecPair( + vec![ + "{lang}".to_string(), + "{authors}".to_string(), + "{year}".to_string() + ], + vec![ + "rust".to_string(), + "Goudham & Hazel".to_string(), + "2022".to_string() + ] + ) },] ); }