From 7088cc2c6e8324eaf64e7fe3b7896ea4a4022936 Mon Sep 17 00:00:00 2001 From: sgoudham Date: Mon, 2 May 2022 02:24:27 +0100 Subject: [PATCH 1/6] [TEM #1] - Add ARGS regex and use HashMap instead of (Vec, Vec) --- src/links.rs | 206 ++++++++++++++++++++++++++++----------------------- 1 file changed, 112 insertions(+), 94 deletions(-) diff --git a/src/links.rs b/src/links.rs index cfaff9a..347ed5b 100644 --- a/src/links.rs +++ b/src/links.rs @@ -1,7 +1,6 @@ -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use std::fs; use std::path::{Path, PathBuf}; -use std::str::SplitN; use aho_corasick::AhoCorasick; use anyhow::Context; @@ -13,43 +12,81 @@ const ESCAPE_CHAR: char = '\\'; const LINE_BREAKS: &[char] = &['\n', '\r']; lazy_static! { - // r"(?x)\\\{\{\#.*\}\}|\{\{\s*\#(template)\s+([a-zA-Z0-9_^'<>().:*+|\\\/?-]+)\s+([^}]+)\}\}") - static ref WHOLE_TEMPLATE: Regex = Regex::new( - r"(?x) # insignificant whitespace mode - \\\{\{\#.*\}\} # match escaped link - | # or - \{\{\s* # link opening parens and whitespace - \#(template) # link type - template - \s+ # separating whitespace - ([a-zA-Z0-9_^'<>().:*+|\\\/?-]+) # relative path to template file - \s+ # separating whitespace - ([^}]+) # get all template arguments - \}\} # link closing parens" - ) - .unwrap(); // https://stackoverflow.com/questions/22871602/optimizing-regex-to-fine-key-value-pairs-space-delimited - static ref ARGS: Regex = Regex::new(r"(?<=\s|\A)([^\s=]+)=(.*?)(?=(?:\s[^\s=]+=|$))").unwrap(); + static ref TEMPLATE_ARGS: Regex = Regex::new(r"(?<=\s|\A)([^\s=]+)=(.*?)(?=(?:\s[^\s=]+=|$))").unwrap(); + + // r"(?x)\\\{\{\#.*\}\}|\{\{\s*\#(template)\s+([a-zA-Z0-9_^'<>().:*+|\\\/?-]+)\s+([^}]+)\}\}" + static ref TEMPLATE: Regex = Regex::new( + r"(?x) # enable insignificant whitespace mode + + \\\{\{ # escaped link opening parens + \#.* # match any character + \}\} # escaped link closing parens + + | # or + + \{\{\s* # link opening parens and whitespace(s) + \#(template) # link type - template + \s+ # separating whitespace + ([\w'<>.:^\-\(\)\*\+\|\\\/\?]+) # relative path to template file + \s+ # separating whitespace(s) + ([^}]+) # get all template arguments + \}\} # link closing parens" + ) + .unwrap(); + + // r"(?x)\\\{\{\#.*\}\}|\{\{\s*\#([\w'<>.:^\-\(\)\*\+\|\\\/\?]+)\s*\}\}|\{\{\s*\#([\w'<>.:^\-\(\)\*\+\|\\\/\?]+)\s+([^}]+)\}\}" + static ref ARGS: Regex = Regex::new( + r"(?x) # enable insignificant whitespace mode + + \\\{\{ # escaped link opening parens + \#.* # match any character + \}\} # escaped link closing parens + + | # or + + \{\{\s* # link opening parens and whitespace(s) + \#([\w'<>.:^\-\(\)\*\+\|\\\/\?]+) # arg name + \s* # optional separating whitespace(s) + \}\} # link closing parens + + | # or + + \{\{\s* # link opening parens and whitespace + \#([\w'<>.:^\-\(\)\*\+\|\\\/\?]+) # arg name + \s+ # separating whitespace(s) + ([^}]+) # get default value for argument + \}\} # link closing parens" + ) + .unwrap(); } -#[derive(PartialEq, Debug, Clone)] -struct VecPair(Vec, Vec); - #[derive(PartialEq, Debug, Clone)] pub(crate) struct Link<'a> { pub(crate) start_index: usize, pub(crate) end_index: usize, pub(crate) link_type: LinkType, pub(crate) link_text: &'a str, - args: VecPair, + args: HashMap, } impl<'a> Link<'a> { fn from_capture(cap: Captures<'a>) -> Option> { - let mut keys: Vec = vec![]; - let mut values: Vec = vec![]; + let mut all_args = HashMap::with_capacity(20); let link_type = match (cap.get(0), cap.get(1), cap.get(2), cap.get(3)) { (Some(mat), _, _, _) if mat.as_str().contains(LINE_BREAKS) => { + /* + Given a template string that looks like: + {{#template + footer.md + path=../images + author=Hazel + }} + + The resulting args: will look like: + ["{{#template", "footer.md", "path=../images", "author=Hazel", "}}"] + */ let mut args = mat .as_str() .lines() @@ -57,7 +94,7 @@ impl<'a> Link<'a> { let end_trimmed = line.trim_end_matches(LINE_BREAKS); end_trimmed.trim_start_matches(LINE_BREAKS) }) - .collect::>(); + .collect::>(); // Remove {{#template args.pop_front(); @@ -66,19 +103,33 @@ impl<'a> Link<'a> { // Store relative path of template file let file = args.pop_front().unwrap(); - for arg in args { - let capture = arg.splitn(2, '='); - populate_key_values(&mut keys, &mut values, capture); - } + let split_args = args + .into_iter() + .map(|arg| { + let mut split_n = arg.splitn(2, '='); + let key = format!("{{{}}}", split_n.next().unwrap().trim()); + let value = split_n.next().unwrap(); + (key, value) + }) + .collect::>(); + all_args.extend(split_args); Some(LinkType::Template(PathBuf::from(file.trim()))) } (_, _, Some(file), Some(args)) => { - let matches = ARGS.captures_iter(args.as_str()); - for mat in matches { - let capture = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); - populate_key_values(&mut keys, &mut values, capture); - } + let matches = TEMPLATE_ARGS.captures_iter(args.as_str()); + + let split_args = matches + .into_iter() + .map(|mat| { + let mut split_n = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); + let key = format!("{{{}}}", split_n.next().unwrap().trim()); + let value = split_n.next().unwrap(); + (key, value) + }) + .collect::>(); + all_args.extend(split_args); + Some(LinkType::Template(PathBuf::from(file.as_str()))) } (Some(mat), _, _, _) if mat.as_str().starts_with(ESCAPE_CHAR) => { @@ -93,7 +144,7 @@ impl<'a> Link<'a> { end_index: mat.end(), link_type: lnk_type, link_text: mat.as_str(), - args: VecPair(keys, values), + args: all_args, }) }) } @@ -114,8 +165,10 @@ impl<'a> Link<'a> { }) .map(|hay| { let pair = &self.args; - let ac = AhoCorasick::new_auto_configured(pair.0.as_slice()); - ac.replace_all(hay.as_str(), pair.1.as_slice()) + let ac = AhoCorasick::new_auto_configured( + pair.keys().collect::>().as_slice(), + ); + ac.replace_all(hay.as_str(), pair.values().collect::>().as_slice()) }) } } @@ -158,28 +211,15 @@ impl<'a> Iterator for LinkIter<'a> { } pub(crate) fn extract_template_links(contents: &str) -> LinkIter<'_> { - LinkIter(WHOLE_TEMPLATE.captures_iter(contents)) -} - -fn populate_key_values<'a>( - keys: &mut Vec, - values: &mut Vec, - split_str: SplitN<'a, char>, -) { - for (i, capt) in split_str.enumerate() { - if i % 2 == 0 { - keys.push(format!("{{{}}}", capt.trim())); - } else { - values.push(capt.to_string()); - } - } + LinkIter(TEMPLATE.captures_iter(contents)) } #[cfg(test)] mod link_tests { + use std::collections::HashMap; use std::path::PathBuf; - use crate::links::{extract_template_links, Link, LinkType, VecPair}; + use crate::links::{extract_template_links, Link, LinkType}; use crate::replace; #[test] @@ -247,7 +287,7 @@ mod link_tests { end_index: 79, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust}}", - args: VecPair(vec!["{lang}".to_string()], vec!["rust".to_string()]) + args: HashMap::from([("{lang}".to_string(), "rust")]) },] ); } @@ -265,10 +305,10 @@ mod link_tests { end_index: 63, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust math=2+2=4}}", - args: VecPair( - vec!["{lang}".to_string(), "{math}".to_string()], - vec!["rust".to_string(), "2+2=4".to_string()], - ) + args: HashMap::from([ + ("{lang}".to_string(), "rust"), + ("{math}".to_string(), "2+2=4") + ]), },] ); } @@ -286,10 +326,10 @@ mod link_tests { end_index: 77, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust authors=Goudham & Hazel}}", - args: VecPair( - vec!["{lang}".to_string(), "{authors}".to_string()], - vec!["rust".to_string(), "Goudham & Hazel".to_string()] - ) + args: HashMap::from([ + ("{lang}".to_string(), "rust"), + ("{authors}".to_string(), "Goudham & Hazel") + ]), },] ); } @@ -307,17 +347,17 @@ mod link_tests { end_index: 87, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust authors=Goudham & Hazel}}", - args: VecPair( - vec!["{lang}".to_string(), "{authors}".to_string()], - vec!["rust".to_string(), "Goudham & Hazel".to_string()] - ) + args: HashMap::from([ + ("{lang}".to_string(), "rust"), + ("{authors}".to_string(), "Goudham & Hazel") + ]), },] ); } #[test] fn test_extract_template_links_with_special_characters() { - let s = "Some random text with {{#template foo-bar\\-baz/_c++.rs path=images}}..."; + let s = "Some random text with {{#template foo-bar\\-baz/_c++.'.rs path=images}}..."; let res = extract_template_links(s).collect::>(); @@ -325,10 +365,10 @@ mod link_tests { res, vec![Link { start_index: 22, - end_index: 68, - link_type: LinkType::Template(PathBuf::from("foo-bar\\-baz/_c++.rs")), - link_text: "{{#template foo-bar\\-baz/_c++.rs path=images}}", - args: VecPair(vec!["{path}".to_string()], vec!["images".to_string()]) + end_index: 70, + link_type: LinkType::Template(PathBuf::from("foo-bar\\-baz/_c++.'.rs")), + link_text: "{{#template foo-bar\\-baz/_c++.'.rs path=images}}", + args: HashMap::from([("{path}".to_string(), "images")]), },] ); } @@ -351,18 +391,7 @@ mod link_tests { end_index: 122, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template\n test.rs\n lang=rust\n authors=Goudham & Hazel\n year=2022\n }}", - args: VecPair( - vec![ - "{lang}".to_string(), - "{authors}".to_string(), - "{year}".to_string() - ], - vec![ - "rust".to_string(), - "Goudham & Hazel".to_string(), - "2022".to_string() - ] - ) + args: HashMap::from([("{lang}".to_string(), "rust"), ("{authors}".to_string(), "Goudham & Hazel"), ("{year}".to_string(), "2022")]), },] ); } @@ -385,18 +414,7 @@ year=2022 end_index: 78, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template\n test.rs\nlang=rust\n authors=Goudham & Hazel\nyear=2022\n}}", - args: VecPair( - vec![ - "{lang}".to_string(), - "{authors}".to_string(), - "{year}".to_string() - ], - vec![ - "rust".to_string(), - "Goudham & Hazel".to_string(), - "2022".to_string() - ] - ) + args: HashMap::from([("{lang}".to_string(), "rust"), ("{authors}".to_string(), "Goudham & Hazel"), ("{year}".to_string(), "2022")]), },] ); } From 00879a22d10d77f18c7b7f6562d930e0ec681a07 Mon Sep 17 00:00:00 2001 From: sgoudham Date: Mon, 2 May 2022 02:31:39 +0100 Subject: [PATCH 2/6] [TEM #1] - Refactor method 'replace' -> 'replace_template' --- src/lib.rs | 11 ++++++++--- src/links.rs | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8978361..e6ced57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,7 +36,7 @@ impl Preprocessor for Template { .map(|dir| src_dir.join(dir)) .expect("All book items have a parent"); - let content = replace(&chapter.content, base, source, 0); + let content = replace_template(&chapter.content, base, source, 0); chapter.content = content; } } @@ -50,7 +50,7 @@ impl Preprocessor for Template { } } -fn replace(chapter_content: &str, base: P1, source: P2, depth: usize) -> String +fn replace_template(chapter_content: &str, base: P1, source: P2, depth: usize) -> String where P1: AsRef, P2: AsRef, @@ -68,7 +68,12 @@ where Ok(new_content) => { if depth < MAX_LINK_NESTED_DEPTH { if let Some(rel_path) = link.link_type.relative_path(path) { - replaced.push_str(&replace(&new_content, rel_path, source, depth + 1)); + replaced.push_str(&replace_template( + &new_content, + rel_path, + source, + depth + 1, + )); } else { replaced.push_str(&new_content); } diff --git a/src/links.rs b/src/links.rs index 347ed5b..cf8ea1a 100644 --- a/src/links.rs +++ b/src/links.rs @@ -220,7 +220,7 @@ mod link_tests { use std::path::PathBuf; use crate::links::{extract_template_links, Link, LinkType}; - use crate::replace; + use crate::replace_template; #[test] fn test_escaped_template_link() { @@ -234,7 +234,7 @@ mod link_tests { ```hbs {{#template template.md}} << an escaped link! ```"; - assert_eq!(replace(start, "", "", 0), end); + assert_eq!(replace_template(start, "", "", 0), end); } #[test] From d405808a9ba4f276a99d5e0e0ac2cb81a782b14e Mon Sep 17 00:00:00 2001 From: sgoudham Date: Mon, 2 May 2022 03:11:35 +0100 Subject: [PATCH 3/6] [TEM #1] - Add TemplateArgsIter --- src/links.rs | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/links.rs b/src/links.rs index cf8ea1a..72599f2 100644 --- a/src/links.rs +++ b/src/links.rs @@ -91,8 +91,8 @@ impl<'a> Link<'a> { .as_str() .lines() .map(|line| { - let end_trimmed = line.trim_end_matches(LINE_BREAKS); - end_trimmed.trim_start_matches(LINE_BREAKS) + line.trim_end_matches(LINE_BREAKS) + .trim_start_matches(LINE_BREAKS) }) .collect::>(); @@ -117,19 +117,7 @@ impl<'a> Link<'a> { Some(LinkType::Template(PathBuf::from(file.trim()))) } (_, _, Some(file), Some(args)) => { - let matches = TEMPLATE_ARGS.captures_iter(args.as_str()); - - let split_args = matches - .into_iter() - .map(|mat| { - let mut split_n = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); - let key = format!("{{{}}}", split_n.next().unwrap().trim()); - let value = split_n.next().unwrap(); - (key, value) - }) - .collect::>(); - all_args.extend(split_args); - + all_args.extend(extract_template_args(args.as_str()).collect::>()); Some(LinkType::Template(PathBuf::from(file.as_str()))) } (Some(mat), _, _, _) if mat.as_str().starts_with(ESCAPE_CHAR) => { @@ -200,7 +188,8 @@ pub(crate) struct LinkIter<'a>(CaptureMatches<'a, 'a>); impl<'a> Iterator for LinkIter<'a> { type Item = Link<'a>; - fn next(&mut self) -> Option> { + + fn next(&mut self) -> Option { for cap in &mut self.0 { if let Some(inc) = Link::from_capture(cap.unwrap()) { return Some(inc); @@ -214,6 +203,26 @@ pub(crate) fn extract_template_links(contents: &str) -> LinkIter<'_> { LinkIter(TEMPLATE.captures_iter(contents)) } +struct TemplateArgsIter<'a>(CaptureMatches<'a, 'a>); + +impl<'a> Iterator for TemplateArgsIter<'a> { + type Item = (String, &'a str); + + fn next(&mut self) -> Option { + for mat in &mut self.0 { + let mut split_capt = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); + let key = format!("{{{}}}", split_capt.next().unwrap().trim()); + let value = split_capt.next().unwrap(); + return Some((key, value)); + } + None + } +} + +fn extract_template_args(contents: &str) -> TemplateArgsIter<'_> { + TemplateArgsIter(TEMPLATE_ARGS.captures_iter(contents)) +} + #[cfg(test)] mod link_tests { use std::collections::HashMap; From 9c32fabb50a8ad9d2f2d2f9d08f039131302b683 Mon Sep 17 00:00:00 2001 From: sgoudham Date: Mon, 2 May 2022 03:12:08 +0100 Subject: [PATCH 4/6] [TEM #1] - Rename method 'substitute_args_in_template' -> 'replace_args' --- src/lib.rs | 2 +- src/links.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e6ced57..c2b7e2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,7 +64,7 @@ where for link in links::extract_template_links(chapter_content) { replaced.push_str(&chapter_content[previous_end_index..link.start_index]); - match link.substitute_args_in_template(&path) { + match link.replace_args(&path) { Ok(new_content) => { if depth < MAX_LINK_NESTED_DEPTH { if let Some(rel_path) = link.link_type.relative_path(path) { diff --git a/src/links.rs b/src/links.rs index 72599f2..85e7eae 100644 --- a/src/links.rs +++ b/src/links.rs @@ -137,7 +137,7 @@ impl<'a> Link<'a> { }) } - pub(crate) fn substitute_args_in_template>(&self, base: P) -> Result { + pub(crate) fn replace_args>(&self, base: P) -> Result { match self.link_type { LinkType::Escaped => Ok((&self.link_text[1..]).to_owned()), LinkType::Template(ref pat) => { From 913d8edd7a5dbc31a60d311555a1f33f738e6d9f Mon Sep 17 00:00:00 2001 From: sgoudham Date: Mon, 2 May 2022 08:06:42 +0100 Subject: [PATCH 5/6] [TEM #1] - Stop using Ahacorasick and now replace arguments manually --- src/links.rs | 286 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 237 insertions(+), 49 deletions(-) diff --git a/src/links.rs b/src/links.rs index 85e7eae..a0f9346 100644 --- a/src/links.rs +++ b/src/links.rs @@ -3,8 +3,8 @@ use std::fs; use std::path::{Path, PathBuf}; use aho_corasick::AhoCorasick; -use anyhow::Context; -use fancy_regex::{CaptureMatches, Captures, Regex}; +use anyhow::{Context, Error}; +use fancy_regex::{CaptureMatches, Captures, Match, Regex}; use lazy_static::lazy_static; use mdbook::errors::Result; @@ -48,14 +48,6 @@ lazy_static! { \{\{\s* # link opening parens and whitespace(s) \#([\w'<>.:^\-\(\)\*\+\|\\\/\?]+) # arg name \s* # optional separating whitespace(s) - \}\} # link closing parens - - | # or - - \{\{\s* # link opening parens and whitespace - \#([\w'<>.:^\-\(\)\*\+\|\\\/\?]+) # arg name - \s+ # separating whitespace(s) - ([^}]+) # get default value for argument \}\} # link closing parens" ) .unwrap(); @@ -67,7 +59,7 @@ pub(crate) struct Link<'a> { pub(crate) end_index: usize, pub(crate) link_type: LinkType, pub(crate) link_text: &'a str, - args: HashMap, + args: HashMap<&'a str, &'a str>, } impl<'a> Link<'a> { @@ -107,7 +99,7 @@ impl<'a> Link<'a> { .into_iter() .map(|arg| { let mut split_n = arg.splitn(2, '='); - let key = format!("{{{}}}", split_n.next().unwrap().trim()); + let key = split_n.next().unwrap().trim(); let value = split_n.next().unwrap(); (key, value) }) @@ -143,21 +135,15 @@ impl<'a> Link<'a> { LinkType::Template(ref pat) => { let target = base.as_ref().join(pat); - fs::read_to_string(&target) - .with_context(|| { - format!( - "Could not read template file {} ({})", - self.link_text, - target.display(), - ) - }) - .map(|hay| { - let pair = &self.args; - let ac = AhoCorasick::new_auto_configured( - pair.keys().collect::>().as_slice(), - ); - ac.replace_all(hay.as_str(), pair.values().collect::>().as_slice()) - }) + let contents = fs::read_to_string(&target).with_context(|| { + format!( + "Could not read template file {} ({})", + self.link_text, + target.display(), + ) + })?; + + Ok(Args::replace(contents.as_str(), &self.args)) } } } @@ -206,13 +192,13 @@ pub(crate) fn extract_template_links(contents: &str) -> LinkIter<'_> { struct TemplateArgsIter<'a>(CaptureMatches<'a, 'a>); impl<'a> Iterator for TemplateArgsIter<'a> { - type Item = (String, &'a str); + type Item = (&'a str, &'a str); fn next(&mut self) -> Option { - for mat in &mut self.0 { - let mut split_capt = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); - let key = format!("{{{}}}", split_capt.next().unwrap().trim()); - let value = split_capt.next().unwrap(); + for cap in &mut self.0 { + let mut split_args = cap.unwrap().get(0).unwrap().as_str().splitn(2, '='); + let key = split_args.next().unwrap().trim(); + let value = split_args.next().unwrap(); return Some((key, value)); } None @@ -223,12 +209,106 @@ fn extract_template_args(contents: &str) -> TemplateArgsIter<'_> { TemplateArgsIter(TEMPLATE_ARGS.captures_iter(contents)) } +#[derive(PartialEq, Debug, Clone)] +struct Args<'a> { + start_index: usize, + end_index: usize, + args_type: ArgsType<'a>, + args_text: &'a str, +} + +impl<'a> Args<'a> { + fn replace(contents: &str, all_args: &HashMap<&str, &str>) -> String { + // Must keep track of indices as they will not correspond after string substitution + let mut previous_end_index = 0; + let mut replaced = String::with_capacity(contents.len()); + + for captured_arg in extract_args(contents) { + replaced.push_str(&contents[previous_end_index..captured_arg.start_index]); + + match captured_arg.args_type { + ArgsType::Escaped => replaced.push_str(&captured_arg.args_text[1..]), + ArgsType::Plain(argument) => match all_args.get(argument) { + None => {} + Some(value) => replaced.push_str(value), + }, + ArgsType::Default(argument, default_value) => { + // [TEM #2] + // check if captured_arg exists within hashmap + // if so, replace arg with corresponding value and push to replaced string + // if not, replace arg with default value and push to replaced string + } + } + + previous_end_index = captured_arg.end_index; + } + + replaced.push_str(&contents[previous_end_index..]); + replaced + } + + fn from_capture(cap: Captures<'a>) -> Option> { + let arg_type = match (cap.get(0), cap.get(1), cap.get(2)) { + (_, Some(argument), None) => { + println!("Argument -> {:?}", argument); + Some(ArgsType::Plain(argument.as_str())) + } + (_, Some(argument), Some(default_value)) => { + println!("Argument -> {:?}", argument); + println!("Default Value -> {:?}", default_value); + Some(ArgsType::Default(argument.as_str(), default_value.as_str())) + } + (Some(mat), _, _) if mat.as_str().starts_with(ESCAPE_CHAR) => { + println!("Escaped -> {}", mat.as_str()); + Some(ArgsType::Escaped) + } + _ => None, + }; + + arg_type.and_then(|arg_type| { + cap.get(0).map(|capt| Args { + start_index: capt.start(), + end_index: capt.end(), + args_type: arg_type, + args_text: capt.as_str(), + }) + }) + } +} + +#[derive(PartialEq, Debug, Clone)] +enum ArgsType<'a> { + Escaped, + Plain(&'a str), + Default(&'a str, &'a str), +} + +struct ArgsIter<'a>(CaptureMatches<'a, 'a>); + +impl<'a> Iterator for ArgsIter<'a> { + type Item = Args<'a>; + + fn next(&mut self) -> Option { + for cap in &mut self.0 { + if let Some(inc) = Args::from_capture(cap.unwrap()) { + return Some(inc); + } + } + None + } +} + +fn extract_args(contents: &str) -> ArgsIter<'_> { + ArgsIter(ARGS.captures_iter(contents)) +} + #[cfg(test)] mod link_tests { + use std::any::Any; use std::collections::HashMap; use std::path::PathBuf; - use crate::links::{extract_template_links, Link, LinkType}; + use crate::links::{extract_args, extract_template_links, Args, ArgsType, Link, LinkType}; use crate::replace_template; #[test] @@ -296,7 +376,7 @@ mod link_tests { end_index: 79, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust}}", - args: HashMap::from([("{lang}".to_string(), "rust")]) + args: HashMap::from([("lang", "rust")]) },] ); } @@ -314,10 +394,7 @@ mod link_tests { end_index: 63, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust math=2+2=4}}", - args: HashMap::from([ - ("{lang}".to_string(), "rust"), - ("{math}".to_string(), "2+2=4") - ]), + args: HashMap::from([("lang", "rust"), ("math", "2+2=4")]), },] ); } @@ -335,10 +412,7 @@ mod link_tests { end_index: 77, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust authors=Goudham & Hazel}}", - args: HashMap::from([ - ("{lang}".to_string(), "rust"), - ("{authors}".to_string(), "Goudham & Hazel") - ]), + args: HashMap::from([("lang", "rust"), ("authors", "Goudham & Hazel")]), },] ); } @@ -356,10 +430,7 @@ mod link_tests { end_index: 87, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template test.rs lang=rust authors=Goudham & Hazel}}", - args: HashMap::from([ - ("{lang}".to_string(), "rust"), - ("{authors}".to_string(), "Goudham & Hazel") - ]), + args: HashMap::from([("lang", "rust"), ("authors", "Goudham & Hazel")]), },] ); } @@ -377,7 +448,7 @@ mod link_tests { end_index: 70, link_type: LinkType::Template(PathBuf::from("foo-bar\\-baz/_c++.'.rs")), link_text: "{{#template foo-bar\\-baz/_c++.'.rs path=images}}", - args: HashMap::from([("{path}".to_string(), "images")]), + args: HashMap::from([("path", "images")]), },] ); } @@ -400,7 +471,7 @@ mod link_tests { end_index: 122, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template\n test.rs\n lang=rust\n authors=Goudham & Hazel\n year=2022\n }}", - args: HashMap::from([("{lang}".to_string(), "rust"), ("{authors}".to_string(), "Goudham & Hazel"), ("{year}".to_string(), "2022")]), + args: HashMap::from([("lang", "rust"), ("authors", "Goudham & Hazel"), ("year", "2022")]), },] ); } @@ -423,8 +494,125 @@ year=2022 end_index: 78, link_type: LinkType::Template(PathBuf::from("test.rs")), link_text: "{{#template\n test.rs\nlang=rust\n authors=Goudham & Hazel\nyear=2022\n}}", - args: HashMap::from([("{lang}".to_string(), "rust"), ("{authors}".to_string(), "Goudham & Hazel"), ("{year}".to_string(), "2022")]), + args: HashMap::from([("lang", "rust"), ("authors", "Goudham & Hazel"), ("year", "2022")]), },] ); } + + #[test] + fn test_extract_zero_args() { + let s = "This is some text without any template links"; + assert_eq!(extract_args(s).collect::>(), vec![]) + } + + #[test] + fn test_extract_args_partial_match() { + let s = "Some random text with {{#height..."; + assert_eq!(extract_args(s).collect::>(), vec![]); + let s = "Some random text with {{#image ferris.png..."; + assert_eq!(extract_args(s).collect::>(), vec![]); + let s = "Some random text with {{#width 550..."; + assert_eq!(extract_args(s).collect::>(), vec![]); + let s = "Some random text with \\{{#title..."; + assert_eq!(extract_args(s).collect::>(), vec![]); + } + + #[test] + fn test_extract_args_empty() { + let s = "Some random text with {{}} {{#}}..."; + assert_eq!(extract_args(s).collect::>(), vec![]); + } + + #[test] + fn test_extract_args_simple() { + let s = "This is some random text with {{#path}} and then some more random text"; + + let res = extract_args(s).collect::>(); + + assert_eq!( + res, + vec![Args { + start_index: 30, + end_index: 39, + args_type: ArgsType::Plain("path"), + args_text: "{{#path}}" + }] + ); + } + + #[test] + fn test_extract_args_escaped() { + let start = r" + Example Text + \{{#height 200px}} << an escaped argument! + "; + let end = r" + Example Text + {{#height 200px}} << an escaped argument! + "; + assert_eq!(Args::replace(start, &HashMap::<&str, &str>::new()), end); + } + + #[test] + fn test_replace_args_simple() { + let start = r" + Example Text + {{#height}} << an argument! + "; + let end = r" + Example Text + 200px << an argument! + "; + assert_eq!( + Args::replace(start, &HashMap::from([("height", "200px")])), + end + ); + } + + #[test] + fn test_extract_args_with_spaces() { + let s1 = "This is some random text with {{ #path }}"; + let s2 = "This is some random text with {{#path }}"; + let s3 = "This is some random text with {{ #path}}"; + + let res1 = extract_args(s1).collect::>(); + let res2 = extract_args(s2).collect::>(); + let res3 = extract_args(s3).collect::>(); + + assert_eq!( + res1, + vec![Args { + start_index: 30, + end_index: 51, + args_type: ArgsType::Plain("path"), + args_text: "{{ #path }}" + }] + ); + + assert_eq!( + res2, + vec![Args { + start_index: 30, + end_index: 46, + args_type: ArgsType::Plain("path"), + args_text: "{{#path }}" + }] + ); + + assert_eq!( + res3, + vec![Args { + start_index: 30, + end_index: 44, + args_type: ArgsType::Plain("path"), + args_text: "{{ #path}}" + }] + ); + } + + // #[test] + fn test_extract_args_with_default_value() {} + + // #[test] + fn test_extract_args_with_default_value_and_spaces() {} } \ No newline at end of file From 53da878bf347bf531740a9457d4ceec0dfd2cc22 Mon Sep 17 00:00:00 2001 From: sgoudham Date: Mon, 2 May 2022 08:09:40 +0100 Subject: [PATCH 6/6] [TEM #1] - Remove dependency on aha-corasick --- Cargo.toml | 3 +-- src/links.rs | 7 +++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 36f324b..48fa6fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,5 +27,4 @@ anyhow = "1.0.57" env_logger = "0.9.0" log = "0.4.16" lazy_static = "1.4.0" -fancy-regex = "0.10.0" -aho-corasick = "0.7.18" \ No newline at end of file +fancy-regex = "0.10.0" \ No newline at end of file diff --git a/src/links.rs b/src/links.rs index a0f9346..632482a 100644 --- a/src/links.rs +++ b/src/links.rs @@ -2,9 +2,8 @@ use std::collections::{HashMap, VecDeque}; use std::fs; use std::path::{Path, PathBuf}; -use aho_corasick::AhoCorasick; -use anyhow::{Context, Error}; -use fancy_regex::{CaptureMatches, Captures, Match, Regex}; +use anyhow::Context; +use fancy_regex::{CaptureMatches, Captures, Regex}; use lazy_static::lazy_static; use mdbook::errors::Result; @@ -195,7 +194,7 @@ impl<'a> Iterator for TemplateArgsIter<'a> { type Item = (&'a str, &'a str); fn next(&mut self) -> Option { - for cap in &mut self.0 { + if let Some(cap) = (&mut self.0).next() { let mut split_args = cap.unwrap().get(0).unwrap().as_str().splitn(2, '='); let key = split_args.next().unwrap().trim(); let value = split_args.next().unwrap();