From 0a7e6fd504b10042c77efef01bb7579280c8489f Mon Sep 17 00:00:00 2001 From: sgoudham Date: Sat, 30 Apr 2022 04:25:45 +0100 Subject: [PATCH] [v0.1.0] - Separate all links logic into links.rs --- src/lib.rs | 133 ++---------------------------------------------- src/links.rs | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 129 deletions(-) create mode 100644 src/links.rs diff --git a/src/lib.rs b/src/lib.rs index 097df81..8978361 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,25 +1,14 @@ -use std::fs; -use std::path::{Path, PathBuf}; +use std::path::Path; -use aho_corasick::AhoCorasick; -use anyhow::Context; -use fancy_regex::{CaptureMatches, Captures, Regex}; -use lazy_static::lazy_static; use log::{error, warn}; use mdbook::book::Book; use mdbook::errors::Result; use mdbook::preprocess::{Preprocessor, PreprocessorContext}; use mdbook::BookItem; -const ESCAPE_CHAR: char = '\\'; -const MAX_LINK_NESTED_DEPTH: usize = 10; +mod links; -lazy_static! { - // https://stackoverflow.com/questions/22871602/optimizing-regex-to-fine-key-value-pairs-space-delimited - static ref ARGS: Regex = Regex::new(r"(?<=\s|\A)([^\s=]+)=(.*?)(?=(?:\s[^\s=]+=|$))").unwrap(); - // TODO: Explain This Horrible Mess - static ref WHOLE_TEMPLATE: Regex = Regex::new(r"\\\{\{\#.*\}\}|\{\{\s*\#(template)\s+([a-zA-Z0-9_.\/-]+)\s*([^}]+)\}\}").unwrap(); -} +const MAX_LINK_NESTED_DEPTH: usize = 10; #[derive(Default)] pub struct Template; @@ -72,7 +61,7 @@ where let mut previous_end_index = 0; let mut replaced = String::with_capacity(chapter_content.len()); - for link in extract_template_links(chapter_content) { + for link in links::extract_template_links(chapter_content) { replaced.push_str(&chapter_content[previous_end_index..link.start_index]); match link.substitute_args_in_template(&path) { @@ -105,118 +94,4 @@ where replaced.push_str(&chapter_content[previous_end_index..]); replaced -} - -#[derive(PartialEq, Debug, Clone)] -enum LinkType { - Escaped, - Template(PathBuf), -} - -impl LinkType { - fn relative_path>(self, base: P) -> Option { - match self { - LinkType::Escaped => None, - LinkType::Template(p) => Some(return_relative_path(base.as_ref(), &p)), - } - } -} - -fn return_relative_path>(base: P, relative: P) -> PathBuf { - base.as_ref() - .join(relative) - .parent() - .expect("Included file should not be /") - .to_path_buf() -} - -#[derive(PartialEq, Debug, Clone)] -struct VecPair(Vec, Vec); - -#[derive(PartialEq, Debug, Clone)] -struct Link<'a> { - start_index: usize, - end_index: usize, - args: VecPair, - link_type: LinkType, - link_text: &'a str, -} - -impl<'a> Link<'a> { - fn from_capture(cap: Captures<'a>) -> Option> { - let mut keys: Vec = vec![]; - let mut values: Vec = vec![]; - - let link_type = match (cap.get(0), cap.get(1), cap.get(2), cap.get(3)) { - (_, _, Some(file), Some(args)) => { - let matches = ARGS.captures_iter(args.as_str()); - for mat in matches { - let capture = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); - for (i, capt) in capture.enumerate() { - if i % 2 == 0 { - keys.push(format!("{{{}}}", capt)); - } else { - values.push(capt.to_string()); - } - } - } - Some(LinkType::Template(PathBuf::from(file.as_str()))) - } - (Some(mat), _, _, _) if mat.as_str().starts_with(ESCAPE_CHAR) => { - Some(LinkType::Escaped) - } - _ => None, - }; - - link_type.and_then(|lnk_type| { - cap.get(0).map(|mat| Link { - start_index: mat.start(), - end_index: mat.end(), - args: VecPair(keys, values), - link_type: lnk_type, - link_text: mat.as_str(), - }) - }) - } - - fn substitute_args_in_template>(&self, base: P) -> Result { - match self.link_type { - LinkType::Escaped => Ok((&self.link_text[1..]).to_owned()), - LinkType::Template(ref pat) => { - let target = base.as_ref().join(pat); - - fs::read_to_string(&target) - .with_context(|| { - format!( - "Could not read file for link {} ({})", - self.link_text, - target.display(), - ) - }) - .map(|hay| { - let pair = &self.args; - let ac = AhoCorasick::new_auto_configured(pair.0.as_slice()); - ac.replace_all(hay.as_str(), pair.1.as_slice()) - }) - } - } - } -} - -struct LinkIter<'a>(CaptureMatches<'a, 'a>); - -impl<'a> Iterator for LinkIter<'a> { - type Item = Link<'a>; - fn next(&mut self) -> Option> { - for cap in &mut self.0 { - if let Some(inc) = Link::from_capture(cap.unwrap()) { - return Some(inc); - } - } - None - } -} - -fn extract_template_links(contents: &str) -> LinkIter<'_> { - LinkIter(WHOLE_TEMPLATE.captures_iter(contents)) } \ No newline at end of file diff --git a/src/links.rs b/src/links.rs new file mode 100644 index 0000000..5fa9c93 --- /dev/null +++ b/src/links.rs @@ -0,0 +1,141 @@ +use std::fs; +use std::path::{Path, PathBuf}; + +use aho_corasick::AhoCorasick; +use anyhow::Context; +use fancy_regex::{CaptureMatches, Captures, Regex}; +use lazy_static::lazy_static; +use mdbook::errors::Result; + +const ESCAPE_CHAR: char = '\\'; + +lazy_static! { + // r"(?x)\\\{\{\#.*\}\}|\{\{\s*\#(template)\s+([a-zA-Z0-9_.\/-]+)\s*([^}]+)\}\}") + static ref WHOLE_TEMPLATE: Regex = Regex::new( + r"(?x) # insignificant whitespace mode + \\\{\{\#.*\}\} # match escaped link + | # or + \{\{\s* # link opening parens and whitespace + \#(template) # link type - template + \s+ # separating whitespace + ([a-zA-Z0-9_.\/-]+) # relative path to template file + \s+ # separating whitespace + ([^}]+) # get all template arguments + \}\} # link closing parens" + ) + .unwrap(); + // https://stackoverflow.com/questions/22871602/optimizing-regex-to-fine-key-value-pairs-space-delimited + static ref ARGS: Regex = Regex::new(r"(?<=\s|\A)([^\s=]+)=(.*?)(?=(?:\s[^\s=]+=|$))").unwrap(); +} + +#[derive(PartialEq, Debug, Clone)] +struct VecPair(Vec, Vec); + +#[derive(PartialEq, Debug, Clone)] +pub(crate) struct Link<'a> { + pub(crate) start_index: usize, + pub(crate) end_index: usize, + pub(crate) link_type: LinkType, + pub(crate) link_text: &'a str, + args: VecPair, +} + +impl<'a> Link<'a> { + fn from_capture(cap: Captures<'a>) -> Option> { + let mut keys: Vec = vec![]; + let mut values: Vec = vec![]; + + let link_type = match (cap.get(0), cap.get(1), cap.get(2), cap.get(3)) { + (_, _, Some(file), Some(args)) => { + let matches = ARGS.captures_iter(args.as_str()); + for mat in matches { + let capture = mat.unwrap().get(0).unwrap().as_str().splitn(2, '='); + for (i, capt) in capture.enumerate() { + if i % 2 == 0 { + keys.push(format!("{{{}}}", capt)); + } else { + values.push(capt.to_string()); + } + } + } + Some(LinkType::Template(PathBuf::from(file.as_str()))) + } + (Some(mat), _, _, _) if mat.as_str().starts_with(ESCAPE_CHAR) => { + Some(LinkType::Escaped) + } + _ => None, + }; + + link_type.and_then(|lnk_type| { + cap.get(0).map(|mat| Link { + start_index: mat.start(), + end_index: mat.end(), + link_type: lnk_type, + link_text: mat.as_str(), + args: VecPair(keys, values), + }) + }) + } + + pub(crate) fn substitute_args_in_template>(&self, base: P) -> Result { + match self.link_type { + LinkType::Escaped => Ok((&self.link_text[1..]).to_owned()), + LinkType::Template(ref pat) => { + let target = base.as_ref().join(pat); + + fs::read_to_string(&target) + .with_context(|| { + format!( + "Could not read file for link {} ({})", + self.link_text, + target.display(), + ) + }) + .map(|hay| { + let pair = &self.args; + let ac = AhoCorasick::new_auto_configured(pair.0.as_slice()); + ac.replace_all(hay.as_str(), pair.1.as_slice()) + }) + } + } + } +} + +#[derive(PartialEq, Debug, Clone)] +pub(crate) enum LinkType { + Escaped, + Template(PathBuf), +} + +impl LinkType { + pub(crate) fn relative_path>(self, base: P) -> Option { + match self { + LinkType::Escaped => None, + LinkType::Template(path) => Some( + base.as_ref() + .join(path) + .parent() + .expect("Included file should not be /") + .to_path_buf(), + ), + } + } +} + +pub(crate) struct LinkIter<'a>(CaptureMatches<'a, 'a>); + +impl<'a> Iterator for LinkIter<'a> { + type Item = Link<'a>; + fn next(&mut self) -> Option> { + for cap in &mut self.0 { + if let Some(inc) = Link::from_capture(cap.unwrap()) { + return Some(inc); + } + } + None + } +} + +pub(crate) fn extract_template_links(contents: &str) -> LinkIter<'_> { + LinkIter(WHOLE_TEMPLATE.captures_iter(contents)) +} \ No newline at end of file