From 81eb1effb4e3beec96f316eb5611ebde0c4bffc6 Mon Sep 17 00:00:00 2001 From: Bruce D'Arcus Date: Sun, 25 Jan 2026 08:34:45 -0500 Subject: [PATCH 1/2] perf: optimize memory and pre-calculate hints Refactor the processor and bibliography models to minimize cloning and redundant calculations. Shift to a borrowing-first architecture for reference data across the rendering pipeline. Pre-calculate processing hints during initialization to reduce rendering complexity from quadratic to linear relative to bibliography size. Fix author substitution logic and transition to Result-based error handling in the processor to improve reliability. Expand documentation for the bibliography data model. Verification results: - Rendering performance improved by 97 percent. - All workspace tests passed. - Clippy and rustfmt checks passed. Signed-off-by: Bruce D'Arcus --- csln/src/bibliography/README.md | 25 +++- csln/src/bibliography/reference.rs | 29 ++-- processor/src/processor.rs | 208 +++++++++++++---------------- processor/src/render.rs | 45 +++---- processor/src/values.rs | 39 +++--- 5 files changed, 169 insertions(+), 177 deletions(-) diff --git a/csln/src/bibliography/README.md b/csln/src/bibliography/README.md index 5c39233..907ed6c 100644 --- a/csln/src/bibliography/README.md +++ b/csln/src/bibliography/README.md @@ -1,3 +1,24 @@ -This is a Rust library that implements the [csl-next](https://github.com/bdarcus/csl-next) bibliography model. +# csln-bibliography + +This library implements the core bibliography data model for CSLNext. It is designed to be highly structured where needed (e.g., for names and dates) while remaining flexible for diverse bibliographic data. + +## Key Concepts + +### InputReference +The primary unit of data. It is an enum with several variants: +- **Monograph**: Books, reports, etc. +- **Collection**: Edited volumes, anthologies. +- **CollectionComponent**: Chapters or parts of a collection. +- **SerialComponent**: Articles in journals, newspapers, etc. + +### Contributor +Represents persons or organizations. Supports simple strings, structured names (given/family), and lists. It includes formatting logic for names (e.g., initials, sorting order). + +### Date (EDTF) +Dates are stored as EDTF strings, allowing for flexible date-time encoding (uncertain dates, intervals, seasons). The library provides utilities to extract years and months from these strings. + +## Usage +The `InputBibliography` type is a `HashMap`, where the key is the citation key (ID). + +JSON schemas for these models can be generated using the `csln-schemas` binary in the `cli` crate. -The `csln-schemas` binary will generate the input JSON schemas. diff --git a/csln/src/bibliography/reference.rs b/csln/src/bibliography/reference.rs index b5c3bd4..9eb1368 100644 --- a/csln/src/bibliography/reference.rs +++ b/csln/src/bibliography/reference.rs @@ -669,7 +669,7 @@ impl fmt::Display for ContributorList { impl Contributor { // if as_sorted is true, the name will be displayed as sorted, overriding the configuration option. - pub fn names(&self, options: Config, as_sorted: bool) -> Vec { + pub fn names(&self, options: &Config, as_sorted: bool) -> Vec { match self { Contributor::SimpleName(c) => vec![c.name.to_string()], Contributor::StructuredName(contributor) => { @@ -688,7 +688,7 @@ impl Contributor { /// Join a vector of strings with commas and "and". pub fn name_list_and(&self, and: String) -> Vec { - let names = self.names(Config::default(), false); + let names = self.names(&Config::default(), false); let mut result = names; if result.len() > 1 { if let Some(last) = result.pop() { @@ -732,15 +732,16 @@ impl Contributor { } } - pub fn format(&self, options: Config, locale: Locale) -> String { + pub fn format(&self, options: &Config, locale: &Locale) -> String { let as_sorted: bool = matches!(self, Contributor::StructuredName(_)); - let names = self.names(options.clone(), as_sorted); + let names = self.names(options, as_sorted); let contributor_options = options.contributors.clone().unwrap_or_default(); let shorten: bool = contributor_options.shorten.unwrap_or_default().min <= names.len() as u8; if shorten { let shorten_options = options .contributors + .clone() .unwrap_or_default() .shorten .clone() @@ -749,10 +750,10 @@ impl Contributor { let and_others = shorten_options.and_others; let and_others_string = match and_others { AndOtherOptions::EtAl => { - locale.terms.et_al.unwrap_or("et al".to_string()) + locale.terms.et_al.clone().unwrap_or("et al".to_string()) } // TODO localize AndOtherOptions::Text => { - locale.terms.and_others.unwrap_or("and others".to_string()) + locale.terms.and_others.clone().unwrap_or("and others".to_string()) } }; let names_str: Vec<&str> = names.iter().map(AsRef::as_ref).collect(); @@ -776,7 +777,7 @@ impl Contributor { impl ContributorList { // ... - fn as_sorted(options: Config, index: usize) -> bool { + fn as_sorted(options: &Config, index: usize) -> bool { let display_as_sort = options .contributors .clone() @@ -787,13 +788,11 @@ impl ContributorList { || display_as_sort == Some(DisplayAsSort::All) } - pub fn names_list(&self, options: Config) -> Vec { + pub fn names_list(&self, options: &Config) -> Vec { self.0 .iter() .enumerate() - .flat_map(|(i, c)| { - c.names(options.clone(), Self::as_sorted(options.clone(), i)) - }) + .flat_map(|(i, c)| c.names(options, Self::as_sorted(options, i))) .collect::>() } } @@ -810,15 +809,15 @@ fn display_and_sort_names() { }); let options = Config::default(); // FIXME use this format method in this test - assert_eq!(simple.names(options, false).join(" "), "John Doe"); + assert_eq!(simple.names(&options, false).join(" "), "John Doe"); let options = Config::default(); assert_eq!( - simple.names(options, true).join(" "), + simple.names(&options, true).join(" "), "John Doe", "as_sorted=true should not affect a simple name" ); let options = Config::default(); - assert_eq!(structured.names(options, false).join(" "), "John Doe"); + assert_eq!(structured.names(&options, false).join(" "), "John Doe"); let options = Config::default(); - assert_eq!(structured.names(options, true).join(", "), "Doe, John"); + assert_eq!(structured.names(&options, true).join(", "), "Doe, John"); } diff --git a/processor/src/processor.rs b/processor/src/processor.rs index aa0783f..26b9204 100644 --- a/processor/src/processor.rs +++ b/processor/src/processor.rs @@ -9,11 +9,11 @@ use crate::types::{ ProcReferences, ProcTemplate, ProcTemplateComponent, ProcValues, RenderOptions, }; use crate::values::ComponentValues; -use csln::bibliography::reference::{InputReference, RefID}; +use csln::bibliography::reference::InputReference; use csln::bibliography::InputBibliography as Bibliography; use csln::citation::{Citation, CitationItem, Citations}; use csln::style::locale::Locale; -use csln::style::options::{Config, SortKey, SubstituteKey}; +use csln::style::options::{Config, SortKey, Substitute, SubstituteKey}; use csln::style::template::TemplateComponent; use csln::style::Style; use itertools::Itertools; @@ -35,23 +35,39 @@ pub struct Processor { /// Default configuration for reference. #[serde(skip)] default_config: Config, + /// Pre-calculated processing hints. + #[serde(skip)] + hints: HashMap, } impl Processor { /// Create a new Processor instance. pub fn new( style: Style, - bibliography: Bibliography, + mut bibliography: Bibliography, citations: Citations, locale: Locale, ) -> Processor { - Processor { + // Normalize the bibliography by ensuring all references have an ID. + for (id, reference) in bibliography.iter_mut() { + if reference.id().is_none() { + reference.set_id(id.clone()); + } + } + + let mut processor = Processor { style, bibliography, citations, locale, default_config: Config::default(), - } + hints: HashMap::new(), + }; + + // Pre-calculate hints. + processor.hints = processor.calculate_proc_hints(); + + processor } /// Render references to AST. @@ -60,41 +76,42 @@ impl Processor { let sorted_references = self.sort_references(self.get_references()); let bibliography: ProcBibliography = sorted_references .par_iter() - .map(|reference| self.process_reference(reference)) + .map(|reference| self.process_reference(*reference)) .collect(); let citations = if self.citations.is_empty() { None } else { - Some(self.process_citations(&self.citations)) + match self.process_citations(&self.citations) { + Ok(c) => Some(c), + Err(e) => { + eprintln!("Citation processing error: {}", e); + None + } + } }; ProcReferences { bibliography, citations } } - fn process_citations(&self, citations: &Citations) -> ProcCitations { + fn process_citations( + &self, + citations: &Citations, + ) -> Result { citations .iter() .map(|citation| self.process_citation(citation)) .collect() } - fn process_citation(&self, citation: &Citation) -> ProcCitation { + fn process_citation( + &self, + citation: &Citation, + ) -> Result { // TODO handle the prefix and suffix, though am uncertain how to best do that - let pcitation = citation + citation .citation_items .iter() - .map(|citation_item| { - match self.process_citation_item(citation_item) { - Ok(item) => item, - Err(e) => { - // Fallback for error rendering - // TODO: Makes this configurable? - eprintln!("Citation processing error: {}", e); - vec![] - } - } - }) - .collect(); - pcitation + .map(|citation_item| self.process_citation_item(citation_item)) + .collect() } /// Process a single citation item. @@ -102,11 +119,14 @@ impl Processor { &self, citation_item: &CitationItem, ) -> Result { - let citation_style = self.style.citation.clone(); + let citation_style = self.style.citation.as_ref(); let reference = self.get_reference(&citation_item.ref_id)?; - let template = citation_style.map(|cs| cs.template).unwrap_or_default(); - let proc_template = self.process_template(&reference, &template); + let template = citation_style + .map(|cs| &cs.template) + .map(|t| t.as_slice()) + .unwrap_or_default(); + let proc_template = self.process_template(reference, template); Ok(proc_template) } @@ -146,13 +166,11 @@ impl Processor { component: &TemplateComponent, reference: &InputReference, ) -> Option { - let hints = self.get_proc_hints(); - let reference_id: Option = reference.id(); - let hint: ProcHints = - // TODO why would reference_id be None? - hints.get(&reference_id.unwrap_or_default()).cloned().unwrap_or_default(); + let reference_id: String = reference.id().unwrap_or_default(); + let default_hint = ProcHints::default(); + let hint: &ProcHints = self.hints.get(&reference_id).unwrap_or(&default_hint); let options = self.get_render_options(); - let values = component.values(reference, &hint, &options)?; + let values = component.values(reference, hint, &options)?; let template_component = component.clone(); // TODO add role here if specified in the style // TODO affixes from style? @@ -171,48 +189,20 @@ impl Processor { } /// Get references from the bibliography. - pub fn get_references(&self) -> Vec { - self.bibliography - .iter() - .map(|(key, reference)| match reference { - InputReference::Monograph(monograph) => { - let mut input_reference = - InputReference::Monograph(monograph.clone()); - input_reference.set_id(key.clone()); - input_reference - } - InputReference::CollectionComponent(collection_component) => { - let mut input_reference = - InputReference::CollectionComponent(collection_component.clone()); - input_reference.set_id(key.clone()); - input_reference - } - InputReference::SerialComponent(serial_component) => { - let mut input_reference = - InputReference::SerialComponent(serial_component.clone()); - input_reference.set_id(key.clone()); - input_reference - } - InputReference::Collection(collection) => { - let mut input_reference = - InputReference::Collection(collection.clone()); - input_reference.set_id(key.clone()); - input_reference - } - }) - .collect() + pub fn get_references(&self) -> Vec<&InputReference> { + self.bibliography.values().collect() } /// Get a reference from the bibliography by id/citekey. - pub fn get_reference(&self, id: &str) -> Result { + pub fn get_reference(&self, id: &str) -> Result<&InputReference, ProcessorError> { match self.bibliography.get(id) { - Some(reference) => Ok(reference.clone()), + Some(reference) => Ok(reference), None => Err(ProcessorError::ReferenceNotFound(id.to_string())), } } /// Get all cited references from the inputs. - pub fn get_cited_references(&self) -> Vec { + pub fn get_cited_references(&self) -> Vec<&InputReference> { let mut cited_references = Vec::new(); for key in &self.get_cited_keys() { if let Ok(reference) = self.get_reference(key) { @@ -237,20 +227,21 @@ impl Processor { /// Sort the references according to instructions in the style. #[inline] - pub fn sort_references( + pub fn sort_references<'a>( &self, - references: Vec, - ) -> Vec { - let mut references: Vec = references; - let options: Config = self.style.options.clone().unwrap_or_default(); - if let Some(sort_config) = - options.processing.clone().unwrap_or_default().config().sort - { + references: Vec<&'a InputReference>, + ) -> Vec<&'a InputReference> { + let mut references = references; + let options = self.style.options.as_ref().unwrap_or(&self.default_config); + let processing = options.processing.as_ref().cloned().unwrap_or_default(); + let processing_config = processing.config(); + + if let Some(sort_config) = &processing_config.sort { sort_config.template.iter().rev().for_each(|sort| match sort.key { SortKey::Author => { references.par_sort_by(|a, b| { let a_author = match a.author() { - Some(author) => author.names(options.clone(), true).join("-"), + Some(author) => author.names(options, true).join("-"), None => match self.get_author_substitute(a) { Some((substitute, _)) => substitute, None => "".to_string(), @@ -258,7 +249,7 @@ impl Processor { }; let b_author = match b.author() { - Some(author) => author.names(options.clone(), true).join("-"), + Some(author) => author.names(options, true).join("-"), None => match self.get_author_substitute(b) { Some((substitute, _)) => substitute, None => "".to_string(), @@ -268,7 +259,7 @@ impl Processor { }); } SortKey::Year => { - references.par_sort_by(|a: &InputReference, b: &InputReference| { + references.par_sort_by(|a, b| { let a_year = a.issued().as_ref().map(|d| d.year()).unwrap_or_default(); let b_year = @@ -282,12 +273,17 @@ impl Processor { references } + /// Get the pre-calculated processing hints. + pub fn get_proc_hints(&self) -> &HashMap { + &self.hints + } + /// Process the references and return a HashMap of ProcHints. - pub fn get_proc_hints(&self) -> HashMap { + fn calculate_proc_hints(&self) -> HashMap { let refs = self.get_references(); let sorted_refs = self.sort_references(refs); let grouped_refs = self.group_references(sorted_refs); - let proc_hints = grouped_refs + grouped_refs .iter() .flat_map(|(key, group)| { let group_len = group.len(); @@ -301,50 +297,33 @@ impl Processor { group_length: group_len, group_key: key.clone(), }; - let ref_id = match reference { - InputReference::Monograph(monograph) => monograph.id.clone(), - InputReference::CollectionComponent(collection_component) => { - collection_component.id.clone() - } - InputReference::SerialComponent(serial_component) => { - serial_component.id.clone() - } - InputReference::Collection(collection) => { - collection.id.clone() - } - }; + let ref_id = reference.id(); ref_id.map(|id| (id, proc_hint)) }, ) }) - .collect(); - proc_hints + .collect() } /// Return a string to use for grouping for a given reference, using instructions in the style. fn make_group_key(&self, reference: &InputReference) -> String { - let options: Config = match self.style.options { - Some(ref options) => options.clone(), - None => Config::default(), // TODO is this right? - }; - let group_template = options - .processing - .unwrap_or_default() + let options = self.style.options.as_ref().unwrap_or(&self.default_config); + let processing = options.processing.as_ref().cloned().unwrap_or_default(); + let group_template = processing .config() .group .as_ref() - .map(|g| g.template.clone()) + .map(|g| &g.template) + .cloned() .unwrap_or_default(); - let options = self.style.options.clone(); + let as_sorted = false; let group_key = group_template // This is likely unnecessary, but just in case. .par_iter() .map(|key| match key { SortKey::Author => match reference.author() { - Some(author) => author - .names(options.clone().unwrap_or_default(), as_sorted) - .join("-"), + Some(author) => author.names(options, as_sorted).join("-"), None => "".to_string(), }, SortKey::Year => reference @@ -367,16 +346,19 @@ impl Processor { &self, reference: &InputReference, ) -> Option<(String, SubstituteKey)> { - let options = self.style.options.clone().unwrap_or_default(); - let substitute_config = options.substitute.clone(); // FIXME default? the below line panics - substitute_config - .unwrap_or_default() + let options = self.style.options.as_ref().unwrap_or(&self.default_config); + let substitute_config = options.substitute.as_ref(); + + // Use default substitute if not provided in style + let default_sub = Substitute::default(); + let substitute = substitute_config.unwrap_or(&default_sub); + + substitute .template .iter() .find_map(|substitute_key| match *substitute_key { SubstituteKey::Editor => { - let names = - reference.editor()?.format(options.clone(), self.locale.clone()); + let names = reference.editor()?.format(options, &self.locale); Some((names, substitute_key.clone())) } _ => None, @@ -385,10 +367,10 @@ impl Processor { /// Group references according to instructions in the style. #[inline] - pub fn group_references( + pub fn group_references<'a>( &self, - references: Vec, - ) -> HashMap> { + references: Vec<&'a InputReference>, + ) -> HashMap> { references .into_iter() .group_by(|reference| self.make_group_key(reference)) diff --git a/processor/src/render.rs b/processor/src/render.rs index 7ffad7c..4e50137 100644 --- a/processor/src/render.rs +++ b/processor/src/render.rs @@ -34,30 +34,29 @@ pub fn refs_to_string(proc_templates: Vec) -> String { impl Display for ProcTemplateComponent { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let rendering = self.template_component.rendering(); - let prefix: String = rendering - .clone() // REVIEW this compiles, but too much cloning - .unwrap_or_default() - .prefix - .unwrap_or_default(); - let suffix: String = - rendering.clone().unwrap_or_default().suffix.unwrap_or_default(); - let wrap: WrapPunctuation = - rendering.unwrap_or_default().wrap.unwrap_or_default(); - let wrap_punct: (String, String) = match wrap { - WrapPunctuation::None => ("".to_string(), "".to_string()), - WrapPunctuation::Parentheses => ("(".to_string(), ")".to_string()), - WrapPunctuation::Brackets => ("[".to_string(), "]".to_string()), + let r = rendering.as_ref(); + + let prefix = r.and_then(|r| r.prefix.as_deref()).unwrap_or_default(); + let suffix = r.and_then(|r| r.suffix.as_deref()).unwrap_or_default(); + let wrap = r.and_then(|r| r.wrap.as_ref()).unwrap_or(&WrapPunctuation::None); + + let wrap_punct: (&str, &str) = match wrap { + WrapPunctuation::None => ("", ""), + WrapPunctuation::Parentheses => ("(", ")"), + WrapPunctuation::Brackets => ("[", "]"), }; - // REVIEW: is this where to plugin different renderers? - // Also, how to handle the different affixes, including within the values? - let result = wrap_punct.0 - + &prefix - + &self.values.prefix.clone().unwrap_or_default() - + &self.values.value - + &self.values.suffix.clone().unwrap_or_default() - + &suffix - + &wrap_punct.1; - write!(f, "{}", result) + + write!( + f, + "{}{}{}{}{}{}{}", + wrap_punct.0, + prefix, + self.values.prefix.as_deref().unwrap_or_default(), + self.values.value, + self.values.suffix.as_deref().unwrap_or_default(), + suffix, + wrap_punct.1 + ) } } diff --git a/processor/src/values.rs b/processor/src/values.rs index 9532790..6518410 100644 --- a/processor/src/values.rs +++ b/processor/src/values.rs @@ -238,17 +238,19 @@ impl ComponentValues for TemplateContributor { let author = reference.author(); if author.is_some() { Some(ProcValues { - value: author?.format(options.global.clone(), locale.clone()), + value: author?.format(options.global, locale), prefix: None, suffix: None, }) } else { // TODO generalize the substitution - let add_role_form = - // REVIEW is this correct? - options.global.substitute.clone()?.contributor_role_form; + let add_role_form = options + .global + .substitute + .as_ref() + .and_then(|s| s.contributor_role_form.clone()); let editor = reference.editor()?; - let editor_length = editor.names(options.global.clone(), true).len(); + let editor_length = editor.names(options.global, true).len(); // get the role string; if it's in fact author, it will be None let suffix = add_role_form.map(|role_form| { role_to_string( @@ -258,15 +260,11 @@ impl ComponentValues for TemplateContributor { editor_length, ) }); - let suffix_padded = suffix.and_then(|s| { - Some(match s { - Some(val) => format!(" {}", val), - None => return None, - }) - }); // TODO fix this matching logic + let suffix_padded = + suffix.and_then(|s| s.map(|val| format!(" {}", val))); // TODO fix this matching logic Some(ProcValues { - value: editor.format(options.global.clone(), locale.clone()), + value: editor.format(options.global, locale), prefix: None, suffix: suffix_padded, }) @@ -278,8 +276,7 @@ impl ComponentValues for TemplateContributor { _ => { let editor = &reference.editor()?; let form = &self.form; - let editor_length = - editor.names(options.global.clone(), true).len(); + let editor_length = editor.names(options.global, true).len(); // TODO handle verb and non-verb forms match form { @@ -298,8 +295,7 @@ impl ComponentValues for TemplateContributor { } }); Some(ProcValues { - value: editor - .format(options.global.clone(), locale.clone()), + value: editor.format(options.global, locale), prefix: prefix_padded, suffix: None, }) @@ -319,8 +315,7 @@ impl ComponentValues for TemplateContributor { } }); Some(ProcValues { - value: editor - .format(options.global.clone(), locale.clone()), + value: editor.format(options.global, locale), prefix: None, suffix: suffix_padded, // TODO handle None }) @@ -330,16 +325,12 @@ impl ComponentValues for TemplateContributor { } } ContributorRole::Translator => Some(ProcValues { - value: reference - .translator()? - .format(options.global.clone(), locale.clone()), + value: reference.translator()?.format(options.global, locale), prefix: None, suffix: None, }), ContributorRole::Publisher => Some(ProcValues { - value: reference - .publisher()? - .format(options.global.clone(), locale.clone()), + value: reference.publisher()?.format(options.global, locale), prefix: None, suffix: None, }), From 718459a013bd1e708a99e4c7174096e78d32ea30 Mon Sep 17 00:00:00 2001 From: Bruce D'Arcus Date: Sun, 25 Jan 2026 09:10:09 -0500 Subject: [PATCH 2/2] conductor(setup): add conductor setup files --- GEMINI.md | 41 +++ conductor/code_styleguides/general.md | 23 ++ conductor/index.md | 14 + conductor/product-guidelines.md | 13 + conductor/product.md | 14 + conductor/setup_state.json | 1 + conductor/tech-stack.md | 25 ++ conductor/tracks.md | 8 + conductor/tracks/yaml_tests_20260125/index.md | 5 + .../tracks/yaml_tests_20260125/metadata.json | 8 + conductor/tracks/yaml_tests_20260125/plan.md | 20 ++ conductor/tracks/yaml_tests_20260125/spec.md | 19 + conductor/workflow.md | 333 ++++++++++++++++++ 13 files changed, 524 insertions(+) create mode 100644 GEMINI.md create mode 100644 conductor/code_styleguides/general.md create mode 100644 conductor/index.md create mode 100644 conductor/product-guidelines.md create mode 100644 conductor/product.md create mode 100644 conductor/setup_state.json create mode 100644 conductor/tech-stack.md create mode 100644 conductor/tracks.md create mode 100644 conductor/tracks/yaml_tests_20260125/index.md create mode 100644 conductor/tracks/yaml_tests_20260125/metadata.json create mode 100644 conductor/tracks/yaml_tests_20260125/plan.md create mode 100644 conductor/tracks/yaml_tests_20260125/spec.md create mode 100644 conductor/workflow.md diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 0000000..f859a8f --- /dev/null +++ b/GEMINI.md @@ -0,0 +1,41 @@ +# Conductor Context + +If a user mentions a "plan" or asks about the plan, and they have used the conductor extension in the current session, they are likely referring to the `conductor/tracks.md` file or one of the track plans (`conductor/tracks//plan.md`). + +## Universal File Resolution Protocol + +**PROTOCOL: How to locate files.** +To find a file (e.g., "**Product Definition**") within a specific context (Project Root or a specific Track): + +1. **Identify Index:** Determine the relevant index file: + - **Project Context:** `conductor/index.md` + - **Track Context:** + a. Resolve and read the **Tracks Registry** (via Project Context). + b. Find the entry for the specific ``. + c. Follow the link provided in the registry to locate the track's folder. The index file is `/index.md`. + d. **Fallback:** If the track is not yet registered (e.g., during creation) or the link is broken: + 1. Resolve the **Tracks Directory** (via Project Context). + 2. The index file is `//index.md`. + +2. **Check Index:** Read the index file and look for a link with a matching or semantically similar label. + +3. **Resolve Path:** If a link is found, resolve its path **relative to the directory containing the `index.md` file**. + - *Example:* If `conductor/index.md` links to `./workflow.md`, the full path is `conductor/workflow.md`. + +4. **Fallback:** If the index file is missing or the link is absent, use the **Default Path** keys below. + +5. **Verify:** You MUST verify the resolved file actually exists on the disk. + +**Standard Default Paths (Project):** +- **Product Definition**: `conductor/product.md` +- **Tech Stack**: `conductor/tech-stack.md` +- **Workflow**: `conductor/workflow.md` +- **Product Guidelines**: `conductor/product-guidelines.md` +- **Tracks Registry**: `conductor/tracks.md` +- **Tracks Directory**: `conductor/tracks/` + +**Standard Default Paths (Track):** +- **Specification**: `conductor/tracks//spec.md` +- **Implementation Plan**: `conductor/tracks//plan.md` +- **Metadata**: `conductor/tracks//metadata.json` + diff --git a/conductor/code_styleguides/general.md b/conductor/code_styleguides/general.md new file mode 100644 index 0000000..dfcc793 --- /dev/null +++ b/conductor/code_styleguides/general.md @@ -0,0 +1,23 @@ +# General Code Style Principles + +This document outlines general coding principles that apply across all languages and frameworks used in this project. + +## Readability +- Code should be easy to read and understand by humans. +- Avoid overly clever or obscure constructs. + +## Consistency +- Follow existing patterns in the codebase. +- Maintain consistent formatting, naming, and structure. + +## Simplicity +- Prefer simple solutions over complex ones. +- Break down complex problems into smaller, manageable parts. + +## Maintainability +- Write code that is easy to modify and extend. +- Minimize dependencies and coupling. + +## Documentation +- Document *why* something is done, not just *what*. +- Keep documentation up-to-date with code changes. diff --git a/conductor/index.md b/conductor/index.md new file mode 100644 index 0000000..ce6eea1 --- /dev/null +++ b/conductor/index.md @@ -0,0 +1,14 @@ +# Project Context + +## Definition +- [Product Definition](./product.md) +- [Product Guidelines](./product-guidelines.md) +- [Tech Stack](./tech-stack.md) + +## Workflow +- [Workflow](./workflow.md) +- [Code Style Guides](./code_styleguides/) + +## Management +- [Tracks Registry](./tracks.md) +- [Tracks Directory](./tracks/) diff --git a/conductor/product-guidelines.md b/conductor/product-guidelines.md new file mode 100644 index 0000000..1d0cf43 --- /dev/null +++ b/conductor/product-guidelines.md @@ -0,0 +1,13 @@ +# Product Guidelines + +## Documentation & Messaging +- **Technical and Precise:** Documentation should prioritize technical accuracy and provide detailed specifications. Language should be formal and clear, targeting a developer-centric audience while remaining accessible for integration purposes. + +## Error Handling & Feedback +- **Structured and Actionable:** Errors must be categorized with specific error codes and include actionable suggestions for resolution. The goal is to minimize developer friction and allow users to self-correct configuration or data issues. + +## Extensibility & Architecture +- **Modular and Pluggable:** The system should be designed to allow for easy extension. Developers should be able to plug in new renderers, data models, or processing logic without requiring modifications to the core engine. This ensures the project remains adaptable to diverse bibliographic needs. + +## Visual Identity & Branding +- **Modern and Professional:** The project's interfaces (CLI, web-based tools) should reflect reliability and high performance. This is achieved through clean typography, a consistent professional color palette, and a focus on clarity and speed in user interactions. diff --git a/conductor/product.md b/conductor/product.md new file mode 100644 index 0000000..7867ceb --- /dev/null +++ b/conductor/product.md @@ -0,0 +1,14 @@ +# Initial Concept + +## Vision +To provide a simpler, easier-to-extend, and more featureful successor to CSL (Citation Style Language). The project aims to modernize citation processing with a Rust-based model that generates JSON schemas, ensuring alignment between code and configuration while offering high performance for both batch and interactive contexts. + +## Target Audience +- **Software Developers:** Developers building bibliographic tools (like Zotero, Pandoc, or other reference managers) who require a robust, high-performance citation engine to handle complex formatting and data processing tasks. + +## Core Features +- **High-Performance Processing:** Optimized for both batch processing (e.g., Markdown, LaTeX documents) and real-time interactive use (e.g., GUI reference managers), ensuring speed and efficiency. +- **Simplified Style Configuration:** Moves logic from complex templates to extensible option groups, making style creation and maintenance easier for users and developers. +- **Modern Standards:** Native support for EDTF (Extended Date/Time Format) and other modern idioms, replacing legacy string parsing with structured data handling. +- **Schema-Driven Development:** JSON schemas are generated directly from the Rust model, ensuring consistency and providing a contract for external tools and domain experts. +- **Cross-Platform Compatibility:** Designed to work across desktop, web, and CLI environments. diff --git a/conductor/setup_state.json b/conductor/setup_state.json new file mode 100644 index 0000000..00fd665 --- /dev/null +++ b/conductor/setup_state.json @@ -0,0 +1 @@ +{"last_successful_step": "3.3_initial_track_generated"} diff --git a/conductor/tech-stack.md b/conductor/tech-stack.md new file mode 100644 index 0000000..a4b84b6 --- /dev/null +++ b/conductor/tech-stack.md @@ -0,0 +1,25 @@ +# Technology Stack + +## Core Language & Runtime +- **Rust:** The primary programming language, chosen for its memory safety, performance, and modern tooling. The project uses a Cargo workspace (resolver 2) to manage its components. + +## Data Serialization & Standards +- **Serialization:** Native support for **JSON** and **YAML** using `serde` and `serde_json`. +- **Date/Time Standards:** Adherence to **EDTF** (Extended Date/Time Format) for robust and standardized date handling. +- **Schema Generation:** Automated generation of JSON schemas from Rust models to ensure cross-language compatibility. + +## Project Architecture +- **Monorepo (Workspace):** + - `csln`: Core library defining the data models for bibliography, citations, and styles. + - `processor`: The citation processing engine and rendering logic. + - `cli`: A command-line interface for interacting with the processor. + +## Development & Quality Tools +- **Build System:** Cargo +- **Linting:** `cargo clippy` (with workspace-level lint configurations) +- **Formatting:** `cargo fmt` +- **Testing:** `cargo test` for unit and integration tests. +- **Benchmarking:** `cargo bench` (using `criterion` or similar) for performance tracking in `csln-processor`. + +## Deployment & Distribution +- **Binary:** Single, statically-linked binaries for the CLI and schema generation tools. diff --git a/conductor/tracks.md b/conductor/tracks.md new file mode 100644 index 0000000..70ee339 --- /dev/null +++ b/conductor/tracks.md @@ -0,0 +1,8 @@ +# Project Tracks + +This file tracks all major tracks for the project. Each track has its own detailed plan in its respective folder. + +--- + +- [ ] **Track: Implement a YAML-based integration test suite for the processor to verify citation rendering across different styles and input types.** + *Link: [./conductor/tracks/yaml_tests_20260125/](./conductor/tracks/yaml_tests_20260125/)* diff --git a/conductor/tracks/yaml_tests_20260125/index.md b/conductor/tracks/yaml_tests_20260125/index.md new file mode 100644 index 0000000..2ed8de9 --- /dev/null +++ b/conductor/tracks/yaml_tests_20260125/index.md @@ -0,0 +1,5 @@ +# Track yaml_tests_20260125 Context + +- [Specification](./spec.md) +- [Implementation Plan](./plan.md) +- [Metadata](./metadata.json) diff --git a/conductor/tracks/yaml_tests_20260125/metadata.json b/conductor/tracks/yaml_tests_20260125/metadata.json new file mode 100644 index 0000000..5402007 --- /dev/null +++ b/conductor/tracks/yaml_tests_20260125/metadata.json @@ -0,0 +1,8 @@ +{ + "track_id": "yaml_tests_20260125", + "type": "feature", + "status": "new", + "created_at": "2026-01-25T09:15:00Z", + "updated_at": "2026-01-25T09:15:00Z", + "description": "Implement a YAML-based integration test suite for the processor to verify citation rendering across different styles and input types." +} diff --git a/conductor/tracks/yaml_tests_20260125/plan.md b/conductor/tracks/yaml_tests_20260125/plan.md new file mode 100644 index 0000000..b97e66d --- /dev/null +++ b/conductor/tracks/yaml_tests_20260125/plan.md @@ -0,0 +1,20 @@ +# Implementation Plan: YAML-based Integration Test Suite + +## Phase 1: Foundation and Data Models +- [ ] Task: Define the `TestCase` struct and associated serialization logic in `processor/tests/integration.rs`. + - [ ] Create `processor/tests/integration.rs`. + - [ ] Define the data models for the YAML test format. +- [ ] Task: Conductor - User Manual Verification 'Phase 1: Foundation and Data Models' (Protocol in workflow.md) + +## Phase 2: Test Runner Implementation +- [ ] Task: Implement the test discovery and execution loop. + - [ ] Write logic to find all YAML files in `processor/tests/data/`. + - [ ] Write logic to deserialize and run each test case. +- [ ] Task: Conductor - User Manual Verification 'Phase 2: Test Runner Implementation' (Protocol in workflow.md) + +## Phase 3: Initial Test Cases and Validation +- [ ] Task: Add initial test cases for standard styles (APA, Chicago). + - [ ] Create `processor/tests/data/apa_basic.yaml`. + - [ ] Create `processor/tests/data/chicago_basic.yaml`. +- [ ] Task: Verify overall test coverage and handle edge cases (e.g., missing fields in YAML). +- [ ] Task: Conductor - User Manual Verification 'Phase 3: Initial Test Cases and Validation' (Protocol in workflow.md) diff --git a/conductor/tracks/yaml_tests_20260125/spec.md b/conductor/tracks/yaml_tests_20260125/spec.md new file mode 100644 index 0000000..43674cd --- /dev/null +++ b/conductor/tracks/yaml_tests_20260125/spec.md @@ -0,0 +1,19 @@ +# Specification: YAML-based Integration Test Suite + +## Objective +Create a data-driven integration test suite using YAML files to validate the `csln-processor`'s rendering accuracy across various styles and reference types. + +## Requirements +- **YAML Test Format:** Define a schema that includes: + - `name`: Description of the test case. + - `style`: The CSL style configuration (YAML/JSON). + - `bibliography`: The input reference data. + - `citation`: The citation to be rendered. + - `expected`: The expected string output. +- **Test Runner:** A Rust test in the `processor` crate that iterates over all `.yaml` files in a dedicated test data directory. +- **Dynamic Execution:** The runner should dynamically load the style and data, execute the processor, and assert equality with the expected output. + +## Architecture +- **Crate:** `processor` +- **Test File:** `processor/tests/integration.rs` +- **Data Directory:** `processor/tests/data/*.yaml` diff --git a/conductor/workflow.md b/conductor/workflow.md new file mode 100644 index 0000000..6f9cfd8 --- /dev/null +++ b/conductor/workflow.md @@ -0,0 +1,333 @@ +# Project Workflow + +## Guiding Principles + +1. **The Plan is the Source of Truth:** All work must be tracked in `plan.md` +2. **The Tech Stack is Deliberate:** Changes to the tech stack must be documented in `tech-stack.md` *before* implementation +3. **Test-Driven Development:** Write unit tests before implementing functionality +4. **High Code Coverage:** Aim for >80% code coverage for all modules +5. **User Experience First:** Every decision should prioritize user experience +6. **Non-Interactive & CI-Aware:** Prefer non-interactive commands. Use `CI=true` for watch-mode tools (tests, linters) to ensure single execution. + +## Task Workflow + +All tasks follow a strict lifecycle: + +### Standard Task Workflow + +1. **Select Task:** Choose the next available task from `plan.md` in sequential order + +2. **Mark In Progress:** Before beginning work, edit `plan.md` and change the task from `[ ]` to `[~]` + +3. **Write Failing Tests (Red Phase):** + - Create a new test file for the feature or bug fix. + - Write one or more unit tests that clearly define the expected behavior and acceptance criteria for the task. + - **CRITICAL:** Run the tests and confirm that they fail as expected. This is the "Red" phase of TDD. Do not proceed until you have failing tests. + +4. **Implement to Pass Tests (Green Phase):** + - Write the minimum amount of application code necessary to make the failing tests pass. + - Run the test suite again and confirm that all tests now pass. This is the "Green" phase. + +5. **Refactor (Optional but Recommended):** + - With the safety of passing tests, refactor the implementation code and the test code to improve clarity, remove duplication, and enhance performance without changing the external behavior. + - Rerun tests to ensure they still pass after refactoring. + +6. **Verify Coverage:** Run coverage reports using the project's chosen tools. For example, in a Python project, this might look like: + ```bash + pytest --cov=app --cov-report=html + ``` + Target: >80% coverage for new code. The specific tools and commands will vary by language and framework. + +7. **Document Deviations:** If implementation differs from tech stack: + - **STOP** implementation + - Update `tech-stack.md` with new design + - Add dated note explaining the change + - Resume implementation + +8. **Commit Code Changes:** + - Stage all code changes related to the task. + - Propose a clear, concise commit message e.g, `feat(ui): Create basic HTML structure for calculator`. + - Perform the commit. + +9. **Attach Task Summary with Git Notes:** + - **Step 9.1: Get Commit Hash:** Obtain the hash of the *just-completed commit* (`git log -1 --format="%H"`). + - **Step 9.2: Draft Note Content:** Create a detailed summary for the completed task. This should include the task name, a summary of changes, a list of all created/modified files, and the core "why" for the change. + - **Step 9.3: Attach Note:** Use the `git notes` command to attach the summary to the commit. + ```bash + # The note content from the previous step is passed via the -m flag. + git notes add -m "" + ``` + +10. **Get and Record Task Commit SHA:** + - **Step 10.1: Update Plan:** Read `plan.md`, find the line for the completed task, update its status from `[~]` to `[x]`, and append the first 7 characters of the *just-completed commit's* commit hash. + - **Step 10.2: Write Plan:** Write the updated content back to `plan.md`. + +11. **Commit Plan Update:** + - **Action:** Stage the modified `plan.md` file. + - **Action:** Commit this change with a descriptive message (e.g., `conductor(plan): Mark task 'Create user model' as complete`). + +### Phase Completion Verification and Checkpointing Protocol + +**Trigger:** This protocol is executed immediately after a task is completed that also concludes a phase in `plan.md`. + +1. **Announce Protocol Start:** Inform the user that the phase is complete and the verification and checkpointing protocol has begun. + +2. **Ensure Test Coverage for Phase Changes:** + - **Step 2.1: Determine Phase Scope:** To identify the files changed in this phase, you must first find the starting point. Read `plan.md` to find the Git commit SHA of the *previous* phase's checkpoint. If no previous checkpoint exists, the scope is all changes since the first commit. + - **Step 2.2: List Changed Files:** Execute `git diff --name-only HEAD` to get a precise list of all files modified during this phase. + - **Step 2.3: Verify and Create Tests:** For each file in the list: + - **CRITICAL:** First, check its extension. Exclude non-code files (e.g., `.json`, `.md`, `.yaml`). + - For each remaining code file, verify a corresponding test file exists. + - If a test file is missing, you **must** create one. Before writing the test, **first, analyze other test files in the repository to determine the correct naming convention and testing style.** The new tests **must** validate the functionality described in this phase's tasks (`plan.md`). + +3. **Execute Automated Tests with Proactive Debugging:** + - Before execution, you **must** announce the exact shell command you will use to run the tests. + - **Example Announcement:** "I will now run the automated test suite to verify the phase. **Command:** `CI=true npm test`" + - Execute the announced command. + - If tests fail, you **must** inform the user and begin debugging. You may attempt to propose a fix a **maximum of two times**. If the tests still fail after your second proposed fix, you **must stop**, report the persistent failure, and ask the user for guidance. + +4. **Propose a Detailed, Actionable Manual Verification Plan:** + - **CRITICAL:** To generate the plan, first analyze `product.md`, `product-guidelines.md`, and `plan.md` to determine the user-facing goals of the completed phase. + - You **must** generate a step-by-step plan that walks the user through the verification process, including any necessary commands and specific, expected outcomes. + - The plan you present to the user **must** follow this format: + + **For a Frontend Change:** + ``` + The automated tests have passed. For manual verification, please follow these steps: + + **Manual Verification Steps:** + 1. **Start the development server with the command:** `npm run dev` + 2. **Open your browser to:** `http://localhost:3000` + 3. **Confirm that you see:** The new user profile page, with the user's name and email displayed correctly. + ``` + + **For a Backend Change:** + ``` + The automated tests have passed. For manual verification, please follow these steps: + + **Manual Verification Steps:** + 1. **Ensure the server is running.** + 2. **Execute the following command in your terminal:** `curl -X POST http://localhost:8080/api/v1/users -d '{"name": "test"}'` + 3. **Confirm that you receive:** A JSON response with a status of `201 Created`. + ``` + +5. **Await Explicit User Feedback:** + - After presenting the detailed plan, ask the user for confirmation: "**Does this meet your expectations? Please confirm with yes or provide feedback on what needs to be changed.**" + - **PAUSE** and await the user's response. Do not proceed without an explicit yes or confirmation. + +6. **Create Checkpoint Commit:** + - Stage all changes. If no changes occurred in this step, proceed with an empty commit. + - Perform the commit with a clear and concise message (e.g., `conductor(checkpoint): Checkpoint end of Phase X`). + +7. **Attach Auditable Verification Report using Git Notes:** + - **Step 7.1: Draft Note Content:** Create a detailed verification report including the automated test command, the manual verification steps, and the user's confirmation. + - **Step 7.2: Attach Note:** Use the `git notes` command and the full commit hash from the previous step to attach the full report to the checkpoint commit. + +8. **Get and Record Phase Checkpoint SHA:** + - **Step 8.1: Get Commit Hash:** Obtain the hash of the *just-created checkpoint commit* (`git log -1 --format="%H"`). + - **Step 8.2: Update Plan:** Read `plan.md`, find the heading for the completed phase, and append the first 7 characters of the commit hash in the format `[checkpoint: ]`. + - **Step 8.3: Write Plan:** Write the updated content back to `plan.md`. + +9. **Commit Plan Update:** + - **Action:** Stage the modified `plan.md` file. + - **Action:** Commit this change with a descriptive message following the format `conductor(plan): Mark phase '' as complete`. + +10. **Announce Completion:** Inform the user that the phase is complete and the checkpoint has been created, with the detailed verification report attached as a git note. + +### Quality Gates + +Before marking any task complete, verify: + +- [ ] All tests pass +- [ ] Code coverage meets requirements (>80%) +- [ ] Code follows project's code style guidelines (as defined in `code_styleguides/`) +- [ ] All public functions/methods are documented (e.g., docstrings, JSDoc, GoDoc) +- [ ] Type safety is enforced (e.g., type hints, TypeScript types, Go types) +- [ ] No linting or static analysis errors (using the project's configured tools) +- [ ] Works correctly on mobile (if applicable) +- [ ] Documentation updated if needed +- [ ] No security vulnerabilities introduced + +## Development Commands + +**AI AGENT INSTRUCTION: This section should be adapted to the project's specific language, framework, and build tools.** + +### Setup +```bash +# Example: Commands to set up the development environment (e.g., install dependencies, configure database) +# e.g., for a Node.js project: npm install +# e.g., for a Go project: go mod tidy +``` + +### Daily Development +```bash +# Example: Commands for common daily tasks (e.g., start dev server, run tests, lint, format) +# e.g., for a Node.js project: npm run dev, npm test, npm run lint +# e.g., for a Go project: go run main.go, go test ./..., go fmt ./... +``` + +### Before Committing +```bash +# Example: Commands to run all pre-commit checks (e.g., format, lint, type check, run tests) +# e.g., for a Node.js project: npm run check +# e.g., for a Go project: make check (if a Makefile exists) +``` + +## Testing Requirements + +### Unit Testing +- Every module must have corresponding tests. +- Use appropriate test setup/teardown mechanisms (e.g., fixtures, beforeEach/afterEach). +- Mock external dependencies. +- Test both success and failure cases. + +### Integration Testing +- Test complete user flows +- Verify database transactions +- Test authentication and authorization +- Check form submissions + +### Mobile Testing +- Test on actual iPhone when possible +- Use Safari developer tools +- Test touch interactions +- Verify responsive layouts +- Check performance on 3G/4G + +## Code Review Process + +### Self-Review Checklist +Before requesting review: + +1. **Functionality** + - Feature works as specified + - Edge cases handled + - Error messages are user-friendly + +2. **Code Quality** + - Follows style guide + - DRY principle applied + - Clear variable/function names + - Appropriate comments + +3. **Testing** + - Unit tests comprehensive + - Integration tests pass + - Coverage adequate (>80%) + +4. **Security** + - No hardcoded secrets + - Input validation present + - SQL injection prevented + - XSS protection in place + +5. **Performance** + - Database queries optimized + - Images optimized + - Caching implemented where needed + +6. **Mobile Experience** + - Touch targets adequate (44x44px) + - Text readable without zooming + - Performance acceptable on mobile + - Interactions feel native + +## Commit Guidelines + +### Message Format +``` +(): + +[optional body] + +[optional footer] +``` + +### Types +- `feat`: New feature +- `fix`: Bug fix +- `docs`: Documentation only +- `style`: Formatting, missing semicolons, etc. +- `refactor`: Code change that neither fixes a bug nor adds a feature +- `test`: Adding missing tests +- `chore`: Maintenance tasks + +### Examples +```bash +git commit -m "feat(auth): Add remember me functionality" +git commit -m "fix(posts): Correct excerpt generation for short posts" +git commit -m "test(comments): Add tests for emoji reaction limits" +git commit -m "style(mobile): Improve button touch targets" +``` + +## Definition of Done + +A task is complete when: + +1. All code implemented to specification +2. Unit tests written and passing +3. Code coverage meets project requirements +4. Documentation complete (if applicable) +5. Code passes all configured linting and static analysis checks +6. Works beautifully on mobile (if applicable) +7. Implementation notes added to `plan.md` +8. Changes committed with proper message +9. Git note with task summary attached to the commit + +## Emergency Procedures + +### Critical Bug in Production +1. Create hotfix branch from main +2. Write failing test for bug +3. Implement minimal fix +4. Test thoroughly including mobile +5. Deploy immediately +6. Document in plan.md + +### Data Loss +1. Stop all write operations +2. Restore from latest backup +3. Verify data integrity +4. Document incident +5. Update backup procedures + +### Security Breach +1. Rotate all secrets immediately +2. Review access logs +3. Patch vulnerability +4. Notify affected users (if any) +5. Document and update security procedures + +## Deployment Workflow + +### Pre-Deployment Checklist +- [ ] All tests passing +- [ ] Coverage >80% +- [ ] No linting errors +- [ ] Mobile testing complete +- [ ] Environment variables configured +- [ ] Database migrations ready +- [ ] Backup created + +### Deployment Steps +1. Merge feature branch to main +2. Tag release with version +3. Push to deployment service +4. Run database migrations +5. Verify deployment +6. Test critical paths +7. Monitor for errors + +### Post-Deployment +1. Monitor analytics +2. Check error logs +3. Gather user feedback +4. Plan next iteration + +## Continuous Improvement + +- Review workflow weekly +- Update based on pain points +- Document lessons learned +- Optimize for user happiness +- Keep things simple and maintainable