diff --git a/README.md b/README.md index 970e91f..b1ee6a8 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,15 @@ langcodec translate \ --provider openai \ --model gpt-5.4 +# Draft translations between single-language files too +langcodec translate \ + --source en.lproj/Localizable.strings \ + --target values-fr/strings.xml \ + --source-lang en \ + --target-lang fr \ + --provider openai \ + --model gpt-5.4 + # Generate translator-facing comments from source usage langcodec annotate \ --input Localizable.xcstrings \ @@ -94,6 +103,13 @@ langcodec annotate \ --source-root Modules \ --provider openai \ --model gpt-5.4 + +# Annotate Apple .strings or Android XML inline +langcodec annotate \ + --input en.lproj/Localizable.strings \ + --source-root Sources \ + --provider openai \ + --model gpt-5.4 ``` ## Packages @@ -115,7 +131,7 @@ langcodec annotate \ ## AI Workflows -`langcodec` is built for app localization workflows, not just isolated text snippets. `translate` and `annotate` can be driven from a shared `langcodec.toml`, use supported providers such as OpenAI, Anthropic, and Gemini, and scale from a single catalog to config-driven runs across larger repos. +`langcodec` is built for app localization workflows, not just isolated text snippets. `translate` and `annotate` can be driven from a shared `langcodec.toml`, use supported providers such as OpenAI, Anthropic, and Gemini, and scale from single-language files or `.xcstrings` catalogs to config-driven runs across larger repos. ```toml [openai] diff --git a/langcodec-cli/README.md b/langcodec-cli/README.md index 982f7c8..22a31d0 100644 --- a/langcodec-cli/README.md +++ b/langcodec-cli/README.md @@ -39,6 +39,7 @@ langcodec translate \ `translate` is built for app catalogs, not just raw text: - updates multi-language files like `.xcstrings` in place +- supports single-language Apple `.strings` and Android `strings.xml` files too - supports multiple target languages in one run - can prefill from Tolgee before using AI fallback - shows live progress with `--ui auto|plain|tui` @@ -56,7 +57,15 @@ langcodec annotate \ --model gpt-5.4 ``` -`annotate` looks through your codebase and writes better `.xcstrings` comments for translators while preserving manual comments. +`annotate` looks through your codebase and writes better translator comments for `.xcstrings`, Apple `.strings`, and Android `strings.xml` files while preserving manual comments. + +```sh +langcodec annotate \ + --input en.lproj/Localizable.strings \ + --source-root Sources \ + --provider openai \ + --model gpt-5.4 +``` ### Tolgee sync without a pile of project scripts diff --git a/langcodec-cli/src/annotate.rs b/langcodec-cli/src/annotate.rs index 8f6d3cd..6352eb3 100644 --- a/langcodec-cli/src/annotate.rs +++ b/langcodec-cli/src/annotate.rs @@ -10,8 +10,9 @@ use crate::{ }; use async_trait::async_trait; use langcodec::{ - Resource, Translation, - formats::{XcstringsFormat, xcstrings::Item}, + Codec, Entry, FormatType, ReadOptions, Resource, Translation, + formats::{AndroidStringsFormat, StringsFormat, XcstringsFormat}, + infer_format_from_extension, infer_language_from_path, traits::Parser, }; use mentra::{ @@ -36,7 +37,8 @@ use tokio::{ const DEFAULT_CONCURRENCY: usize = 4; const DEFAULT_TOOL_BUDGET: usize = 16; -const ANNOTATION_SYSTEM_PROMPT: &str = "You write translator-facing comments for Xcode xcstrings entries. Use the files tool or shell tool when needed to inspect source code. Prefer shell commands like rg for fast code search, then read the most relevant files before drafting. Prefer a short, concrete explanation of where or how the text is used so a translator can choose the right wording. If you are uncertain, say what the UI usage appears to be instead of inventing product meaning. Return JSON only with the shape {\"comment\":\"...\",\"confidence\":\"high|medium|low\"}."; +const GENERATED_COMMENT_MARKER: &str = "langcodec:auto-generated"; +const ANNOTATION_SYSTEM_PROMPT: &str = "You write translator-facing comments for application localization entries. Use the files tool or shell tool when needed to inspect source code. Prefer shell commands like rg for fast code search, then read the most relevant files before drafting. Prefer a short, concrete explanation of where or how the text is used so a translator can choose the right wording. If you are uncertain, say what the UI usage appears to be instead of inventing product meaning. Return JSON only with the shape {\"comment\":\"...\",\"confidence\":\"high|medium|low\"}."; #[derive(Debug, Clone)] pub struct AnnotateOptions { @@ -83,6 +85,29 @@ struct AnnotationResponse { confidence: String, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AnnotationFormat { + Xcstrings, + Strings, + AndroidStrings, +} + +impl AnnotationFormat { + fn to_format_type(self) -> FormatType { + match self { + Self::Xcstrings => FormatType::Xcstrings, + Self::Strings => FormatType::Strings(None), + Self::AndroidStrings => FormatType::AndroidStrings(None), + } + } +} + +#[derive(Debug, Clone)] +struct AnnotationTarget { + key: String, + existing_comment: Option, +} + enum WorkerUpdate { Started { worker_id: usize, @@ -191,25 +216,29 @@ fn run_annotate_with_backend( opts: ResolvedAnnotateOptions, backend: Arc, ) -> Result<(), String> { - let mut catalog = XcstringsFormat::read_from(&opts.input) - .map_err(|e| format!("Failed to read '{}': {}", opts.input, e))?; - let resources = Vec::::try_from(catalog.clone()) - .map_err(|e| format!("Failed to decode xcstrings '{}': {}", opts.input, e))?; - + let annotation_format = annotation_format_from_path(&opts.input)?; + let mut codec = read_annotation_codec(&opts.input, annotation_format)?; let source_lang = opts .source_lang .clone() - .unwrap_or_else(|| catalog.source_language.clone()); + .or_else(|| default_source_language(&codec)) + .ok_or_else(|| { + format!( + "Could not infer source language for '{}'; pass --source-lang", + opts.input + ) + })?; validate_language_code(&source_lang)?; - let source_values = source_value_map(&resources, &source_lang); + let source_values = source_value_map(&codec.resources, &source_lang); let requests = build_annotation_requests( - &catalog, + &codec, + annotation_format, &source_lang, &source_values, &opts.source_roots, &opts.workspace_root, - )?; + ); if requests.is_empty() { println!("No entries require annotation updates."); @@ -230,28 +259,20 @@ fn run_annotate_with_backend( opts.concurrency ), }); - let results = annotate_requests(requests, backend, opts.concurrency, &mut *reporter); + let results = annotate_requests(requests.clone(), backend, opts.concurrency, &mut *reporter); let results = results?; let mut changed = 0usize; let mut unmatched = 0usize; - let mut keys = catalog.strings.keys().cloned().collect::>(); - keys.sort(); - for key in keys { - let Some(item) = catalog.strings.get_mut(&key) else { - continue; - }; - if should_preserve_manual_comment(item) { - continue; - } - - match results.get(&key) { + for request in &requests { + match results.get(&request.key) { Some(Some(annotation)) => { - if item.comment.as_deref() != Some(annotation.comment.as_str()) - || item.is_comment_auto_generated != Some(true) - { - item.comment = Some(annotation.comment.clone()); - item.is_comment_auto_generated = Some(true); + if apply_annotation( + &mut codec, + annotation_format, + &request.key, + &annotation.comment, + )? { changed += 1; } } @@ -306,7 +327,7 @@ fn run_annotate_with_backend( tone: DashboardLogTone::Info, message: format!("Writing {}", opts.output), }); - if let Err(err) = catalog.write_to(&opts.output) { + if let Err(err) = write_annotated_codec(&codec, annotation_format, &opts.output) { let err = format!("Failed to write '{}': {}", opts.output, err); reporter.emit(DashboardEvent::Log { tone: DashboardLogTone::Error, @@ -466,6 +487,7 @@ fn resolve_annotate_options( } else { input.clone() }; + validate_annotate_paths(&input, &output)?; let concurrency = opts .concurrency @@ -513,6 +535,57 @@ fn resolve_annotate_options( }) } +fn validate_annotate_paths(input: &str, output: &str) -> Result<(), String> { + let input_format = annotation_format_from_path(input)?; + let output_format = annotation_format_from_path(output)?; + if input_format != output_format { + return Err(format!( + "Annotate output format must match input format (input='{}', output='{}')", + input, output + )); + } + Ok(()) +} + +fn annotation_format_from_path(path: &str) -> Result { + match infer_format_from_extension(path) + .ok_or_else(|| format!("Cannot infer annotate format from path: {}", path))? + { + FormatType::Xcstrings => Ok(AnnotationFormat::Xcstrings), + FormatType::Strings(_) => Ok(AnnotationFormat::Strings), + FormatType::AndroidStrings(_) => Ok(AnnotationFormat::AndroidStrings), + _ => Err(format!( + "annotate supports only .xcstrings, .strings, and Android strings.xml files, got '{}'", + path + )), + } +} + +fn read_annotation_codec(path: &str, format: AnnotationFormat) -> Result { + let format_type = format.to_format_type(); + let language_hint = infer_language_from_path(path, &format_type).ok().flatten(); + let mut codec = Codec::new(); + codec + .read_file_by_extension_with_options( + path, + &ReadOptions::new().with_language_hint(language_hint), + ) + .map_err(|e| format!("Failed to read '{}': {}", path, e))?; + Ok(codec) +} + +fn default_source_language(codec: &Codec) -> Option { + codec + .resources + .iter() + .find_map(|resource| resource.metadata.custom.get("source_language").cloned()) + .or_else(|| { + (codec.resources.len() == 1) + .then(|| codec.resources[0].metadata.language.trim().to_string()) + .filter(|lang| !lang.is_empty()) + }) +} + fn annotate_requests( requests: Vec, backend: Arc, @@ -699,34 +772,25 @@ fn annotate_requests( } fn build_annotation_requests( - catalog: &XcstringsFormat, + codec: &Codec, + annotation_format: AnnotationFormat, source_lang: &str, source_values: &HashMap, source_roots: &[String], workspace_root: &Path, -) -> Result, String> { - let mut keys = catalog.strings.keys().cloned().collect::>(); - keys.sort(); - +) -> Vec { let mut requests = Vec::new(); - for key in keys { - let Some(item) = catalog.strings.get(&key) else { - continue; - }; - if should_preserve_manual_comment(item) { - continue; - } - + for target in collect_annotation_targets(codec, annotation_format) { let source_value = source_values - .get(&key) + .get(&target.key) .cloned() - .unwrap_or_else(|| key.clone()); + .unwrap_or_else(|| target.key.clone()); requests.push(AnnotationRequest { - key, + key: target.key, source_lang: source_lang.to_string(), source_value, - existing_comment: item.comment.clone(), + existing_comment: target.existing_comment, source_roots: source_roots .iter() .map(|root| display_path(workspace_root, Path::new(root))) @@ -734,11 +798,231 @@ fn build_annotation_requests( }); } - Ok(requests) + requests } -fn should_preserve_manual_comment(item: &Item) -> bool { - item.comment.is_some() && item.is_comment_auto_generated != Some(true) +fn collect_annotation_targets( + codec: &Codec, + annotation_format: AnnotationFormat, +) -> Vec { + let mut targets = BTreeMap::::new(); + let mut preserve_manual = BTreeMap::::new(); + + for resource in &codec.resources { + for entry in &resource.entries { + let key = entry.id.clone(); + let target = targets + .entry(key.clone()) + .or_insert_with(|| AnnotationTarget { + key: key.clone(), + existing_comment: None, + }); + + if target.existing_comment.is_none() { + target.existing_comment = display_comment(annotation_format, entry); + } + + if should_preserve_manual_comment(annotation_format, entry) { + preserve_manual.insert(key, true); + } + } + } + + targets + .into_iter() + .filter_map(|(key, target)| { + (!preserve_manual.get(&key).copied().unwrap_or(false)).then_some(target) + }) + .collect() +} + +fn should_preserve_manual_comment(annotation_format: AnnotationFormat, entry: &Entry) -> bool { + let Some(raw_comment) = entry.comment.as_deref() else { + return false; + }; + + match annotation_format { + AnnotationFormat::Xcstrings => !entry + .custom + .get("is_comment_auto_generated") + .and_then(|value| value.parse::().ok()) + .unwrap_or(false), + AnnotationFormat::Strings | AnnotationFormat::AndroidStrings => { + !is_generated_inline_comment(annotation_format, raw_comment) + } + } +} + +fn display_comment(annotation_format: AnnotationFormat, entry: &Entry) -> Option { + let raw_comment = entry.comment.as_deref()?; + let comment = match annotation_format { + AnnotationFormat::Xcstrings => raw_comment.trim().to_string(), + AnnotationFormat::Strings => normalize_strings_comment(raw_comment), + AnnotationFormat::AndroidStrings => normalize_inline_comment(raw_comment), + }; + + (!comment.is_empty()).then_some(comment) +} + +fn normalize_strings_comment(raw_comment: &str) -> String { + let stripped = if raw_comment.starts_with("/*") && raw_comment.ends_with("*/") { + raw_comment[2..raw_comment.len() - 2].trim() + } else if let Some(comment) = raw_comment.strip_prefix("//") { + comment.trim() + } else { + raw_comment.trim() + }; + + extract_generated_comment_body(stripped) + .unwrap_or(stripped) + .trim() + .to_string() +} + +fn normalize_inline_comment(raw_comment: &str) -> String { + let trimmed = raw_comment.trim(); + extract_generated_comment_body(trimmed) + .unwrap_or(trimmed) + .trim() + .to_string() +} + +fn extract_generated_comment_body(comment: &str) -> Option<&str> { + let trimmed = comment.trim(); + if trimmed == GENERATED_COMMENT_MARKER { + return Some(""); + } + + trimmed + .strip_prefix(GENERATED_COMMENT_MARKER) + .map(str::trim_start) +} + +fn is_generated_inline_comment(annotation_format: AnnotationFormat, raw_comment: &str) -> bool { + match annotation_format { + AnnotationFormat::Xcstrings => false, + AnnotationFormat::Strings => { + extract_generated_comment_body(&normalize_strings_comment_storage(raw_comment)) + .is_some() + } + AnnotationFormat::AndroidStrings => extract_generated_comment_body(raw_comment).is_some(), + } +} + +fn normalize_strings_comment_storage(raw_comment: &str) -> String { + if raw_comment.starts_with("/*") && raw_comment.ends_with("*/") { + raw_comment[2..raw_comment.len() - 2].trim().to_string() + } else if let Some(comment) = raw_comment.strip_prefix("//") { + comment.trim().to_string() + } else { + raw_comment.trim().to_string() + } +} + +fn generated_comment_storage(annotation_format: AnnotationFormat, comment: &str) -> String { + match annotation_format { + AnnotationFormat::Xcstrings => comment.to_string(), + AnnotationFormat::Strings => { + let body = comment.replace("*/", "* /").trim().to_string(); + format!("/* {}\n{} */", GENERATED_COMMENT_MARKER, body) + } + AnnotationFormat::AndroidStrings => { + format!("{}\n{}", GENERATED_COMMENT_MARKER, comment.trim()) + } + } +} + +fn apply_annotation( + codec: &mut Codec, + annotation_format: AnnotationFormat, + key: &str, + comment: &str, +) -> Result { + let stored_comment = generated_comment_storage(annotation_format, comment); + let mut changed = false; + let mut matched = false; + + for resource in &mut codec.resources { + for entry in &mut resource.entries { + if entry.id != key { + continue; + } + + matched = true; + match annotation_format { + AnnotationFormat::Xcstrings => { + let already_generated = entry + .custom + .get("is_comment_auto_generated") + .and_then(|value| value.parse::().ok()) + .unwrap_or(false); + if entry.comment.as_deref() != Some(comment) || !already_generated { + changed = true; + } + entry.comment = Some(comment.to_string()); + entry + .custom + .insert("is_comment_auto_generated".to_string(), "true".to_string()); + } + AnnotationFormat::Strings | AnnotationFormat::AndroidStrings => { + if entry.comment.as_deref() != Some(stored_comment.as_str()) { + changed = true; + } + entry.comment = Some(stored_comment.clone()); + } + } + } + } + + if !matched { + return Err(format!( + "Annotation target '{}' was not found in loaded resources", + key + )); + } + + Ok(changed) +} + +fn write_annotated_codec( + codec: &Codec, + annotation_format: AnnotationFormat, + output: &str, +) -> Result<(), String> { + match annotation_format { + AnnotationFormat::Xcstrings => XcstringsFormat::try_from(codec.resources.clone()) + .map_err(|e| format!("Failed to build xcstrings output: {}", e))? + .write_to(output) + .map_err(|e| e.to_string()), + AnnotationFormat::Strings => { + let resource = single_resource_for_annotation(codec, output)?; + StringsFormat::try_from(resource.clone()) + .map_err(|e| format!("Failed to build .strings output: {}", e))? + .write_to(output) + .map_err(|e| e.to_string()) + } + AnnotationFormat::AndroidStrings => { + let resource = single_resource_for_annotation(codec, output)?; + AndroidStringsFormat::from(resource.clone()) + .write_to(output) + .map_err(|e| e.to_string()) + } + } +} + +fn single_resource_for_annotation<'a>( + codec: &'a Codec, + output: &str, +) -> Result<&'a Resource, String> { + if codec.resources.len() != 1 { + return Err(format!( + "Expected exactly one resource when writing '{}', found {}", + output, + codec.resources.len() + )); + } + + Ok(&codec.resources[0]) } fn create_annotate_reporter( @@ -890,7 +1174,7 @@ fn build_agent_config(workspace_root: &Path) -> AgentConfig { fn build_annotation_prompt(request: &AnnotationRequest) -> String { let mut prompt = format!( - "Write one translator-facing comment for this xcstrings entry.\n\nKey: {}\nSource language: {}\nSource value: {}\n", + "Write one translator-facing comment for this localization entry.\n\nKey: {}\nSource language: {}\nSource value: {}\n", request.key, request.source_lang, request.source_value ); @@ -1325,6 +1609,186 @@ mod tests { ); } + #[test] + fn run_annotate_supports_apple_strings_files() { + let temp_dir = TempDir::new().expect("temp dir"); + let input_dir = temp_dir.path().join("en.lproj"); + let input = input_dir.join("Localizable.strings"); + let source_root = temp_dir.path().join("Sources"); + fs::create_dir_all(&input_dir).expect("create input dir"); + fs::create_dir_all(&source_root).expect("create root"); + fs::write( + &input, + r#"/* Written by a human. */ +"cancel" = "Cancel"; +"start" = "Start"; +/* langcodec:auto-generated +Old auto comment */ +"retry" = "Retry"; +"#, + ) + .expect("write strings"); + + let mut responses = HashMap::new(); + responses.insert( + "start".to_string(), + Some(AnnotationResponse { + comment: "A button label that starts the game.".to_string(), + confidence: "high".to_string(), + }), + ); + responses.insert( + "retry".to_string(), + Some(AnnotationResponse { + comment: "A button label shown when the user can try the action again.".to_string(), + confidence: "high".to_string(), + }), + ); + + let opts = ResolvedAnnotateOptions { + input: input.to_string_lossy().to_string(), + output: input.to_string_lossy().to_string(), + source_roots: vec![source_root.to_string_lossy().to_string()], + source_lang: Some("en".to_string()), + provider: ProviderKind::OpenAI, + model: "test-model".to_string(), + concurrency: 1, + dry_run: false, + check: false, + workspace_root: temp_dir.path().to_path_buf(), + ui_mode: ResolvedUiMode::Plain, + }; + + run_annotate_with_backend(opts, Arc::new(FakeBackend { responses })) + .expect("annotate strings"); + + let format = StringsFormat::read_from(&input).expect("read strings output"); + let mut comments = HashMap::new(); + for pair in format.pairs { + let key = pair.key.clone(); + comments.insert( + key, + pair.comment + .as_deref() + .map(normalize_strings_comment) + .unwrap_or_default(), + ); + } + + assert_eq!( + comments.get("start").map(String::as_str), + Some("A button label that starts the game.") + ); + assert_eq!( + comments.get("retry").map(String::as_str), + Some("A button label shown when the user can try the action again.") + ); + assert_eq!( + comments.get("cancel").map(String::as_str), + Some("Written by a human.") + ); + + let written = fs::read_to_string(&input).expect("read written strings"); + assert!(written.contains("langcodec:auto-generated")); + } + + #[test] + fn run_annotate_supports_android_strings_files() { + let temp_dir = TempDir::new().expect("temp dir"); + let values_dir = temp_dir.path().join("values"); + let input = values_dir.join("strings.xml"); + let source_root = temp_dir.path().join("Sources"); + fs::create_dir_all(&values_dir).expect("create values dir"); + fs::create_dir_all(&source_root).expect("create root"); + fs::write( + &input, + r#" + +Cancel +Start + +Retry + +One apple +%d apples + + +"#, + ) + .expect("write xml"); + + let mut responses = HashMap::new(); + responses.insert( + "start".to_string(), + Some(AnnotationResponse { + comment: "A button label that starts the game.".to_string(), + confidence: "high".to_string(), + }), + ); + responses.insert( + "retry".to_string(), + Some(AnnotationResponse { + comment: "A button label shown when the user can try the action again.".to_string(), + confidence: "high".to_string(), + }), + ); + responses.insert( + "apples".to_string(), + Some(AnnotationResponse { + comment: "Pluralized inventory count for apples.".to_string(), + confidence: "high".to_string(), + }), + ); + + let opts = ResolvedAnnotateOptions { + input: input.to_string_lossy().to_string(), + output: input.to_string_lossy().to_string(), + source_roots: vec![source_root.to_string_lossy().to_string()], + source_lang: Some("en".to_string()), + provider: ProviderKind::OpenAI, + model: "test-model".to_string(), + concurrency: 1, + dry_run: false, + check: false, + workspace_root: temp_dir.path().to_path_buf(), + ui_mode: ResolvedUiMode::Plain, + }; + + run_annotate_with_backend(opts, Arc::new(FakeBackend { responses })) + .expect("annotate android"); + + let format = AndroidStringsFormat::read_from(&input).expect("read android output"); + let mut string_comments = HashMap::new(); + for item in format.strings { + string_comments.insert(item.name, item.comment.unwrap_or_default()); + } + let mut plural_comments = HashMap::new(); + for item in format.plurals { + plural_comments.insert(item.name, item.comment.unwrap_or_default()); + } + + assert_eq!( + normalize_inline_comment(string_comments["start"].as_str()), + "A button label that starts the game." + ); + assert_eq!( + normalize_inline_comment(string_comments["retry"].as_str()), + "A button label shown when the user can try the action again." + ); + assert_eq!( + normalize_inline_comment(string_comments["cancel"].as_str()), + "Written by a human." + ); + assert_eq!( + normalize_inline_comment(plural_comments["apples"].as_str()), + "Pluralized inventory count for apples." + ); + + let written = fs::read_to_string(&input).expect("read written xml"); + assert!(written.contains("langcodec:auto-generated")); + } + #[test] fn run_annotate_dry_run_does_not_write_changes() { let temp_dir = TempDir::new().expect("temp dir"); diff --git a/langcodec-cli/src/main.rs b/langcodec-cli/src/main.rs index 968090d..30aa0a0 100644 --- a/langcodec-cli/src/main.rs +++ b/langcodec-cli/src/main.rs @@ -330,21 +330,21 @@ enum Commands { ui_mode: UiMode, }, - /// Generate translator-facing xcstrings comments from source usage with a Mentra agent. + /// Generate translator-facing localization comments from source usage with a Mentra agent. Annotate { - /// Xcode string catalog to annotate. Required unless configured in `langcodec.toml`. + /// Localization file to annotate (`.xcstrings`, `.strings`, or Android `strings.xml`). Required unless configured in `langcodec.toml`. #[arg(short, long)] input: Option, - /// Swift source roots to scan and expose to the agent. Repeat for multiple roots. + /// Source roots to scan and expose to the agent. Repeat for multiple roots. #[arg(long = "source-root")] source_roots: Vec, - /// Optional output file. Defaults to writing back to the input catalog. + /// Optional output file in the same format as the input. Defaults to writing back to the input file. #[arg(short, long)] output: Option, - /// Override the source language used to resolve source values from the catalog. + /// Override the source language used to resolve source values from the input file. #[arg(long)] source_lang: Option, diff --git a/langcodec-cli/src/translate.rs b/langcodec-cli/src/translate.rs index 9fa44b9..fea27c2 100644 --- a/langcodec-cli/src/translate.rs +++ b/langcodec-cli/src/translate.rs @@ -1869,6 +1869,68 @@ cp "{payload_path}" "$pull_path/$namespace/Localizable.xcstrings" assert!(written.contains("\"bye\" = \"Au revoir\";")); } + #[test] + fn translates_strings_source_into_android_target_file() { + let temp_dir = TempDir::new().unwrap(); + let source = temp_dir.path().join("en.strings"); + let target_dir = temp_dir.path().join("values-fr"); + let target = target_dir.join("strings.xml"); + fs::create_dir_all(&target_dir).unwrap(); + fs::write( + &source, + "\"welcome\" = \"Welcome\";\n\"bye\" = \"Goodbye\";\n", + ) + .unwrap(); + + let prepared = prepare_translation(&base_options(&source, Some(&target))).unwrap(); + let outcome = run_prepared_translation( + prepared, + Some(Arc::new(MockBackend::new(vec![ + (("welcome", "fr"), Ok("Bienvenue".to_string())), + (("bye", "fr"), Ok("Au revoir".to_string())), + ]))), + ) + .unwrap(); + + assert_eq!(outcome.translated, 2); + let written = fs::read_to_string(&target).unwrap(); + assert!(written.contains("Bienvenue")); + assert!(written.contains("Au revoir")); + } + + #[test] + fn translates_android_source_into_strings_target_file() { + let temp_dir = TempDir::new().unwrap(); + let source_dir = temp_dir.path().join("values"); + let source = source_dir.join("strings.xml"); + let target = temp_dir.path().join("fr.strings"); + fs::create_dir_all(&source_dir).unwrap(); + fs::write( + &source, + r#" +Welcome +Goodbye + +"#, + ) + .unwrap(); + + let prepared = prepare_translation(&base_options(&source, Some(&target))).unwrap(); + let outcome = run_prepared_translation( + prepared, + Some(Arc::new(MockBackend::new(vec![ + (("welcome", "fr"), Ok("Bienvenue".to_string())), + (("bye", "fr"), Ok("Au revoir".to_string())), + ]))), + ) + .unwrap(); + + assert_eq!(outcome.translated, 2); + let written = fs::read_to_string(&target).unwrap(); + assert!(written.contains("\"welcome\" = \"Bienvenue\";")); + assert!(written.contains("\"bye\" = \"Au revoir\";")); + } + #[test] fn dry_run_does_not_write_target() { let temp_dir = TempDir::new().unwrap(); diff --git a/langcodec-cli/tests/annotate_cli_tests.rs b/langcodec-cli/tests/annotate_cli_tests.rs index 91045cc..ef477ef 100644 --- a/langcodec-cli/tests/annotate_cli_tests.rs +++ b/langcodec-cli/tests/annotate_cli_tests.rs @@ -24,6 +24,8 @@ fn test_annotate_help_mentions_source_root_flag() { assert!(stdout.contains("--source-root")); assert!(stdout.contains("--check")); assert!(stdout.contains("--ui")); + assert!(stdout.contains(".strings")); + assert!(stdout.contains("strings.xml")); } #[test] diff --git a/langcodec/src/formats/android_strings.rs b/langcodec/src/formats/android_strings.rs index dc7e941..89f4f1a 100644 --- a/langcodec/src/formats/android_strings.rs +++ b/langcodec/src/formats/android_strings.rs @@ -39,17 +39,23 @@ impl Parser for Format { let mut buf = Vec::new(); let mut string_resources = Vec::new(); let mut plural_resources: Vec = Vec::new(); + let mut pending_comment: Option = None; loop { match xml_reader.read_event_into(&mut buf) { Ok(Event::Start(ref e)) if e.name().as_ref() == b"string" => { - let sr = parse_string_resource(e, &mut xml_reader)?; + let mut sr = parse_string_resource(e, &mut xml_reader)?; + sr.comment = pending_comment.take(); string_resources.push(sr); } Ok(Event::Start(ref e)) if e.name().as_ref() == b"plurals" => { - let pr = parse_plurals_resource(e, &mut xml_reader)?; + let mut pr = parse_plurals_resource(e, &mut xml_reader)?; + pr.comment = pending_comment.take(); plural_resources.push(pr); } + Ok(Event::Comment(comment)) => { + pending_comment = Some(parse_xml_comment(comment.as_ref())); + } Ok(Event::Eof) => break, Ok(_) => {} Err(e) => return Err(Error::XmlParse(e)), @@ -75,6 +81,7 @@ impl Parser for Format { xml_writer.write_event(Event::Text(BytesText::new("\n")))?; for sr in &self.strings { + write_xml_comment(&mut xml_writer, sr.comment.as_deref())?; let mut elem = BytesStart::new("string"); elem.push_attribute(("name", sr.name.as_str())); if let Some(trans) = sr.translatable { @@ -89,6 +96,7 @@ impl Parser for Format { // Write plurals for pr in &self.plurals { + write_xml_comment(&mut xml_writer, pr.comment.as_deref())?; let mut elem = BytesStart::new("plurals"); elem.push_attribute(("name", pr.name.as_str())); if let Some(trans) = pr.translatable { @@ -151,6 +159,7 @@ impl From for Format { plurals.push(PluralsResource { name: entry.id, items, + comment: entry.comment, translatable: match entry.status { EntryStatus::Translated => Some(true), EntryStatus::DoNotTranslate => Some(false), @@ -199,7 +208,7 @@ impl From for Resource { entries.push(Entry { id: pr.name.clone(), value: Translation::Plural(Plural { id: pr.name, forms }), - comment: None, + comment: pr.comment, status, custom: HashMap::new(), }); @@ -221,6 +230,7 @@ pub struct StringResource { pub name: String, pub value: String, pub translatable: Option, + pub comment: Option, } impl StringResource { @@ -229,6 +239,7 @@ impl StringResource { name, value, translatable, + comment, } = self; let is_value_empty = value.is_empty(); @@ -236,7 +247,7 @@ impl StringResource { Entry { id: name, value: Translation::Singular(value), - comment: None, + comment, status: match translatable { Some(true) => EntryStatus::Translated, Some(false) => EntryStatus::DoNotTranslate, @@ -255,6 +266,7 @@ impl StringResource { Translation::Singular(v) => v.clone(), Translation::Plural(_) => String::new(), // Plurals not supported in strings.xml }, + comment: entry.comment.clone(), translatable: match entry.status { EntryStatus::Translated => Some(true), EntryStatus::DoNotTranslate => Some(false), @@ -276,6 +288,7 @@ pub struct PluralsResource { pub name: String, pub items: Vec, pub translatable: Option, + pub comment: Option, } fn parse_string_resource( @@ -334,6 +347,7 @@ fn parse_string_resource( name, value, translatable, + comment: None, }) } @@ -411,9 +425,37 @@ fn parse_plurals_resource( name, items, translatable, + comment: None, }) } +fn parse_xml_comment(raw: &[u8]) -> String { + String::from_utf8_lossy(raw).trim().to_string() +} + +fn sanitize_xml_comment(comment: &str) -> String { + let mut sanitized = comment.replace("--", "- -"); + if sanitized.ends_with('-') { + sanitized.push(' '); + } + sanitized +} + +fn write_xml_comment( + xml_writer: &mut Writer, + comment: Option<&str>, +) -> Result<(), Error> { + let Some(comment) = comment.map(str::trim).filter(|comment| !comment.is_empty()) else { + return Ok(()); + }; + + xml_writer.write_event(Event::Comment(BytesText::new(&sanitize_xml_comment( + comment, + ))))?; + xml_writer.write_event(Event::Text(BytesText::new("\n")))?; + Ok(()) +} + #[cfg(test)] mod tests { @@ -548,6 +590,58 @@ World } } + #[test] + fn test_parse_and_round_trip_entry_comments() { + let xml = r#" + + + Hi + + + One apple + %d apples + + + "#; + + let format = Format::from_str(xml).unwrap(); + assert_eq!( + format.strings[0].comment.as_deref(), + Some("Greeting shown on the start screen.") + ); + assert_eq!( + format.plurals[0].comment.as_deref(), + Some("Pluralized inventory count for apples.") + ); + + let resource = Resource::from(format); + assert_eq!( + resource.find_entry("greet").unwrap().comment.as_deref(), + Some("Greeting shown on the start screen.") + ); + assert_eq!( + resource.find_entry("apples").unwrap().comment.as_deref(), + Some("Pluralized inventory count for apples.") + ); + + let round_trip = Format::from(resource); + let mut out = Vec::new(); + round_trip.to_writer(&mut out).unwrap(); + let out_str = String::from_utf8(out).unwrap(); + assert!(out_str.contains("")); + assert!(out_str.contains("")); + + let reparsed = Format::from_str(&out_str).unwrap(); + assert_eq!( + reparsed.strings[0].comment.as_deref(), + Some("Greeting shown on the start screen.") + ); + assert_eq!( + reparsed.plurals[0].comment.as_deref(), + Some("Pluralized inventory count for apples.") + ); + } + #[test] fn test_entry_with_empty_value_status_new() { let xml = r#"