From c1ccd432a9aed3ecc63708e93f7e11fa7aaeedb3 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:06:15 +0300 Subject: [PATCH 01/19] test: add snapshot test pinning current generate output --- tests/expected/snapshot_baseline.yaml | 98 ++++++++++++++++++++++++++ tests/fixtures/snapshot_input.har | 5 ++ tests/fixtures/snapshot_templates.yaml | 3 + tests/snapshot_compat.rs | 87 +++++++++++++++++++++++ 4 files changed, 193 insertions(+) create mode 100644 tests/expected/snapshot_baseline.yaml create mode 100644 tests/fixtures/snapshot_input.har create mode 100644 tests/fixtures/snapshot_templates.yaml create mode 100644 tests/snapshot_compat.rs diff --git a/tests/expected/snapshot_baseline.yaml b/tests/expected/snapshot_baseline.yaml new file mode 100644 index 0000000..673a280 --- /dev/null +++ b/tests/expected/snapshot_baseline.yaml @@ -0,0 +1,98 @@ +openapi: '3.0.3' +info: + title: api.example.com API + version: 1.0.0 +servers: +- url: https://api.example.com +paths: + /api/v1/users: + get: + tags: + - api + summary: GET /api/v1/users + responses: + '200': + description: OK + content: + application/json: + schema: + type: object + properties: + items: + type: array + items: + type: object + properties: + id: + type: integer + name: + type: string + examples: + response_1: + value: + items: + - id: 1 + name: Alice + post: + tags: + - api + summary: POST /api/v1/users + requestBody: + content: + application/json: + schema: + type: object + properties: + name: + type: string + examples: + Bob: + value: + name: Bob + required: true + responses: + '201': + description: Created + content: + application/json: + schema: + type: object + properties: + id: + type: integer + name: + type: string + examples: + Bob: + value: + id: 2 + name: Bob + /api/v1/users/{id}: + get: + tags: + - api + summary: GET /api/v1/users/{id} + parameters: + - in: path + name: id + required: true + schema: + type: string + style: simple + responses: + '200': + description: OK + content: 
+ application/json: + schema: + type: object + properties: + id: + type: integer + name: + type: string + examples: + Charlie: + value: + id: 42 + name: Charlie diff --git a/tests/fixtures/snapshot_input.har b/tests/fixtures/snapshot_input.har new file mode 100644 index 0000000..26ddd76 --- /dev/null +++ b/tests/fixtures/snapshot_input.har @@ -0,0 +1,5 @@ +{"log":{"version":"1.2","creator":{"name":"test","version":"1.0"},"entries":[ + {"startedDateTime":"2025-01-15T10:00:00.000Z","time":100,"request":{"method":"GET","url":"https://api.example.com/api/v1/users","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":50,"mimeType":"application/json","text":"{\"items\": [{\"id\": 1, \"name\": \"Alice\"}]}"},"redirectURL":"","headersSize":-1,"bodySize":50},"cache":{},"timings":{"send":1,"wait":90,"receive":9}}, + {"startedDateTime":"2025-01-15T10:00:01.000Z","time":100,"request":{"method":"POST","url":"https://api.example.com/api/v1/users","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"queryString":[],"headersSize":-1,"bodySize":30,"postData":{"mimeType":"application/json","text":"{\"name\": \"Bob\"}"}},"response":{"status":201,"statusText":"Created","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":30,"mimeType":"application/json","text":"{\"id\": 2, \"name\": \"Bob\"}"},"redirectURL":"","headersSize":-1,"bodySize":30},"cache":{},"timings":{"send":1,"wait":90,"receive":9}}, + 
{"startedDateTime":"2025-01-15T10:00:02.000Z","time":100,"request":{"method":"GET","url":"https://api.example.com/api/v1/users/42","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":35,"mimeType":"application/json","text":"{\"id\": 42, \"name\": \"Charlie\"}"},"redirectURL":"","headersSize":-1,"bodySize":35},"cache":{},"timings":{"send":1,"wait":90,"receive":9}} +]}} diff --git a/tests/fixtures/snapshot_templates.yaml b/tests/fixtures/snapshot_templates.yaml new file mode 100644 index 0000000..770db64 --- /dev/null +++ b/tests/fixtures/snapshot_templates.yaml @@ -0,0 +1,3 @@ +x-path-templates: + - /api/v1/users + - /api/v1/users/{id} diff --git a/tests/snapshot_compat.rs b/tests/snapshot_compat.rs new file mode 100644 index 0000000..77ef373 --- /dev/null +++ b/tests/snapshot_compat.rs @@ -0,0 +1,87 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn har_fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +fn expected_file(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("expected") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn snapshot_compat() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("snapshot_output.yaml"); + + std::fs::create_dir_all( + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("expected"), + ) + .unwrap(); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + har_fixture("snapshot_input.har").to_str().unwrap(), + "-t", + har_fixture("snapshot_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + ]) + .assert() + 
.success(); + + let actual = std::fs::read_to_string(&output).unwrap(); + let baseline_path = expected_file("snapshot_baseline.yaml"); + let expected = std::fs::read_to_string(&baseline_path).unwrap_or_else(|e| { + panic!( + "Could not read baseline file {}: {}", + baseline_path.display(), + e + ) + }); + + if actual != expected { + let actual_lines: Vec<&str> = actual.lines().collect(); + let expected_lines: Vec<&str> = expected.lines().collect(); + let max_lines = actual_lines.len().max(expected_lines.len()); + + eprintln!("=== SNAPSHOT DIFF (expected vs actual) ==="); + for i in 0..max_lines { + let exp_line = expected_lines.get(i).copied().unwrap_or(""); + let act_line = actual_lines.get(i).copied().unwrap_or(""); + if exp_line != act_line { + eprintln!("Line {:>4}: expected: {:?}", i + 1, exp_line); + eprintln!(" actual: {:?}", act_line); + } + } + eprintln!("=== END DIFF ==="); + panic!( + "Snapshot mismatch: generate output differs from baseline.\n\ + If this change is intentional, regenerate the baseline with:\n\ + cargo run -- generate -i tests/fixtures/snapshot_input.har \\\n\ + -t tests/fixtures/snapshot_templates.yaml \\\n\ + -o tests/expected/snapshot_baseline.yaml \\\n\ + -p https://api.example.com" + ); + } + + println!( + "Snapshot test passed: output matches baseline ({} bytes)", + actual.len() + ); +} From 5e7f1aa40104b8e39e3bbb948ec41ad410a56452 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:26:05 +0300 Subject: [PATCH 02/19] feat: add tag_rules module with regex-based tag matching --- src/lib.rs | 1 + src/tag_rules.rs | 225 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 226 insertions(+) create mode 100644 src/tag_rules.rs diff --git a/src/lib.rs b/src/lib.rs index ef2d0bf..8b309b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,7 @@ pub mod path_matching; pub mod redact; pub mod report; pub mod schema; +pub mod tag_rules; pub mod tnetstring; pub(crate) mod type_hints; pub mod types; diff --git 
a/src/tag_rules.rs b/src/tag_rules.rs new file mode 100644 index 0000000..084af18 --- /dev/null +++ b/src/tag_rules.rs @@ -0,0 +1,225 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use serde::Deserialize; +use std::path::Path; + +pub struct TagRule { + pub pattern: Regex, + pub tag: String, +} + +/// Strategy for assigning tags to API operations. +pub enum TagStrategy { + /// Default: the builder calls its own `extract_tag()` logic. + Legacy, + /// Suppress all tags (empty `tags: []` on every operation). + None, + /// Extract the path segment at the given 0-based index. + /// Segments are split by `/` with empty segments removed. + /// Index 0 = first segment after the leading `/`. + PathSegment { index: usize }, + /// First-match-wins regex rules with an optional default tag. + Rules { + rules: Vec, + default: Option, + }, +} + +#[derive(Deserialize)] +struct RawTagRule { + #[serde(rename = "match")] + match_pattern: String, + tag: String, +} + +#[derive(Deserialize)] +struct RawTagRules { + rules: Vec, + default: Option, +} + +/// Load tag rules from a YAML file. +/// Returns `Err` if the file can't be read or any regex is invalid. +pub fn load_tag_rules(path: &Path) -> Result { + let content = std::fs::read_to_string(path) + .with_context(|| format!("failed to read tag rules from {}", path.display()))?; + let raw: RawTagRules = serde_yaml_ng::from_str(&content) + .with_context(|| format!("failed to parse tag rules YAML from {}", path.display()))?; + let rules = raw + .rules + .into_iter() + .map(|r| { + let pattern = Regex::new(&r.match_pattern) + .with_context(|| format!("invalid regex in tag rule: {}", r.match_pattern))?; + Ok(TagRule { + pattern, + tag: r.tag, + }) + }) + .collect::>>()?; + Ok(TagStrategy::Rules { + rules, + default: raw.default, + }) +} + +/// Apply the strategy to a URL path, returning the resolved tag or `None`. +/// For `Legacy` and `None` strategies, always returns `None` — the builder +/// handles these cases directly. 
+pub fn resolve_tag(strategy: &TagStrategy, path: &str) -> Option { + match strategy { + TagStrategy::Legacy | TagStrategy::None => Option::None, + TagStrategy::PathSegment { index } => { + let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + segments.get(*index).map(|s| (*s).to_string()) + } + TagStrategy::Rules { rules, default } => { + for rule in rules { + if rule.pattern.is_match(path) { + return Some(rule.tag.clone()); + } + } + default.clone() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_rules(patterns: &[(&str, &str)], default: Option<&str>) -> TagStrategy { + TagStrategy::Rules { + rules: patterns + .iter() + .map(|(pat, tag)| TagRule { + pattern: Regex::new(pat).unwrap(), + tag: (*tag).to_string(), + }) + .collect(), + default: default.map(String::from), + } + } + + #[test] + fn match_first_wins() { + let strategy = make_rules( + &[ + ("^/api/v1/contract/", "Contract"), + ("^/api/v1/private/order", "Order"), + ], + Option::None, + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/contract/ticker"), + Some("Contract".to_string()), + ); + } + + #[test] + fn no_match_with_default() { + let strategy = make_rules(&[("^/api/v1/contract/", "Contract")], Some("Default")); + assert_eq!( + resolve_tag(&strategy, "/api/v1/other"), + Some("Default".to_string()), + ); + } + + #[test] + fn no_match_no_default() { + let strategy = make_rules(&[("^/api/v1/contract/", "Contract")], Option::None); + assert_eq!(resolve_tag(&strategy, "/api/v1/other"), Option::None); + } + + #[test] + fn regex_capture_groups() { + let strategy = make_rules(&[("^/api/v1/(private/)?account", "Account")], Option::None); + assert_eq!( + resolve_tag(&strategy, "/api/v1/account"), + Some("Account".to_string()), + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/private/account"), + Some("Account".to_string()), + ); + } + + #[test] + fn empty_rules() { + let with_default = make_rules(&[], Some("Fallback")); + assert_eq!( + 
resolve_tag(&with_default, "/anything"), + Some("Fallback".to_string()), + ); + + let without_default = make_rules(&[], Option::None); + assert_eq!(resolve_tag(&without_default, "/anything"), Option::None); + } + + #[test] + fn path_segment_strategy() { + let idx0 = TagStrategy::PathSegment { index: 0 }; + assert_eq!( + resolve_tag(&idx0, "/api/v1/contract/ticker"), + Some("api".to_string()), + ); + + let idx2 = TagStrategy::PathSegment { index: 2 }; + assert_eq!( + resolve_tag(&idx2, "/api/v1/contract/ticker"), + Some("contract".to_string()), + ); + } + + #[test] + fn path_segment_out_of_bounds() { + let strategy = TagStrategy::PathSegment { index: 10 }; + assert_eq!( + resolve_tag(&strategy, "/api/v1/contract/ticker"), + Option::None, + ); + } + + #[test] + fn load_tag_rules_from_yaml() { + let yaml = "\ +rules: + - match: \"^/api/v1/contract/\" + tag: Contract + - match: \"^/api/v1/private/order\" + tag: Order +default: Default +"; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("tags.yaml"); + std::fs::write(&path, yaml).unwrap(); + + let strategy = load_tag_rules(&path).unwrap(); + assert_eq!( + resolve_tag(&strategy, "/api/v1/contract/ticker"), + Some("Contract".to_string()), + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/private/order/123"), + Some("Order".to_string()), + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/other"), + Some("Default".to_string()), + ); + } + + #[test] + fn load_tag_rules_invalid_regex() { + let yaml = "\ +rules: + - match: \"[invalid\" + tag: Bad +"; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("tags.yaml"); + std::fs::write(&path, yaml).unwrap(); + + assert!(load_tag_rules(&path).is_err()); + } +} From b970591d102bf87a82ae585b5b6a55513ed85226 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:28:20 +0300 Subject: [PATCH 03/19] feat: add operation_id module with camelCase derivation and collision resolution --- src/lib.rs | 1 + src/operation_id.rs | 341 
++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 342 insertions(+) create mode 100644 src/operation_id.rs diff --git a/src/lib.rs b/src/lib.rs index 8b309b9..49ba57b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,7 @@ pub mod cli; pub mod error; pub mod har_reader; pub mod mitmproxy_reader; +pub mod operation_id; pub mod output; pub mod params; pub mod path_matching; diff --git a/src/operation_id.rs b/src/operation_id.rs new file mode 100644 index 0000000..6f8fb25 --- /dev/null +++ b/src/operation_id.rs @@ -0,0 +1,341 @@ +use anyhow::Result; +use std::collections::HashMap; +use std::path::Path; + +/// Strategy for generating operationId values. +#[derive(Clone, Debug)] +pub enum OperationIdStrategy { + /// Do not generate operationId. + None, + /// Derive operationId from HTTP method + path template. + Path, + /// Use a custom template string with `{method}` and `{path}` placeholders. + Template(String), +} + +/// Derive an operationId from method + template path. +/// +/// `template_path` is the OpenAPI path template (e.g. "/api/v1/users/{id}"). +pub fn derive_operation_id( + method: &str, + template_path: &str, + strategy: &OperationIdStrategy, +) -> Option { + match strategy { + OperationIdStrategy::None => None, + OperationIdStrategy::Template(tmpl) => Some( + tmpl.replace("{method}", method) + .replace("{path}", template_path), + ), + OperationIdStrategy::Path => derive_from_path(method, template_path), + } +} + +/// Resolve collisions in a set of operations. +/// +/// Input: mutable vec of `(path, method, Option)`. +/// Collision rule: sort by `(path, method)`, first keeps name, rest get `_2`, `_3`, etc. 
+pub fn resolve_collisions(operations: &mut [(String, String, Option)]) { + operations.sort_by(|a, b| (&a.0, &a.1).cmp(&(&b.0, &b.1))); + + let mut seen: HashMap = HashMap::new(); + for op in operations.iter_mut() { + if let Some(ref mut id) = op.2 { + let count = seen.entry(id.clone()).or_insert(0); + *count += 1; + if *count > 1 { + *id = format!("{}_{}", id, count); + } + } + } +} + +/// Load operationId overrides from a YAML file. +/// +/// YAML format: `"METHOD /path": operationId` +pub fn load_overrides(path: &Path) -> Result> { + let content = std::fs::read_to_string(path)?; + let map: HashMap = serde_yaml_ng::from_str(&content)?; + Ok(map) +} + +fn is_param(segment: &str) -> bool { + segment.starts_with('{') && segment.ends_with('}') +} + +fn derive_from_path(method: &str, template_path: &str) -> Option { + let segments: Vec<&str> = template_path.split('/').filter(|s| !s.is_empty()).collect(); + let is_item = segments.last().is_some_and(|s| is_param(s)); + let is_collection = !is_item; + + let non_params: Vec<&str> = segments.iter().filter(|s| !is_param(s)).copied().collect(); + + if method.eq_ignore_ascii_case("POST") && is_collection && non_params.len() >= 2 { + if let Some(&last) = non_params.last() { + if !last.ends_with('s') { + let verb = last; + let noun_idx = non_params.len().checked_sub(2)?; + let noun = non_params.get(noun_idx)?; + return Some(format!("{}{}", verb, to_pascal_case(noun))); + } + } + } + + let verb = method_to_verb(method, is_collection); + let mut name_segs = path_to_name_segments(&segments); + + if method.eq_ignore_ascii_case("POST") && is_collection { + if let Some(last) = name_segs.last_mut() { + *last = singularize_pascal(last); + } + } + + Some(to_camel_case(verb, &name_segs)) +} + +fn method_to_verb(method: &str, is_collection: bool) -> &'static str { + match method.to_ascii_uppercase().as_str() { + "GET" if is_collection => "list", + "GET" => "get", + "POST" => "create", + "PUT" => "update", + "DELETE" => "delete", + "PATCH" 
=> "patch", + _ => "handle", + } +} + +fn path_to_name_segments(segments: &[&str]) -> Vec { + let last_non_param_pos = segments.iter().rposition(|s| !is_param(s)); + let last_param_pos = segments.iter().rposition(|s| is_param(s)); + + let mut result = Vec::new(); + + if let Some(param_pos) = last_param_pos { + if let Some(before_pos) = param_pos.checked_sub(1) { + if let Some(&before) = segments.get(before_pos) { + if !is_param(before) { + let singular = singularize(before); + result.push(to_pascal_case(&singular)); + + if let Some(lnp_pos) = last_non_param_pos { + if lnp_pos != before_pos { + if let Some(&last) = segments.get(lnp_pos) { + result.push(to_pascal_case(last)); + } + } + } + return result; + } + } + } + } + + if let Some(pos) = last_non_param_pos { + if let Some(&seg) = segments.get(pos) { + result.push(to_pascal_case(seg)); + } + } + + result +} + +fn to_pascal_case(s: &str) -> String { + s.split(['_', '-']) + .filter(|part| !part.is_empty()) + .map(|part| { + let mut chars = part.chars(); + match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{}{}", upper, chars.as_str()) + } + None => String::new(), + } + }) + .collect() +} + +fn to_camel_case(verb: &str, segments: &[String]) -> String { + let mut result = verb.to_string(); + for seg in segments { + result.push_str(seg); + } + result +} + +fn singularize(word: &str) -> String { + if word.len() > 1 && word.ends_with('s') && !word.ends_with("ss") { + word[..word.len() - 1].to_string() + } else { + word.to_string() + } +} + +fn singularize_pascal(word: &str) -> String { + if word.len() > 1 && word.ends_with('s') && !word.ends_with("ss") { + word[..word.len() - 1].to_string() + } else { + word.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn get_collection() { + let result = derive_operation_id("GET", "/api/v1/users", &OperationIdStrategy::Path); + assert_eq!(result, Some("listUsers".to_string())); + } + + 
#[test] + fn get_item() { + let result = derive_operation_id("GET", "/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("getUser".to_string())); + } + + #[test] + fn post() { + let result = derive_operation_id("POST", "/api/v1/users", &OperationIdStrategy::Path); + assert_eq!(result, Some("createUser".to_string())); + } + + #[test] + fn put_item() { + let result = derive_operation_id("PUT", "/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("updateUser".to_string())); + } + + #[test] + fn delete_item() { + let result = + derive_operation_id("DELETE", "/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("deleteUser".to_string())); + } + + #[test] + fn patch_item() { + let result = derive_operation_id("PATCH", "/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("patchUser".to_string())); + } + + #[test] + fn nested_resource() { + let result = derive_operation_id( + "GET", + "/api/v1/users/{id}/orders", + &OperationIdStrategy::Path, + ); + assert_eq!(result, Some("listUserOrders".to_string())); + } + + #[test] + fn deep_path() { + let result = derive_operation_id( + "GET", + "/api/v1/contract/fair_price/{symbol}", + &OperationIdStrategy::Path, + ); + assert_eq!(result, Some("getFairPrice".to_string())); + } + + #[test] + fn deep_post() { + let result = derive_operation_id( + "POST", + "/api/v1/private/order/place", + &OperationIdStrategy::Path, + ); + assert_eq!(result, Some("placeOrder".to_string())); + } + + #[test] + fn strategy_none() { + let result = derive_operation_id("GET", "/api/v1/users", &OperationIdStrategy::None); + assert_eq!(result, None); + } + + #[test] + fn collision_resolution() { + let mut ops = vec![ + ( + "/api/v1/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ( + "/api/v2/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ]; + resolve_collisions(&mut ops); + + 
assert_eq!(ops.first().unwrap().2, Some("listUsers".to_string())); + assert_eq!(ops.get(1).unwrap().2, Some("listUsers_2".to_string())); + + let mut ops2 = vec![ + ( + "/api/v2/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ( + "/api/v1/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ]; + resolve_collisions(&mut ops2); + assert_eq!(ops2.first().unwrap().2, Some("listUsers".to_string())); + assert_eq!(ops2.get(1).unwrap().2, Some("listUsers_2".to_string())); + } + + #[test] + fn override_wins() { + let dir = tempfile::tempdir().unwrap(); + let file_path = dir.path().join("overrides.yaml"); + let mut f = std::fs::File::create(&file_path).unwrap(); + writeln!(f, "\"GET /api/v1/users\": getAllUsers").unwrap(); + drop(f); + + let overrides = load_overrides(&file_path).unwrap(); + let key = "GET /api/v1/users"; + assert_eq!(overrides.get(key), Some(&"getAllUsers".to_string())); + } + + #[test] + fn template_strategy() { + let result = derive_operation_id( + "GET", + "/api/v1/users", + &OperationIdStrategy::Template("{method}_{path}".to_string()), + ); + assert_eq!(result, Some("GET_/api/v1/users".to_string())); + } + + #[test] + fn pascal_case_snake() { + assert_eq!(to_pascal_case("fair_price"), "FairPrice"); + } + + #[test] + fn pascal_case_simple() { + assert_eq!(to_pascal_case("users"), "Users"); + } + + #[test] + fn singularize_plural() { + assert_eq!(singularize("users"), "user"); + assert_eq!(singularize("orders"), "order"); + } + + #[test] + fn singularize_already_singular() { + assert_eq!(singularize("place"), "place"); + assert_eq!(singularize("fair_price"), "fair_price"); + } +} From e44b0bbdf993e99aa40eca640ac3e443b7b21db1 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:35:42 +0300 Subject: [PATCH 04/19] feat(cli): add operationId and tag strategy CLI flags --- src/builder.rs | 11 +------- src/cli.rs | 46 +++++++++++++++++++++++++++++++- src/main.rs | 57 
+++++++++++++++++++++++++++++++++++++++- src/operation_id.rs | 3 ++- src/tag_rules.rs | 3 +++ src/types.rs | 9 ++++++- tests/merge_responses.rs | 11 +------- 7 files changed, 116 insertions(+), 24 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index 2ad5b8f..3a1676a 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -910,18 +910,9 @@ mod tests { fn test_config() -> Config { Config { prefix: "https://api.example.com".to_string(), - openapi_title: None, openapi_version: "1.0.0".to_string(), - exclude_headers: vec![], - exclude_cookies: vec![], - include_headers: false, - ignore_images: false, - suppress_params: false, - tags_overrides: None, - skip_options: false, max_examples: 5, - redact_patterns: vec![], - redact_fields: vec![], + ..Default::default() } } diff --git a/src/cli.rs b/src/cli.rs index 0e08f70..15fc3ba 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,7 +15,7 @@ pub enum Command { /// Discover API endpoints from captured traffic and produce a templates file Discover(DiscoverArgs), /// Generate an OpenAPI specification from captured traffic using a templates file - Generate(GenerateArgs), + Generate(Box), } /// Input format for traffic captures @@ -30,6 +30,32 @@ pub enum InputFormat { Mitmproxy, } +/// Tag strategy for operations +#[derive(ValueEnum, Clone, Debug, Default)] +pub enum TagStrategyArg { + /// Default: first non-param path segment (existing behavior) + #[default] + Legacy, + /// Suppress all tags + None, + /// Extract segment at given index + PathSegment, + /// Use regex rules file + Rules, +} + +/// operationId strategy for operations +#[derive(ValueEnum, Clone, Debug, Default)] +pub enum OperationIdStrategyArg { + /// Do not generate operationId (default) + #[default] + None, + /// Derive from HTTP method + path + Path, + /// Use custom template with {method} and {path} placeholders + Template, +} + fn parse_byte_size(s: &str) -> Result { let s = s.trim(); let (num_str, multiplier) = if let Some(n) = s.strip_suffix("GiB") { @@ 
-191,4 +217,22 @@ pub struct GenerateArgs { #[arg(long, value_delimiter = ',')] pub redact_fields: Vec, + + #[arg(long, value_enum, default_value_t = TagStrategyArg::Legacy)] + pub tag_strategy: TagStrategyArg, + + #[arg(long)] + pub tag_segment_index: Option, + + #[arg(long)] + pub tag_rules: Option, + + #[arg(long, value_enum, default_value_t = OperationIdStrategyArg::None)] + pub operation_id_strategy: OperationIdStrategyArg, + + #[arg(long)] + pub operation_id_template: Option, + + #[arg(long)] + pub operation_id_overrides: Option, } diff --git a/src/main.rs b/src/main.rs index c0a810f..2447693 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,7 @@ use clap::Parser; use tracing::{debug, info, warn}; use mitm2openapi::builder::{self, OpenApiBuilder}; -use mitm2openapi::cli::{Cli, Command, InputFormat}; +use mitm2openapi::cli::{Cli, Command, InputFormat, OperationIdStrategyArg, TagStrategyArg}; use mitm2openapi::har_reader; use mitm2openapi::mitmproxy_reader; use mitm2openapi::output; @@ -127,6 +127,7 @@ fn run(cli: Cli) -> Result { check_strict(strict, &report) } Command::Generate(args) => { + let args = *args; info!(input = %args.input.display(), output = %args.output.display(), "Starting generation"); let strict = args.strict; @@ -162,6 +163,57 @@ fn run(cli: Cli) -> Result { info!(count = active_templates.len(), "Using active templates"); + let tag_strategy = match &args.tag_strategy { + TagStrategyArg::Legacy => mitm2openapi::tag_rules::TagStrategy::Legacy, + TagStrategyArg::None => mitm2openapi::tag_rules::TagStrategy::None, + TagStrategyArg::PathSegment => { + let index = args.tag_segment_index.unwrap_or(0); + mitm2openapi::tag_rules::TagStrategy::PathSegment { index } + } + TagStrategyArg::Rules => { + let rules_path = args + .tag_rules + .as_ref() + .context("--tag-rules path required when --tag-strategy=rules")?; + mitm2openapi::tag_rules::load_tag_rules(rules_path) + .context("failed to load tag rules")? 
+ } + }; + + let tag_strategy = if let Some(rules_path) = &args.tag_rules { + if matches!(args.tag_strategy, TagStrategyArg::Legacy) { + mitm2openapi::tag_rules::load_tag_rules(rules_path) + .context("failed to load tag rules")? + } else { + tag_strategy + } + } else { + tag_strategy + }; + + let operation_id_overrides: std::collections::HashMap = + if let Some(overrides_path) = &args.operation_id_overrides { + mitm2openapi::operation_id::load_overrides(overrides_path) + .context("failed to load operationId overrides")? + } else { + std::collections::HashMap::new() + }; + + let operation_id_strategy = match &args.operation_id_strategy { + OperationIdStrategyArg::None => { + mitm2openapi::operation_id::OperationIdStrategy::None + } + OperationIdStrategyArg::Path => { + mitm2openapi::operation_id::OperationIdStrategy::Path + } + OperationIdStrategyArg::Template => { + let tmpl = args.operation_id_template.clone().context( + "--operation-id-template required when --operation-id-strategy=template", + )?; + mitm2openapi::operation_id::OperationIdStrategy::Template(tmpl) + } + }; + let config = Config { prefix: args.prefix.clone(), openapi_title: args.openapi_title.clone(), @@ -176,6 +228,9 @@ fn run(cli: Cli) -> Result { max_examples: args.max_examples, redact_patterns: args.redact_patterns.clone(), redact_fields: args.redact_fields.clone(), + tag_strategy, + operation_id_strategy, + operation_id_overrides, }; let mut builder = OpenApiBuilder::new(&args.prefix, &config, active_templates); diff --git a/src/operation_id.rs b/src/operation_id.rs index 6f8fb25..c3cfbd9 100644 --- a/src/operation_id.rs +++ b/src/operation_id.rs @@ -3,9 +3,10 @@ use std::collections::HashMap; use std::path::Path; /// Strategy for generating operationId values. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub enum OperationIdStrategy { /// Do not generate operationId. + #[default] None, /// Derive operationId from HTTP method + path template. 
Path, diff --git a/src/tag_rules.rs b/src/tag_rules.rs index 084af18..1383e2f 100644 --- a/src/tag_rules.rs +++ b/src/tag_rules.rs @@ -3,14 +3,17 @@ use regex::Regex; use serde::Deserialize; use std::path::Path; +#[derive(Debug, Clone)] pub struct TagRule { pub pattern: Regex, pub tag: String, } /// Strategy for assigning tags to API operations. +#[derive(Debug, Clone, Default)] pub enum TagStrategy { /// Default: the builder calls its own `extract_tag()` logic. + #[default] Legacy, /// Suppress all tags (empty `tags: []` on every operation). None, diff --git a/src/types.rs b/src/types.rs index 6ca62a8..39bfc8a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,3 +1,7 @@ +use crate::operation_id::OperationIdStrategy; +use crate::tag_rules::TagStrategy; +use std::collections::HashMap; + /// Unified interface for captured HTTP requests from different sources (HAR, mitmproxy). pub trait CapturedRequest { fn get_url(&self) -> &str; @@ -12,7 +16,7 @@ pub trait CapturedRequest { } /// Configuration for OpenAPI generation, derived from CLI arguments. 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct Config { pub prefix: String, pub openapi_title: Option, @@ -27,4 +31,7 @@ pub struct Config { pub max_examples: usize, pub redact_patterns: Vec, pub redact_fields: Vec, + pub tag_strategy: TagStrategy, + pub operation_id_strategy: OperationIdStrategy, + pub operation_id_overrides: HashMap, } diff --git a/tests/merge_responses.rs b/tests/merge_responses.rs index 31418f8..f08364d 100644 --- a/tests/merge_responses.rs +++ b/tests/merge_responses.rs @@ -80,18 +80,9 @@ impl CapturedRequest for MockRequest { fn test_config() -> Config { Config { prefix: "https://api.example.com".to_string(), - openapi_title: None, openapi_version: "1.0.0".to_string(), - exclude_headers: vec![], - exclude_cookies: vec![], - include_headers: false, - ignore_images: false, - suppress_params: false, - tags_overrides: None, - skip_options: false, max_examples: 5, - redact_patterns: vec![], - redact_fields: vec![], + ..Default::default() } } From 73bfb41d92599f469b7a4eb644b537083e13bdef Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:36:01 +0300 Subject: [PATCH 05/19] feat: add envelope module for discriminator-based response splitting --- src/envelope.rs | 222 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 223 insertions(+) create mode 100644 src/envelope.rs diff --git a/src/envelope.rs b/src/envelope.rs new file mode 100644 index 0000000..662f283 --- /dev/null +++ b/src/envelope.rs @@ -0,0 +1,222 @@ +//! Envelope-based response splitting. +//! +//! Many APIs wrap every response in a `{ "success": bool, ... }` envelope. +//! This module classifies captured response bodies into *success* vs *error* +//! groups based on a discriminator field, infers an `ApiError` schema from the +//! error examples, and builds a `oneOf` schema with a discriminator annotation. 
+ +use openapiv3::{Discriminator, ReferenceOr, Schema, SchemaData, SchemaKind}; +use serde_json::Value; + +/// Configuration for envelope-based response splitting. +pub struct EnvelopeConfig { + /// JSON field name used as the discriminator (e.g. `"success"`). + pub discriminator_field: String, + /// Optional pre-defined error schema; skips inference when set. + pub error_shape: Option, + /// Suffix appended to component names (e.g. `"Success"`). + pub success_suffix: String, +} + +/// Group response bodies into (success, error) based on a discriminator field. +/// +/// Classification: only a JSON boolean `true` at `discriminator` counts as +/// success. Everything else — `false`, `null`, strings, numbers, or a missing +/// field — is classified as error. +pub fn group_bodies(bodies: &[Value], discriminator: &str) -> (Vec, Vec) { + let mut success = Vec::new(); + let mut error = Vec::new(); + for body in bodies { + if body.get(discriminator) == Some(&Value::Bool(true)) { + success.push(body.clone()); + } else { + error.push(body.clone()); + } + } + (success, error) +} + +/// Infer an `ApiError` schema from error body examples. +/// +/// If `config.error_shape` is set, returns that directly. +/// Otherwise uses [`crate::schema::value_to_schema`] on the first error body. +/// Falls back to an empty `Any` schema when no examples exist. +pub fn infer_api_error(error_bodies: &[Value], config: &EnvelopeConfig) -> Schema { + if let Some(custom) = &config.error_shape { + return custom.clone(); + } + if let Some(first) = error_bodies.first() { + return crate::schema::value_to_schema(first); + } + Schema { + schema_data: SchemaData::default(), + schema_kind: SchemaKind::Any(openapiv3::AnySchema::default()), + } +} + +/// Build a `oneOf` schema combining a success `$ref` and an error `$ref`, +/// annotated with an OpenAPI discriminator. 
+pub fn build_one_of_schema( + success_ref: &str, + error_ref: &str, + discriminator_field: &str, +) -> ReferenceOr { + let one_of = vec![ReferenceOr::ref_(success_ref), ReferenceOr::ref_(error_ref)]; + + ReferenceOr::Item(Schema { + schema_data: SchemaData { + discriminator: Some(Discriminator { + property_name: discriminator_field.to_string(), + mapping: indexmap::IndexMap::new(), + extensions: indexmap::IndexMap::new(), + }), + ..SchemaData::default() + }, + schema_kind: SchemaKind::OneOf { one_of }, + }) +} + +/// Derive a PascalCase component name for the success schema. +/// +/// Prefers `operationId` when available (uppercasing the first letter), +/// otherwise falls back to `Method` + path segments with each segment +/// capitalised. +pub fn success_component_name( + operation_id: Option<&str>, + path: &str, + method: &str, + suffix: &str, +) -> String { + if let Some(op_id) = operation_id { + let mut chars = op_id.chars(); + return match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{upper}{}{suffix}", chars.as_str()) + } + None => suffix.to_string(), + }; + } + + let path_part: String = path + .split('/') + .filter(|s| !s.is_empty()) + .map(|s| { + let s = s.trim_matches(|c: char| c == '{' || c == '}'); + let mut chars = s.chars(); + match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{upper}{}", chars.as_str()) + } + None => String::new(), + } + }) + .collect(); + + let method_part = { + let mut chars = method.chars(); + match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{upper}{}", chars.as_str().to_lowercase()) + } + None => String::new(), + } + }; + + format!("{method_part}{path_part}{suffix}") +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn group_by_discriminator() { + let bodies = vec![ + json!({"success": true, "data": {}}), + json!({"success": true, "data": {"price": 
1.0}}), + json!({"success": true, "data": {"price": 2.0}}), + json!({"success": false, "code": 1, "message": "err"}), + ]; + let (success, error) = group_bodies(&bodies, "success"); + assert_eq!(success.len(), 3); + assert_eq!(error.len(), 1); + } + + #[test] + fn only_success_unchanged() { + let bodies = vec![json!({"success": true, "data": {}})]; + let (success, error) = group_bodies(&bodies, "success"); + assert_eq!(success.len(), 1); + assert!(error.is_empty()); + } + + #[test] + fn non_boolean_discriminator_is_error() { + let bodies = vec![ + json!({"success": 1}), + json!({"success": "yes"}), + json!({"success": null}), + ]; + let (success, error) = group_bodies(&bodies, "success"); + assert!(success.is_empty()); + assert_eq!(error.len(), 3); + } + + #[test] + fn missing_discriminator_field_is_error() { + let bodies = vec![json!({"data": {}})]; + let (success, error) = group_bodies(&bodies, "success"); + assert!(success.is_empty()); + assert_eq!(error.len(), 1); + } + + #[test] + fn zero_error_bodies() { + let bodies = vec![json!({"success": true, "data": {}})]; + let (success, error) = group_bodies(&bodies, "success"); + assert_eq!(success.len(), 1); + assert!(error.is_empty()); + } + + #[test] + fn success_component_name_from_operation_id() { + let name = success_component_name( + Some("getFairPrice"), + "/api/v1/contract/fair_price/{symbol}", + "GET", + "Success", + ); + assert_eq!(name, "GetFairPriceSuccess"); + } + + #[test] + fn success_component_name_fallback() { + let name = success_component_name(None, "/api/v1/users/{id}", "GET", "Success"); + assert!(name.contains("Success")); + assert!(!name.is_empty()); + } + + #[test] + fn build_one_of_schema_structure() { + let schema = build_one_of_schema( + "#/components/schemas/GetTickerSuccess", + "#/components/schemas/ApiError", + "success", + ); + if let ReferenceOr::Item(s) = schema { + match &s.schema_kind { + SchemaKind::OneOf { one_of } => { + assert_eq!(one_of.len(), 2); + } + other => 
panic!("Expected OneOf, got {other:?}"), + } + assert!(s.schema_data.discriminator.is_some()); + } else { + panic!("Expected Item, got Ref"); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 49ba57b..d6ae4d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,7 @@ pub mod builder; pub mod cli; +pub mod envelope; pub mod error; pub mod har_reader; pub mod mitmproxy_reader; From 3e2ee3960fbad5dffb1e81856f42669e41329638 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:41:46 +0300 Subject: [PATCH 06/19] feat(generate): integrate tag rules strategy into builder --- src/builder.rs | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index 3a1676a..6396102 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -143,6 +143,7 @@ pub fn discover_paths( pub struct OpenApiBuilder { prefix: String, config: Config, + tag_strategy: crate::tag_rules::TagStrategy, tags_overrides: Option>, compiled_templates: path_matching::CompiledTemplates, spec: OpenAPI, @@ -475,9 +476,12 @@ impl OpenApiBuilder { None }; + let tag_strategy = config.tag_strategy.clone(); + Self { prefix: prefix.to_string(), config: config.clone(), + tag_strategy, tags_overrides, compiled_templates, spec, @@ -639,8 +643,22 @@ impl OpenApiBuilder { ..Operation::default() }; - if let Some(tag) = extract_tag(&template_path, &self.tags_overrides) { - operation.tags = vec![tag]; + match &self.tag_strategy { + crate::tag_rules::TagStrategy::Legacy => { + if let Some(tag) = extract_tag(&template_path, &self.tags_overrides) { + operation.tags = vec![tag]; + } + } + crate::tag_rules::TagStrategy::None => { + // suppress tags — leave operation.tags empty + } + crate::tag_rules::TagStrategy::PathSegment { .. } + | crate::tag_rules::TagStrategy::Rules { .. 
} => { + if let Some(tag) = crate::tag_rules::resolve_tag(&self.tag_strategy, &template_path) + { + operation.tags = vec![tag]; + } + } } if !self.config.suppress_params { From 5607d8a7b009adfe5a665d7b2f56dcde93786cc7 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:41:54 +0300 Subject: [PATCH 07/19] feat(cli): add envelope detection CLI flags --- src/cli.rs | 16 ++++++++++++++++ src/envelope.rs | 1 + src/main.rs | 24 ++++++++++++++++++++++++ src/types.rs | 2 ++ 4 files changed, 43 insertions(+) diff --git a/src/cli.rs b/src/cli.rs index 15fc3ba..b6f4ceb 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -235,4 +235,20 @@ pub struct GenerateArgs { #[arg(long)] pub operation_id_overrides: Option, + + /// Discriminator field name for envelope detection. + /// When set, 200 responses with both success:true and success:false + /// bodies will be split into oneOf(Success, ApiError). + #[arg(long)] + pub envelope_discriminator: Option, + + /// Path to YAML file containing the hand-supplied ApiError schema. + /// If omitted, schema is inferred from captured error bodies. + #[arg(long)] + pub envelope_error_shape: Option, + + /// Suffix for success component names (default: "Success"). + /// E.g., operationId "getFairPrice" → "GetFairPriceSuccess" + #[arg(long, default_value = "Success")] + pub envelope_success_component_suffix: String, } diff --git a/src/envelope.rs b/src/envelope.rs index 662f283..607216a 100644 --- a/src/envelope.rs +++ b/src/envelope.rs @@ -9,6 +9,7 @@ use openapiv3::{Discriminator, ReferenceOr, Schema, SchemaData, SchemaKind}; use serde_json::Value; /// Configuration for envelope-based response splitting. +#[derive(Clone, Debug)] pub struct EnvelopeConfig { /// JSON field name used as the discriminator (e.g. `"success"`). 
pub discriminator_field: String, diff --git a/src/main.rs b/src/main.rs index 2447693..328dca1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -214,6 +214,29 @@ fn run(cli: Cli) -> Result { } }; + let envelope_config = if let Some(discriminator) = args.envelope_discriminator.clone() { + let error_shape = if let Some(shape_path) = &args.envelope_error_shape { + let content = std::fs::read_to_string(shape_path).with_context(|| { + format!( + "failed to read envelope error shape from {}", + shape_path.display() + ) + })?; + let schema: openapiv3::Schema = serde_yaml_ng::from_str(&content) + .with_context(|| "failed to parse envelope error shape YAML")?; + Some(schema) + } else { + None + }; + Some(mitm2openapi::envelope::EnvelopeConfig { + discriminator_field: discriminator, + error_shape, + success_suffix: args.envelope_success_component_suffix.clone(), + }) + } else { + None + }; + let config = Config { prefix: args.prefix.clone(), openapi_title: args.openapi_title.clone(), @@ -231,6 +254,7 @@ fn run(cli: Cli) -> Result { tag_strategy, operation_id_strategy, operation_id_overrides, + envelope_config, }; let mut builder = OpenApiBuilder::new(&args.prefix, &config, active_templates); diff --git a/src/types.rs b/src/types.rs index 39bfc8a..a3ce422 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,3 +1,4 @@ +use crate::envelope::EnvelopeConfig; use crate::operation_id::OperationIdStrategy; use crate::tag_rules::TagStrategy; use std::collections::HashMap; @@ -34,4 +35,5 @@ pub struct Config { pub tag_strategy: TagStrategy, pub operation_id_strategy: OperationIdStrategy, pub operation_id_overrides: HashMap, + pub envelope_config: Option, } From 44ed4aee0a700af1526910d162ddb2429909462a Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:46:49 +0300 Subject: [PATCH 08/19] feat(generate): integrate operationId strategy into builder --- src/builder.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git 
a/src/builder.rs b/src/builder.rs index 6396102..1ac60fc 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -151,6 +151,8 @@ pub struct OpenApiBuilder { req_examples_store: HashMap<(String, String, String), Vec<(String, serde_json::Value)>>, max_examples: usize, redactor: Option, + operation_id_strategy: crate::operation_id::OperationIdStrategy, + operation_id_overrides: HashMap, } fn extract_tag( @@ -295,6 +297,20 @@ fn parse_body(body: &[u8], content_type: Option<&str>) -> Option<(String, serde_ None } +fn get_operation_ref<'a>(path_item: &'a PathItem, method: &str) -> Option<&'a Option> { + match method.to_uppercase().as_str() { + "GET" => Some(&path_item.get), + "PUT" => Some(&path_item.put), + "POST" => Some(&path_item.post), + "DELETE" => Some(&path_item.delete), + "OPTIONS" => Some(&path_item.options), + "HEAD" => Some(&path_item.head), + "PATCH" => Some(&path_item.patch), + "TRACE" => Some(&path_item.trace), + _ => None, + } +} + /// Get the method-specific operation slot from a PathItem (mutable). /// Returns `None` for HTTP methods not supported by the OpenAPI spec. 
fn get_operation_mut<'a>( @@ -477,6 +493,8 @@ impl OpenApiBuilder { }; let tag_strategy = config.tag_strategy.clone(); + let operation_id_strategy = config.operation_id_strategy.clone(); + let operation_id_overrides = config.operation_id_overrides.clone(); Self { prefix: prefix.to_string(), @@ -489,6 +507,8 @@ impl OpenApiBuilder { req_examples_store: HashMap::new(), max_examples: config.max_examples, redactor, + operation_id_strategy, + operation_id_overrides, } } @@ -661,6 +681,18 @@ impl OpenApiBuilder { } } + let override_key = format!("{} {}", method, template_path); + let op_id = if let Some(id) = self.operation_id_overrides.get(&override_key) { + Some(id.clone()) + } else { + crate::operation_id::derive_operation_id( + &method, + &template_path, + &self.operation_id_strategy, + ) + }; + operation.operation_id = op_id; + if !self.config.suppress_params { let mut parameters: Vec> = Vec::new(); @@ -748,6 +780,34 @@ impl OpenApiBuilder { /// Get the assembled OpenAPI spec. pub fn build(mut self) -> OpenAPI { + if !matches!( + self.operation_id_strategy, + crate::operation_id::OperationIdStrategy::None + ) { + let mut ops: Vec<(String, String, Option)> = Vec::new(); + for (path, path_ref) in &self.spec.paths.paths { + if let ReferenceOr::Item(path_item) = path_ref { + for method in &[ + "GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE", + ] { + if let Some(Some(op)) = get_operation_ref(path_item, method) { + ops.push((path.clone(), method.to_string(), op.operation_id.clone())); + } + } + } + } + crate::operation_id::resolve_collisions(&mut ops); + for (path, method, resolved_id) in ops { + if let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) { + if let Some(slot) = get_operation_mut(path_item, &method) { + if let Some(op) = slot.as_mut() { + op.operation_id = resolved_id; + } + } + } + } + } + for ((path, method, status), examples) in self.examples_store.drain() { let Some(ReferenceOr::Item(path_item)) = 
self.spec.paths.paths.get_mut(&path) else { continue; From 67668ed4eb3b39874f52baf2a4e330892d1f9027 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:52:34 +0300 Subject: [PATCH 09/19] feat(generate): integrate envelope detection into build() --- src/builder.rs | 137 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/src/builder.rs b/src/builder.rs index 1ac60fc..452eec3 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -153,6 +153,7 @@ pub struct OpenApiBuilder { redactor: Option, operation_id_strategy: crate::operation_id::OperationIdStrategy, operation_id_overrides: HashMap, + envelope_config: Option, } fn extract_tag( @@ -495,6 +496,7 @@ impl OpenApiBuilder { let tag_strategy = config.tag_strategy.clone(); let operation_id_strategy = config.operation_id_strategy.clone(); let operation_id_overrides = config.operation_id_overrides.clone(); + let envelope_config = config.envelope_config.clone(); Self { prefix: prefix.to_string(), @@ -509,6 +511,7 @@ impl OpenApiBuilder { redactor, operation_id_strategy, operation_id_overrides, + envelope_config, } } @@ -808,6 +811,140 @@ impl OpenApiBuilder { } } + // Envelope detection: MUST run before examples_store.drain() consumes raw bodies. 
+ if let Some(ref envelope_cfg) = self.envelope_config { + let mut all_error_bodies: Vec = Vec::new(); + let mut components_schemas: indexmap::IndexMap> = + indexmap::IndexMap::new(); + + struct EnvelopeChange { + path: String, + method: String, + success_name: String, + success_schema: openapiv3::Schema, + one_of: ReferenceOr, + } + let mut changes: Vec = Vec::new(); + + for ((path, method, status), body_examples) in &self.examples_store { + if *status != 200 { + continue; + } + let bodies: Vec = + body_examples.iter().map(|(_, v)| v.clone()).collect(); + let (_, error_bodies) = + crate::envelope::group_bodies(&bodies, &envelope_cfg.discriminator_field); + + if error_bodies.is_empty() { + continue; + } + + all_error_bodies.extend(error_bodies.iter().cloned()); + + let op_id = self + .spec + .paths + .paths + .get(path.as_str()) + .and_then(|p| { + if let ReferenceOr::Item(pi) = p { + Some(pi) + } else { + None + } + }) + .and_then(|pi| get_operation_ref(pi, method)) + .and_then(|s| s.as_ref()) + .and_then(|op| op.operation_id.as_deref().map(String::from)); + + let success_schema = { + let path_ref = self.spec.paths.paths.get(path.as_str()); + let path_item = match path_ref { + Some(ReferenceOr::Item(pi)) => pi, + _ => continue, + }; + let op = match get_operation_ref(path_item, method) { + Some(Some(op)) => op, + _ => continue, + }; + let resp = match op.responses.responses.get(&StatusCode::Code(200)) { + Some(ReferenceOr::Item(r)) => r, + _ => continue, + }; + let mt = match resp.content.values().next() { + Some(mt) => mt, + None => continue, + }; + match &mt.schema { + Some(ReferenceOr::Item(schema)) => schema.clone(), + _ => continue, + } + }; + + let success_name = crate::envelope::success_component_name( + op_id.as_deref(), + path, + method, + &envelope_cfg.success_suffix, + ); + + let success_ref_str = format!("#/components/schemas/{success_name}"); + let error_ref_str = "#/components/schemas/ApiError".to_string(); + let one_of = 
crate::envelope::build_one_of_schema( + &success_ref_str, + &error_ref_str, + &envelope_cfg.discriminator_field, + ); + + changes.push(EnvelopeChange { + path: path.clone(), + method: method.clone(), + success_name, + success_schema, + one_of, + }); + } + + for change in changes { + components_schemas.insert( + change.success_name, + ReferenceOr::Item(change.success_schema), + ); + + if let Some(ReferenceOr::Item(path_item)) = + self.spec.paths.paths.get_mut(change.path.as_str()) + { + if let Some(slot) = get_operation_mut(path_item, &change.method) { + if let Some(op) = slot.as_mut() { + if let Some(ReferenceOr::Item(resp)) = + op.responses.responses.get_mut(&StatusCode::Code(200)) + { + if let Some(mt) = resp.content.values_mut().next() { + mt.schema = Some(change.one_of); + } + } + } + } + } + } + + if !all_error_bodies.is_empty() { + let api_error_schema = + crate::envelope::infer_api_error(&all_error_bodies, envelope_cfg); + components_schemas + .insert("ApiError".to_string(), ReferenceOr::Item(api_error_schema)); + } + + if !components_schemas.is_empty() { + let components = self + .spec + .components + .get_or_insert_with(openapiv3::Components::default); + for (name, schema) in components_schemas { + components.schemas.insert(name, schema); + } + } + } for ((path, method, status), examples) in self.examples_store.drain() { let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) else { continue; From c06fee01292694d9c095fc6a89eeb3e0aee4cd50 Mon Sep 17 00:00:00 2001 From: arkptz Date: Wed, 27 May 2026 20:57:09 +0300 Subject: [PATCH 10/19] fix(output): sort maps for deterministic YAML output --- src/builder.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index 452eec3..eba6c12 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -3,7 +3,7 @@ use openapiv3::{ Example, Info, MediaType, OpenAPI, Operation, PathItem, Paths, ReferenceOr, RequestBody, Response, Responses, 
Server, StatusCode, }; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use tracing::{debug, warn}; use crate::params; @@ -147,8 +147,8 @@ pub struct OpenApiBuilder { tags_overrides: Option>, compiled_templates: path_matching::CompiledTemplates, spec: OpenAPI, - examples_store: HashMap<(String, String, u16), Vec<(String, serde_json::Value)>>, - req_examples_store: HashMap<(String, String, String), Vec<(String, serde_json::Value)>>, + examples_store: BTreeMap<(String, String, u16), Vec<(String, serde_json::Value)>>, + req_examples_store: BTreeMap<(String, String, String), Vec<(String, serde_json::Value)>>, max_examples: usize, redactor: Option, operation_id_strategy: crate::operation_id::OperationIdStrategy, @@ -505,8 +505,8 @@ impl OpenApiBuilder { tags_overrides, compiled_templates, spec, - examples_store: HashMap::new(), - req_examples_store: HashMap::new(), + examples_store: BTreeMap::new(), + req_examples_store: BTreeMap::new(), max_examples: config.max_examples, redactor, operation_id_strategy, @@ -945,7 +945,7 @@ impl OpenApiBuilder { } } } - for ((path, method, status), examples) in self.examples_store.drain() { + for ((path, method, status), examples) in self.examples_store.into_iter() { let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) else { continue; }; @@ -985,7 +985,7 @@ impl OpenApiBuilder { } media_type.examples = ex_map; } - for ((path, method, content_type), examples) in self.req_examples_store.drain() { + for ((path, method, content_type), examples) in self.req_examples_store.into_iter() { let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) else { continue; }; @@ -1023,6 +1023,13 @@ impl OpenApiBuilder { } media_type.examples = ex_map; } + + self.spec.paths.paths.sort_keys(); + + if let Some(ref mut components) = self.spec.components { + components.schemas.sort_keys(); + } + self.spec } } From 15962859ec99296aa035bf55b51aa724dd1391a2 Mon Sep 17 00:00:00 2001 From: 
arkptz Date: Thu, 28 May 2026 02:04:22 +0300 Subject: [PATCH 11/19] test(integration): add end-to-end test for operationId, tags, and envelope test(integration): add end-to-end test for operationId, tags, and envelope --- tests/e2e_features.rs | 109 +++++++++++++++++++++++++ tests/fixtures/envelope_templates.yaml | 4 + tests/fixtures/envelope_test.har | 7 ++ tests/fixtures/tag-rules-test.yaml | 6 ++ 4 files changed, 126 insertions(+) create mode 100644 tests/e2e_features.rs create mode 100644 tests/fixtures/envelope_templates.yaml create mode 100644 tests/fixtures/envelope_test.har create mode 100644 tests/fixtures/tag-rules-test.yaml diff --git a/tests/e2e_features.rs b/tests/e2e_features.rs new file mode 100644 index 0000000..3cb757c --- /dev/null +++ b/tests/e2e_features.rs @@ -0,0 +1,109 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn e2e_all_features() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("envelope_test.har").to_str().unwrap(), + "-t", + fixture("envelope_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + "--operation-id-strategy", + "path", + "--tag-rules", + fixture("tag-rules-test.yaml").to_str().unwrap(), + "--envelope-discriminator", + "success", + ]) + .assert() + .success(); + + let content = std::fs::read_to_string(&output).unwrap(); + + // 1. Every operation should have operationId (--operation-id-strategy path) + assert!( + content.contains("operationId:"), + "expected operationId in output:\n{content}" + ); + + // 2. 
Tags from rules: Contract and Private + assert!( + content.contains("- Contract"), + "expected 'Contract' tag:\n{content}" + ); + assert!( + content.contains("- Private"), + "expected 'Private' tag:\n{content}" + ); + + // 3. ApiError schema in components (from envelope detection) + assert!( + content.contains("ApiError:"), + "expected ApiError in components:\n{content}" + ); + + // 4. oneOf present (ticker + fair_price both had mixed bodies) + assert!( + content.contains("oneOf:"), + "expected oneOf schema:\n{content}" + ); + + // 5. Output is valid YAML + let parsed: serde_yaml_ng::Value = + serde_yaml_ng::from_str(&content).expect("generated spec must be valid YAML"); + assert!(parsed.get("openapi").is_some()); + assert!(parsed.get("paths").is_some()); + assert!(parsed.get("components").is_some()); +} + +#[test] +fn e2e_no_flags_backward_compat() { + // Without new flags: no operationId, no components + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("envelope_test.har").to_str().unwrap(), + "-t", + fixture("envelope_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + ]) + .assert() + .success(); + + let content = std::fs::read_to_string(&output).unwrap(); + assert!( + !content.contains("operationId:"), + "operationId should NOT appear without --operation-id-strategy:\n{content}" + ); + assert!( + !content.contains("components:"), + "components should NOT appear without --envelope-discriminator:\n{content}" + ); +} diff --git a/tests/fixtures/envelope_templates.yaml b/tests/fixtures/envelope_templates.yaml new file mode 100644 index 0000000..b6f868e --- /dev/null +++ b/tests/fixtures/envelope_templates.yaml @@ -0,0 +1,4 @@ +x-path-templates: + - /api/v1/contract/ticker + - /api/v1/contract/fair_price/{symbol} + - /api/v1/private/order/place diff --git a/tests/fixtures/envelope_test.har 
b/tests/fixtures/envelope_test.har new file mode 100644 index 0000000..a721c1d --- /dev/null +++ b/tests/fixtures/envelope_test.har @@ -0,0 +1,7 @@ +{"log":{"version":"1.2","creator":{"name":"test","version":"1.0"},"entries":[ + {"startedDateTime":"2025-01-20T10:00:00.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/ticker","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":60,"mimeType":"application/json","text":"{\"success\": true, \"data\": {\"price\": 42000.5}}"},"redirectURL":"","headersSize":-1,"bodySize":60},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-20T10:00:01.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/ticker","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":55,"mimeType":"application/json","text":"{\"success\": false, \"code\": 1, \"message\": \"error\"}"},"redirectURL":"","headersSize":-1,"bodySize":55},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-20T10:00:02.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/fair_price/BTC","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":65,"mimeType":"application/json","text":"{\"success\": true, \"data\": {\"fairPrice\": 
42100.0}}"},"redirectURL":"","headersSize":-1,"bodySize":65},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-20T10:00:03.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/fair_price/ETH","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":60,"mimeType":"application/json","text":"{\"success\": false, \"code\": 2, \"message\": \"symbol not found\"}"},"redirectURL":"","headersSize":-1,"bodySize":60},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-20T10:00:04.000Z","time":80,"request":{"method":"POST","url":"https://api.example.com/api/v1/private/order/place","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"queryString":[],"headersSize":-1,"bodySize":40,"postData":{"mimeType":"application/json","text":"{\"symbol\": \"BTC\", \"amount\": 0.1}"}},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":50,"mimeType":"application/json","text":"{\"success\": true, \"orderId\": \"ord_001\"}"},"redirectURL":"","headersSize":-1,"bodySize":50},"cache":{},"timings":{"send":1,"wait":70,"receive":9}} +]}} \ No newline at end of file diff --git a/tests/fixtures/tag-rules-test.yaml b/tests/fixtures/tag-rules-test.yaml new file mode 100644 index 0000000..67543b2 --- /dev/null +++ b/tests/fixtures/tag-rules-test.yaml @@ -0,0 +1,6 @@ +rules: + - match: "^/api/v1/contract/" + tag: Contract + - match: "^/api/v1/private/" + tag: Private +default: Default From e04e0836f97f58227ac9f6f6a6720ae1acf8c959 Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 02:08:47 +0300 Subject: [PATCH 12/19] docs: document 
operationId, tags, and envelope features --- CHANGELOG.md | 13 ++++++ README.md | 4 ++ book/src/usage/cli-reference.md | 9 ++++ book/src/usage/pipeline.md | 81 +++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a9ee12..e682e1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- *(generate)* add `--operation-id-strategy {none,path,template}` flag for stable operationId generation +- *(generate)* add `--operation-id-overrides ` for per-operation overrides via YAML +- *(generate)* add `--operation-id-template ` for custom operationId templates +- *(generate)* add `--tag-strategy {legacy,none,path-segment,rules}` flag +- *(generate)* add `--tag-rules ` for regex-based tag assignment from YAML +- *(generate)* add `--tag-segment-index ` to use a specific path segment as tag +- *(generate)* add `--envelope-discriminator ` for discriminator-based response splitting +- *(generate)* add `--envelope-error-shape ` for hand-supplied ApiError schema +- *(generate)* add `--envelope-success-component-suffix ` (default `Success`) +- *(output)* sort paths and component schemas alphabetically for deterministic YAML output + ## [0.6.0](https://github.com/Arkptz/mitm2openapi/compare/v0.5.2...v0.6.0) - 2026-05-27 ### Added diff --git a/README.md b/README.md index 56ad164..fcb7c8f 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,10 @@ Credit to [@alufers](https://github.com/alufers) for the original tool that pion - **Auto-detection** — heuristic format detection from file content - **Battle-tested** — integration tests against Swagger Petstore and OWASP crAPI with `oasdiff` verification - **Cross-platform** — Linux, macOS, Windows pre-built binaries +- **operationId generation** — `--operation-id-strategy path` derives stable camelCase names (e.g. 
`getFairPrice`, `placeOrder`); override per-operation via YAML +- **Tag rules** — `--tag-rules file.yaml` assigns one tag per operation by regex; supports `path-segment`, `none`, and `legacy` modes +- **Envelope detection** — `--envelope-discriminator success` splits MEXC-style `{success: bool}` envelopes into `oneOf(SuccessSchema, ApiError)` with shared `components/schemas/ApiError` +- **Deterministic output** — paths and schemas sorted alphabetically for byte-stable diffs across runs ## Installation diff --git a/book/src/usage/cli-reference.md b/book/src/usage/cli-reference.md index 4e8aa58..b09ed56 100644 --- a/book/src/usage/cli-reference.md +++ b/book/src/usage/cli-reference.md @@ -79,6 +79,15 @@ mitm2openapi generate [OPTIONS] -i -t -o -p ` | `5` | Maximum examples per endpoint per status code | | `--redact-patterns ` | | Comma-separated regex patterns to redact from examples | | `--redact-fields ` | | Comma-separated field names to redact from examples | +| `--operation-id-strategy ` | `none` | Strategy for operationId generation: `none`, `path`, `template` | +| `--operation-id-template ` | | Template with `{method}` and `{path}` placeholders (requires `template`) | +| `--operation-id-overrides ` | | YAML file with per-operation overrides | +| `--tag-strategy ` | `legacy` | Tag assignment strategy: `legacy`, `none`, `path-segment`, `rules` | +| `--tag-segment-index ` | | Path segment index for tag (requires `path-segment`) | +| `--tag-rules ` | | YAML rules file (auto-sets strategy to `rules`) | +| `--envelope-discriminator ` | | JSON field name for discriminating success vs error | +| `--envelope-error-shape ` | | YAML file with hand-supplied ApiError schema | +| `--envelope-success-component-suffix ` | `Success` | Suffix for success component names | ## Common flag details diff --git a/book/src/usage/pipeline.md b/book/src/usage/pipeline.md index 5236f0f..d78aea1 100644 --- a/book/src/usage/pipeline.md +++ b/book/src/usage/pipeline.md @@ -251,3 +251,84 @@ 
mitm2openapi generate \ The generated `openapi.yaml` is a valid OpenAPI 3.0 document that can be opened in [Swagger UI](https://github.com/swagger-api/swagger-ui), imported into Postman, or used as a contract for API testing. + +## Generating stable operationIds + +Use `--operation-id-strategy path` to generate camelCase operationIds that openapi-generator converts to readable Rust method names: + +```sh +mitm2openapi generate -i capture.har -t templates.yaml -o openapi.yaml -p https://api.example.com \ + --operation-id-strategy path +``` + +This produces ids like `listUsers`, `getUser`, `createOrder`, `placeOrder`. + +Override specific operations with a YAML file: + +```yaml +# overrides.yaml +"GET /api/v1/contract/fair_price/{symbol}": getFairPrice +"POST /api/v1/private/order/place": placeOrder +``` + +```sh +mitm2openapi generate ... --operation-id-strategy path --operation-id-overrides overrides.yaml +``` + +## Organizing operations with tags + +Tags group operations into modules (one Rust source file per tag in openapi-generator). Use regex-based rules: + +```yaml +# tag-rules.yaml +rules: + - match: "^/api/v1/contract/" + tag: Contract + - match: "^/api/v1/private/" + tag: Private +default: Market +``` + +```sh +mitm2openapi generate ... --tag-rules tag-rules.yaml +``` + +Or use a fixed path segment as the tag: + +```sh +mitm2openapi generate ... 
--tag-strategy path-segment --tag-segment-index 2 +``` + +## MEXC-style envelope APIs + +MEXC and similar exchange APIs always return HTTP 200 with a `success` boolean: + +```json +{"success": true, "data": {"price": 42000.5}} +{"success": false, "code": 1, "message": "Invalid symbol"} +``` + +Use `--envelope-discriminator` to split captured bodies into typed schemas: + +```sh +mitm2openapi generate \ + -i capture.har -t templates.yaml -o openapi.yaml \ + -p https://api.example.com \ + --operation-id-strategy path \ + --tag-rules tag-rules.yaml \ + --envelope-discriminator success +``` + +The generated spec will include: + +- A shared `components/schemas/ApiError` (inferred from all error bodies) +- Per-operation `{OperationId}Success` schemas +- `oneOf(SuccessSchema, ApiError)` for operations with mixed bodies + +Supply your own error schema instead of inferring: + +```sh +mitm2openapi generate ... \ + --envelope-discriminator success \ + --envelope-error-shape api-error.yaml +``` From efd7c8f372322562bcf6f0995442fe8051f7cd8a Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 11:44:04 +0300 Subject: [PATCH 13/19] fix(test): normalize CRLF in snapshot comparison, add .gitattributes eol=lf --- .gitattributes | 3 +++ tests/snapshot_compat.rs | 4 ++++ 2 files changed, 7 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..539052d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +# Normalize line endings to LF for all text files. +# This prevents CRLF/LF mismatches on Windows CI. 
+* text=auto eol=lf diff --git a/tests/snapshot_compat.rs b/tests/snapshot_compat.rs index 77ef373..92f05a6 100644 --- a/tests/snapshot_compat.rs +++ b/tests/snapshot_compat.rs @@ -55,6 +55,10 @@ fn snapshot_compat() { ) }); + // Normalize line endings for cross-platform comparison (CRLF → LF on Windows CI) + let actual = actual.replace("\r\n", "\n"); + let expected = expected.replace("\r\n", "\n"); + if actual != expected { let actual_lines: Vec<&str> = actual.lines().collect(); let expected_lines: Vec<&str> = expected.lines().collect(); From 2d6ce37e21209aa183b83e0c3d99e9ba84135df6 Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 11:59:57 +0300 Subject: [PATCH 14/19] fix(envelope): merge all error bodies in infer_api_error, not just first --- src/envelope.rs | 81 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 7 deletions(-) diff --git a/src/envelope.rs b/src/envelope.rs index 607216a..d42897d 100644 --- a/src/envelope.rs +++ b/src/envelope.rs @@ -40,18 +40,64 @@ pub fn group_bodies(bodies: &[Value], discriminator: &str) -> (Vec, Vec Schema { if let Some(custom) = &config.error_shape { return custom.clone(); } - if let Some(first) = error_bodies.first() { - return crate::schema::value_to_schema(first); + if error_bodies.is_empty() { + return Schema { + schema_data: SchemaData::default(), + schema_kind: SchemaKind::Any(openapiv3::AnySchema::default()), + }; + } + merge_error_body_schemas(error_bodies) +} + +/// Merge multiple error body JSON values into a single schema. +/// +/// For each field across all bodies, picks the representative value whose JSON +/// type appears most frequently (majority vote), then converts the merged +/// object to a schema. 
+fn merge_error_body_schemas(bodies: &[Value]) -> Schema { + use std::collections::HashMap; + + let mut field_values: indexmap::IndexMap> = indexmap::IndexMap::new(); + for body in bodies { + if let Value::Object(obj) = body { + for (key, val) in obj { + field_values.entry(key.clone()).or_default().push(val); + } + } } - Schema { - schema_data: SchemaData::default(), - schema_kind: SchemaKind::Any(openapiv3::AnySchema::default()), + + let mut merged = serde_json::Map::new(); + for (key, values) in &field_values { + let mut type_counts: HashMap = HashMap::new(); + for val in values { + let disc = json_type_discriminant(val); + let entry = type_counts.entry(disc).or_insert((0, val)); + entry.0 += 1; + } + if let Some((_, representative)) = type_counts.into_values().max_by_key(|(count, _)| *count) + { + merged.insert(key.clone(), (*representative).clone()); + } + } + + crate::schema::value_to_schema(&Value::Object(merged)) +} + +fn json_type_discriminant(val: &Value) -> u8 { + match val { + Value::Null => 0, + Value::Bool(_) => 1, + Value::Number(_) => 2, + Value::String(_) => 3, + Value::Array(_) => 4, + Value::Object(_) => 5, } } @@ -201,6 +247,27 @@ mod tests { assert!(!name.is_empty()); } + #[test] + fn infer_api_error_merges_all_bodies_not_just_first() { + let bodies = vec![ + json!({"success": false, "code": 401, "msg": 0}), + json!({"success": false, "code": 401, "msg": "Not logged in"}), + json!({"success": false, "code": 401, "msg": "Please login first"}), + ]; + let config = EnvelopeConfig { + discriminator_field: "success".to_string(), + error_shape: None, + success_suffix: "Success".to_string(), + }; + let schema = infer_api_error(&bodies, &config); + let yaml = serde_yaml_ng::to_string(&schema).unwrap(); + assert!( + yaml.contains("msg:") + && (yaml.contains("type: string") || yaml.contains("- type: string")), + "msg must be string (or oneOf with string) when 2/3 samples are string:\n{yaml}" + ); + } + #[test] fn build_one_of_schema_structure() { let 
schema = build_one_of_schema( From eb3ffc9990d926e4bac50c5d2439885647337ca9 Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 12:00:57 +0300 Subject: [PATCH 15/19] fix(envelope): pin discriminator field to boolean enum:[false] in ApiError --- src/envelope.rs | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/envelope.rs b/src/envelope.rs index d42897d..0feef9e 100644 --- a/src/envelope.rs +++ b/src/envelope.rs @@ -53,7 +53,22 @@ pub fn infer_api_error(error_bodies: &[Value], config: &EnvelopeConfig) -> Schem schema_kind: SchemaKind::Any(openapiv3::AnySchema::default()), }; } - merge_error_body_schemas(error_bodies) + let mut schema = merge_error_body_schemas(error_bodies); + pin_discriminator_field(&mut schema, &config.discriminator_field); + schema +} + +fn pin_discriminator_field(schema: &mut Schema, field_name: &str) { + if let SchemaKind::Type(openapiv3::Type::Object(ref mut obj)) = schema.schema_kind { + let pinned = Schema { + schema_data: SchemaData::default(), + schema_kind: SchemaKind::Type(openapiv3::Type::Boolean(openapiv3::BooleanType { + enumeration: vec![Some(false)], + })), + }; + obj.properties + .insert(field_name.to_string(), ReferenceOr::Item(Box::new(pinned))); + } } /// Merge multiple error body JSON values into a single schema. 
@@ -268,6 +283,29 @@ mod tests { ); } + #[test] + fn inferred_api_error_includes_discriminator_field_pinned_to_false() { + let bodies = vec![ + json!({"success": false, "code": 401, "msg": "Not logged in"}), + json!({"success": false, "code": 99999, "msg": "System busy"}), + ]; + let config = EnvelopeConfig { + discriminator_field: "success".to_string(), + error_shape: None, + success_suffix: "Success".to_string(), + }; + let schema = infer_api_error(&bodies, &config); + let yaml = serde_yaml_ng::to_string(&schema).unwrap(); + assert!( + yaml.contains("success:"), + "discriminator field must be in ApiError:\n{yaml}" + ); + assert!( + yaml.contains("enum:") && yaml.contains("- false"), + "discriminator field must be pinned with enum: [false]:\n{yaml}" + ); + } + #[test] fn build_one_of_schema_structure() { let schema = build_one_of_schema( From d339fe29b777434c09a6ff8b4a9a3fc000d720d7 Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 12:01:20 +0300 Subject: [PATCH 16/19] fix(cli): stop splitting --redact-patterns on comma, fail invalid regex under --strict --- src/cli.rs | 2 +- src/main.rs | 11 ++++++ tests/redact_patterns.rs | 72 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/redact_patterns.rs diff --git a/src/cli.rs b/src/cli.rs index b6f4ceb..2908f95 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -212,7 +212,7 @@ pub struct GenerateArgs { #[arg(long, default_value_t = 5)] pub max_examples: usize, - #[arg(long, value_delimiter = ',')] + #[arg(long)] pub redact_patterns: Vec, #[arg(long, value_delimiter = ',')] diff --git a/src/main.rs b/src/main.rs index 328dca1..69207b1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -257,6 +257,17 @@ fn run(cli: Cli) -> Result { envelope_config, }; + if !config.redact_patterns.is_empty() || !config.redact_fields.is_empty() { + if let Err(e) = mitm2openapi::redact::Redactor::new( + &config.redact_patterns, + &config.redact_fields, + ) { + let msg = 
format!("invalid redact pattern: {e}"); + warn!(error = %e, "Invalid redact pattern"); + *report.events.parse_error.entry(msg).or_insert(0) += 1; + } + } + let mut builder = OpenApiBuilder::new(&args.prefix, &config, active_templates); let mut count = 0usize; for req_result in req_iter { diff --git a/tests/redact_patterns.rs b/tests/redact_patterns.rs new file mode 100644 index 0000000..d7d6a55 --- /dev/null +++ b/tests/redact_patterns.rs @@ -0,0 +1,72 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn redact_pattern_with_quantifier_comma_is_not_split() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + // {8,64} quantifier contains a comma — clap must not split on it + let cmd = Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("snapshot_input.har").to_str().unwrap(), + "-t", + fixture("snapshot_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + "--redact-patterns", + "TOKEN[a-f0-9]{8,64}", + "--strict", + ]) + .output() + .unwrap(); + + let stderr = String::from_utf8_lossy(&cmd.stderr); + assert!( + !stderr.contains("unclosed counted repetition"), + "regex was truncated by clap comma-split:\n{stderr}" + ); + assert!( + cmd.status.success(), + "should succeed — pattern is valid:\n{stderr}" + ); +} + +#[test] +fn invalid_redact_pattern_fails_under_strict() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("snapshot_input.har").to_str().unwrap(), + "-t", + fixture("snapshot_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + "--redact-patterns", + 
"[unclosed", + "--strict", + ]) + .assert() + .failure(); +} From 3b3769c539e5d15106650300417917c4bbeabb74 Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 12:02:17 +0300 Subject: [PATCH 17/19] docs(changelog): add bugfix entries from MEXC integration testing --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e682e1a..d0fdd35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - *(generate)* add `--envelope-success-component-suffix ` (default `Success`) - *(output)* sort paths and component schemas alphabetically for deterministic YAML output +### Fixed + +- *(envelope)* `infer_api_error` now merges all error bodies, not just the first — an outlier `msg: 0` no longer overrides thousands of `msg: "string"` samples +- *(envelope)* inferred `ApiError` schema now includes the discriminator field pinned with `enum: [false]` +- *(cli)* `--redact-patterns` no longer splits on `,` — regex quantifiers like `{32,}` now work correctly (pass multiple patterns via repeated flags) +- *(cli)* invalid `--redact-patterns` regex now hard-fails under `--strict` instead of silently skipping redaction + +> Found and fixed by integration testing against a 3.1 GB MEXC capture in mexc-reversed-sdk. 
+ ## [0.6.0](https://github.com/Arkptz/mitm2openapi/compare/v0.5.2...v0.6.0) - 2026-05-27 ### Added From 708a32d52c103c503810870208c2e7e7718d7eb1 Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 12:04:46 +0300 Subject: [PATCH 18/19] =?UTF-8?q?docs:=20update=20--redact-patterns=20usag?= =?UTF-8?q?e=20=E2=80=94=20one=20regex=20per=20flag,=20no=20comma=20split?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- book/src/usage/cli-reference.md | 2 +- book/src/usage/pipeline.md | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/book/src/usage/cli-reference.md b/book/src/usage/cli-reference.md index b09ed56..f39b9cd 100644 --- a/book/src/usage/cli-reference.md +++ b/book/src/usage/cli-reference.md @@ -77,7 +77,7 @@ mitm2openapi generate [OPTIONS] -i -t -o -p ` | | Write structured JSON processing report | | `--skip-options` | off | Filter out OPTIONS requests from output | | `--max-examples ` | `5` | Maximum examples per endpoint per status code | -| `--redact-patterns ` | | Comma-separated regex patterns to redact from examples | +| `--redact-patterns ` | | Regex pattern to redact from examples (repeat flag for multiple) | | `--redact-fields ` | | Comma-separated field names to redact from examples | | `--operation-id-strategy ` | `none` | Strategy for operationId generation: `none`, `path`, `template` | | `--operation-id-template ` | | Template with `{method}` and `{path}` placeholders (requires `template`) | diff --git a/book/src/usage/pipeline.md b/book/src/usage/pipeline.md index d78aea1..2eea556 100644 --- a/book/src/usage/pipeline.md +++ b/book/src/usage/pipeline.md @@ -201,11 +201,13 @@ mitm2openapi generate \ -t templates.yaml \ -o openapi.yaml \ -p "https://api.example.com" \ - --redact-patterns 'eyJ[\w-]+,sk-[a-zA-Z0-9]+' \ + --redact-patterns 'eyJ[\w-]+' \ + --redact-patterns 'sk-[a-zA-Z0-9]+' \ --redact-fields 'password,token,secret,authorization' ``` -`--redact-patterns` accepts 
comma-separated regexes matched against string values. +`--redact-patterns` takes one regex per flag — repeat the flag for multiple patterns. +Regexes with quantifiers like `{32,}` work correctly. `--redact-fields` accepts comma-separated field names whose values are replaced with `"[REDACTED]"`. From 582b9d7102531f74dbd68f8e8a21e0bab0b6aab7 Mon Sep 17 00:00:00 2001 From: arkptz Date: Thu, 28 May 2026 12:11:20 +0300 Subject: [PATCH 19/19] fix(cli): show correct value name and help text for --redact-patterns --- src/cli.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index 2908f95..4a7ce0b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -212,9 +212,11 @@ pub struct GenerateArgs { #[arg(long, default_value_t = 5)] pub max_examples: usize, - #[arg(long)] + /// Regex pattern to redact from examples (repeat for multiple) + #[arg(long, value_name = "REGEX")] pub redact_patterns: Vec<String>, + /// Comma-separated field names to redact from examples #[arg(long, value_delimiter = ',')] pub redact_fields: Vec<String>,