diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..539052d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +# Normalize line endings to LF for all text files. +# This prevents CRLF/LF mismatches on Windows CI. +* text=auto eol=lf diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a9ee12..d0fdd35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- *(generate)* add `--operation-id-strategy {none,path,template}` flag for stable operationId generation +- *(generate)* add `--operation-id-overrides ` for per-operation overrides via YAML +- *(generate)* add `--operation-id-template ` for custom operationId templates +- *(generate)* add `--tag-strategy {legacy,none,path-segment,rules}` flag +- *(generate)* add `--tag-rules ` for regex-based tag assignment from YAML +- *(generate)* add `--tag-segment-index ` to use a specific path segment as tag +- *(generate)* add `--envelope-discriminator ` for discriminator-based response splitting +- *(generate)* add `--envelope-error-shape ` for hand-supplied ApiError schema +- *(generate)* add `--envelope-success-component-suffix ` (default `Success`) +- *(output)* sort paths and component schemas alphabetically for deterministic YAML output + +### Fixed + +- *(envelope)* `infer_api_error` now merges all error bodies, not just the first — an outlier `msg: 0` no longer overrides thousands of `msg: "string"` samples +- *(envelope)* inferred `ApiError` schema now includes the discriminator field pinned with `enum: [false]` +- *(cli)* `--redact-patterns` no longer splits on `,` — regex quantifiers like `{32,}` now work correctly (pass multiple patterns via repeated flags) +- *(cli)* invalid `--redact-patterns` regex now hard-fails under `--strict` instead of silently skipping redaction + +> Found and fixed by integration testing against a 3.1 GB MEXC capture in mexc-reversed-sdk. 
+ ## [0.6.0](https://github.com/Arkptz/mitm2openapi/compare/v0.5.2...v0.6.0) - 2026-05-27 ### Added diff --git a/README.md b/README.md index 56ad164..fcb7c8f 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,10 @@ Credit to [@alufers](https://github.com/alufers) for the original tool that pion - **Auto-detection** — heuristic format detection from file content - **Battle-tested** — integration tests against Swagger Petstore and OWASP crAPI with `oasdiff` verification - **Cross-platform** — Linux, macOS, Windows pre-built binaries +- **operationId generation** — `--operation-id-strategy path` derives stable camelCase names (e.g. `getFairPrice`, `placeOrder`); override per-operation via YAML +- **Tag rules** — `--tag-rules file.yaml` assigns one tag per operation by regex; supports `path-segment`, `none`, and `legacy` modes +- **Envelope detection** — `--envelope-discriminator success` splits MEXC-style `{success: bool}` envelopes into `oneOf(SuccessSchema, ApiError)` with shared `components/schemas/ApiError` +- **Deterministic output** — paths and schemas sorted alphabetically for byte-stable diffs across runs ## Installation diff --git a/book/src/usage/cli-reference.md b/book/src/usage/cli-reference.md index 4e8aa58..f39b9cd 100644 --- a/book/src/usage/cli-reference.md +++ b/book/src/usage/cli-reference.md @@ -77,8 +77,17 @@ mitm2openapi generate [OPTIONS] -i -t -o -p ` | | Write structured JSON processing report | | `--skip-options` | off | Filter out OPTIONS requests from output | | `--max-examples ` | `5` | Maximum examples per endpoint per status code | -| `--redact-patterns ` | | Comma-separated regex patterns to redact from examples | +| `--redact-patterns ` | | Regex pattern to redact from examples (repeat flag for multiple) | | `--redact-fields ` | | Comma-separated field names to redact from examples | +| `--operation-id-strategy ` | `none` | Strategy for operationId generation: `none`, `path`, `template` | +| `--operation-id-template ` | | Template with 
`{method}` and `{path}` placeholders (requires `template`) | +| `--operation-id-overrides ` | | YAML file with per-operation overrides | +| `--tag-strategy ` | `legacy` | Tag assignment strategy: `legacy`, `none`, `path-segment`, `rules` | +| `--tag-segment-index ` | | Path segment index for tag (requires `path-segment`) | +| `--tag-rules ` | | YAML rules file (auto-sets strategy to `rules`) | +| `--envelope-discriminator ` | | JSON field name for discriminating success vs error | +| `--envelope-error-shape ` | | YAML file with hand-supplied ApiError schema | +| `--envelope-success-component-suffix ` | `Success` | Suffix for success component names | ## Common flag details diff --git a/book/src/usage/pipeline.md b/book/src/usage/pipeline.md index 5236f0f..2eea556 100644 --- a/book/src/usage/pipeline.md +++ b/book/src/usage/pipeline.md @@ -201,11 +201,13 @@ mitm2openapi generate \ -t templates.yaml \ -o openapi.yaml \ -p "https://api.example.com" \ - --redact-patterns 'eyJ[\w-]+,sk-[a-zA-Z0-9]+' \ + --redact-patterns 'eyJ[\w-]+' \ + --redact-patterns 'sk-[a-zA-Z0-9]+' \ --redact-fields 'password,token,secret,authorization' ``` -`--redact-patterns` accepts comma-separated regexes matched against string values. +`--redact-patterns` takes one regex per flag — repeat the flag for multiple patterns. +Regexes with quantifiers like `{32,}` work correctly. `--redact-fields` accepts comma-separated field names whose values are replaced with `"[REDACTED]"`. @@ -251,3 +253,84 @@ mitm2openapi generate \ The generated `openapi.yaml` is a valid OpenAPI 3.0 document that can be opened in [Swagger UI](https://github.com/swagger-api/swagger-ui), imported into Postman, or used as a contract for API testing. 
+ +## Generating stable operationIds + +Use `--operation-id-strategy path` to generate camelCase operationIds that openapi-generator converts to readable Rust method names: + +```sh +mitm2openapi generate -i capture.har -t templates.yaml -o openapi.yaml -p https://api.example.com \ + --operation-id-strategy path +``` + +This produces ids like `listUsers`, `getUser`, `createOrder`, `placeOrder`. + +Override specific operations with a YAML file: + +```yaml +# overrides.yaml +"GET /api/v1/contract/fair_price/{symbol}": getFairPrice +"POST /api/v1/private/order/place": placeOrder +``` + +```sh +mitm2openapi generate ... --operation-id-strategy path --operation-id-overrides overrides.yaml +``` + +## Organizing operations with tags + +Tags group operations into modules (one Rust source file per tag in openapi-generator). Use regex-based rules: + +```yaml +# tag-rules.yaml +rules: + - match: "^/api/v1/contract/" + tag: Contract + - match: "^/api/v1/private/" + tag: Private +default: Market +``` + +```sh +mitm2openapi generate ... --tag-rules tag-rules.yaml +``` + +Or use a fixed path segment as the tag: + +```sh +mitm2openapi generate ... 
--tag-strategy path-segment --tag-segment-index 2 +``` + +## MEXC-style envelope APIs + +MEXC and similar exchange APIs always return HTTP 200 with a `success` boolean: + +```json +{"success": true, "data": {"price": 42000.5}} +{"success": false, "code": 1, "message": "Invalid symbol"} +``` + +Use `--envelope-discriminator` to split captured bodies into typed schemas: + +```sh +mitm2openapi generate \ + -i capture.har -t templates.yaml -o openapi.yaml \ + -p https://api.example.com \ + --operation-id-strategy path \ + --tag-rules tag-rules.yaml \ + --envelope-discriminator success +``` + +The generated spec will include: + +- A shared `components/schemas/ApiError` (inferred from all error bodies) +- Per-operation `{OperationId}Success` schemas +- `oneOf(SuccessSchema, ApiError)` for operations with mixed bodies + +Supply your own error schema instead of inferring: + +```sh +mitm2openapi generate ... \ + --envelope-discriminator success \ + --envelope-error-shape api-error.yaml +``` diff --git a/src/builder.rs b/src/builder.rs index 2ad5b8f..eba6c12 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -3,7 +3,7 @@ use openapiv3::{ Example, Info, MediaType, OpenAPI, Operation, PathItem, Paths, ReferenceOr, RequestBody, Response, Responses, Server, StatusCode, }; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use tracing::{debug, warn}; use crate::params; @@ -143,13 +143,17 @@ pub fn discover_paths( pub struct OpenApiBuilder { prefix: String, config: Config, + tag_strategy: crate::tag_rules::TagStrategy, tags_overrides: Option>, compiled_templates: path_matching::CompiledTemplates, spec: OpenAPI, - examples_store: HashMap<(String, String, u16), Vec<(String, serde_json::Value)>>, - req_examples_store: HashMap<(String, String, String), Vec<(String, serde_json::Value)>>, + examples_store: BTreeMap<(String, String, u16), Vec<(String, serde_json::Value)>>, + req_examples_store: BTreeMap<(String, String, String), Vec<(String, 
serde_json::Value)>>, max_examples: usize, redactor: Option, + operation_id_strategy: crate::operation_id::OperationIdStrategy, + operation_id_overrides: HashMap, + envelope_config: Option, } fn extract_tag( @@ -294,6 +298,20 @@ fn parse_body(body: &[u8], content_type: Option<&str>) -> Option<(String, serde_ None } +fn get_operation_ref<'a>(path_item: &'a PathItem, method: &str) -> Option<&'a Option> { + match method.to_uppercase().as_str() { + "GET" => Some(&path_item.get), + "PUT" => Some(&path_item.put), + "POST" => Some(&path_item.post), + "DELETE" => Some(&path_item.delete), + "OPTIONS" => Some(&path_item.options), + "HEAD" => Some(&path_item.head), + "PATCH" => Some(&path_item.patch), + "TRACE" => Some(&path_item.trace), + _ => None, + } +} + /// Get the method-specific operation slot from a PathItem (mutable). /// Returns `None` for HTTP methods not supported by the OpenAPI spec. fn get_operation_mut<'a>( @@ -475,16 +493,25 @@ impl OpenApiBuilder { None }; + let tag_strategy = config.tag_strategy.clone(); + let operation_id_strategy = config.operation_id_strategy.clone(); + let operation_id_overrides = config.operation_id_overrides.clone(); + let envelope_config = config.envelope_config.clone(); + Self { prefix: prefix.to_string(), config: config.clone(), + tag_strategy, tags_overrides, compiled_templates, spec, - examples_store: HashMap::new(), - req_examples_store: HashMap::new(), + examples_store: BTreeMap::new(), + req_examples_store: BTreeMap::new(), max_examples: config.max_examples, redactor, + operation_id_strategy, + operation_id_overrides, + envelope_config, } } @@ -639,10 +666,36 @@ impl OpenApiBuilder { ..Operation::default() }; - if let Some(tag) = extract_tag(&template_path, &self.tags_overrides) { - operation.tags = vec![tag]; + match &self.tag_strategy { + crate::tag_rules::TagStrategy::Legacy => { + if let Some(tag) = extract_tag(&template_path, &self.tags_overrides) { + operation.tags = vec![tag]; + } + } + 
crate::tag_rules::TagStrategy::None => { + // suppress tags — leave operation.tags empty + } + crate::tag_rules::TagStrategy::PathSegment { .. } + | crate::tag_rules::TagStrategy::Rules { .. } => { + if let Some(tag) = crate::tag_rules::resolve_tag(&self.tag_strategy, &template_path) + { + operation.tags = vec![tag]; + } + } } + let override_key = format!("{} {}", method, template_path); + let op_id = if let Some(id) = self.operation_id_overrides.get(&override_key) { + Some(id.clone()) + } else { + crate::operation_id::derive_operation_id( + &method, + &template_path, + &self.operation_id_strategy, + ) + }; + operation.operation_id = op_id; + if !self.config.suppress_params { let mut parameters: Vec> = Vec::new(); @@ -730,7 +783,169 @@ impl OpenApiBuilder { /// Get the assembled OpenAPI spec. pub fn build(mut self) -> OpenAPI { - for ((path, method, status), examples) in self.examples_store.drain() { + if !matches!( + self.operation_id_strategy, + crate::operation_id::OperationIdStrategy::None + ) { + let mut ops: Vec<(String, String, Option)> = Vec::new(); + for (path, path_ref) in &self.spec.paths.paths { + if let ReferenceOr::Item(path_item) = path_ref { + for method in &[ + "GET", "PUT", "POST", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE", + ] { + if let Some(Some(op)) = get_operation_ref(path_item, method) { + ops.push((path.clone(), method.to_string(), op.operation_id.clone())); + } + } + } + } + crate::operation_id::resolve_collisions(&mut ops); + for (path, method, resolved_id) in ops { + if let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) { + if let Some(slot) = get_operation_mut(path_item, &method) { + if let Some(op) = slot.as_mut() { + op.operation_id = resolved_id; + } + } + } + } + } + + // Envelope detection: MUST run before examples_store.drain() consumes raw bodies. 
+ if let Some(ref envelope_cfg) = self.envelope_config { + let mut all_error_bodies: Vec = Vec::new(); + let mut components_schemas: indexmap::IndexMap> = + indexmap::IndexMap::new(); + + struct EnvelopeChange { + path: String, + method: String, + success_name: String, + success_schema: openapiv3::Schema, + one_of: ReferenceOr, + } + let mut changes: Vec = Vec::new(); + + for ((path, method, status), body_examples) in &self.examples_store { + if *status != 200 { + continue; + } + let bodies: Vec = + body_examples.iter().map(|(_, v)| v.clone()).collect(); + let (_, error_bodies) = + crate::envelope::group_bodies(&bodies, &envelope_cfg.discriminator_field); + + if error_bodies.is_empty() { + continue; + } + + all_error_bodies.extend(error_bodies.iter().cloned()); + + let op_id = self + .spec + .paths + .paths + .get(path.as_str()) + .and_then(|p| { + if let ReferenceOr::Item(pi) = p { + Some(pi) + } else { + None + } + }) + .and_then(|pi| get_operation_ref(pi, method)) + .and_then(|s| s.as_ref()) + .and_then(|op| op.operation_id.as_deref().map(String::from)); + + let success_schema = { + let path_ref = self.spec.paths.paths.get(path.as_str()); + let path_item = match path_ref { + Some(ReferenceOr::Item(pi)) => pi, + _ => continue, + }; + let op = match get_operation_ref(path_item, method) { + Some(Some(op)) => op, + _ => continue, + }; + let resp = match op.responses.responses.get(&StatusCode::Code(200)) { + Some(ReferenceOr::Item(r)) => r, + _ => continue, + }; + let mt = match resp.content.values().next() { + Some(mt) => mt, + None => continue, + }; + match &mt.schema { + Some(ReferenceOr::Item(schema)) => schema.clone(), + _ => continue, + } + }; + + let success_name = crate::envelope::success_component_name( + op_id.as_deref(), + path, + method, + &envelope_cfg.success_suffix, + ); + + let success_ref_str = format!("#/components/schemas/{success_name}"); + let error_ref_str = "#/components/schemas/ApiError".to_string(); + let one_of = 
crate::envelope::build_one_of_schema( + &success_ref_str, + &error_ref_str, + &envelope_cfg.discriminator_field, + ); + + changes.push(EnvelopeChange { + path: path.clone(), + method: method.clone(), + success_name, + success_schema, + one_of, + }); + } + + for change in changes { + components_schemas.insert( + change.success_name, + ReferenceOr::Item(change.success_schema), + ); + + if let Some(ReferenceOr::Item(path_item)) = + self.spec.paths.paths.get_mut(change.path.as_str()) + { + if let Some(slot) = get_operation_mut(path_item, &change.method) { + if let Some(op) = slot.as_mut() { + if let Some(ReferenceOr::Item(resp)) = + op.responses.responses.get_mut(&StatusCode::Code(200)) + { + if let Some(mt) = resp.content.values_mut().next() { + mt.schema = Some(change.one_of); + } + } + } + } + } + } + + if !all_error_bodies.is_empty() { + let api_error_schema = + crate::envelope::infer_api_error(&all_error_bodies, envelope_cfg); + components_schemas + .insert("ApiError".to_string(), ReferenceOr::Item(api_error_schema)); + } + + if !components_schemas.is_empty() { + let components = self + .spec + .components + .get_or_insert_with(openapiv3::Components::default); + for (name, schema) in components_schemas { + components.schemas.insert(name, schema); + } + } + } + for ((path, method, status), examples) in self.examples_store.into_iter() { let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) else { continue; }; @@ -770,7 +985,7 @@ impl OpenApiBuilder { } media_type.examples = ex_map; } - for ((path, method, content_type), examples) in self.req_examples_store.drain() { + for ((path, method, content_type), examples) in self.req_examples_store.into_iter() { let Some(ReferenceOr::Item(path_item)) = self.spec.paths.paths.get_mut(&path) else { continue; }; @@ -808,6 +1023,13 @@ impl OpenApiBuilder { } media_type.examples = ex_map; } + + self.spec.paths.paths.sort_keys(); + + if let Some(ref mut components) = self.spec.components { + 
components.schemas.sort_keys(); + } + self.spec } } @@ -910,18 +1132,9 @@ mod tests { fn test_config() -> Config { Config { prefix: "https://api.example.com".to_string(), - openapi_title: None, openapi_version: "1.0.0".to_string(), - exclude_headers: vec![], - exclude_cookies: vec![], - include_headers: false, - ignore_images: false, - suppress_params: false, - tags_overrides: None, - skip_options: false, max_examples: 5, - redact_patterns: vec![], - redact_fields: vec![], + ..Default::default() } } diff --git a/src/cli.rs b/src/cli.rs index 0e08f70..4a7ce0b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,7 +15,7 @@ pub enum Command { /// Discover API endpoints from captured traffic and produce a templates file Discover(DiscoverArgs), /// Generate an OpenAPI specification from captured traffic using a templates file - Generate(GenerateArgs), + Generate(Box), } /// Input format for traffic captures @@ -30,6 +30,32 @@ pub enum InputFormat { Mitmproxy, } +/// Tag strategy for operations +#[derive(ValueEnum, Clone, Debug, Default)] +pub enum TagStrategyArg { + /// Default: first non-param path segment (existing behavior) + #[default] + Legacy, + /// Suppress all tags + None, + /// Extract segment at given index + PathSegment, + /// Use regex rules file + Rules, +} + +/// operationId strategy for operations +#[derive(ValueEnum, Clone, Debug, Default)] +pub enum OperationIdStrategyArg { + /// Do not generate operationId (default) + #[default] + None, + /// Derive from HTTP method + path + Path, + /// Use custom template with {method} and {path} placeholders + Template, +} + fn parse_byte_size(s: &str) -> Result { let s = s.trim(); let (num_str, multiplier) = if let Some(n) = s.strip_suffix("GiB") { @@ -186,9 +212,45 @@ pub struct GenerateArgs { #[arg(long, default_value_t = 5)] pub max_examples: usize, - #[arg(long, value_delimiter = ',')] + /// Regex pattern to redact from examples (repeat for multiple) + #[arg(long, value_name = "REGEX")] pub redact_patterns: Vec, + 
/// Comma-separated field names to redact from examples #[arg(long, value_delimiter = ',')] pub redact_fields: Vec, + + #[arg(long, value_enum, default_value_t = TagStrategyArg::Legacy)] + pub tag_strategy: TagStrategyArg, + + #[arg(long)] + pub tag_segment_index: Option, + + #[arg(long)] + pub tag_rules: Option, + + #[arg(long, value_enum, default_value_t = OperationIdStrategyArg::None)] + pub operation_id_strategy: OperationIdStrategyArg, + + #[arg(long)] + pub operation_id_template: Option, + + #[arg(long)] + pub operation_id_overrides: Option, + + /// Discriminator field name for envelope detection. + /// When set, 200 responses with both success:true and success:false + /// bodies will be split into oneOf(Success, ApiError). + #[arg(long)] + pub envelope_discriminator: Option, + + /// Path to YAML file containing the hand-supplied ApiError schema. + /// If omitted, schema is inferred from captured error bodies. + #[arg(long)] + pub envelope_error_shape: Option, + + /// Suffix for success component names (default: "Success"). + /// E.g., operationId "getFairPrice" → "GetFairPriceSuccess" + #[arg(long, default_value = "Success")] + pub envelope_success_component_suffix: String, } diff --git a/src/envelope.rs b/src/envelope.rs new file mode 100644 index 0000000..0feef9e --- /dev/null +++ b/src/envelope.rs @@ -0,0 +1,328 @@ +//! Envelope-based response splitting. +//! +//! Many APIs wrap every response in a `{ "success": bool, ... }` envelope. +//! This module classifies captured response bodies into *success* vs *error* +//! groups based on a discriminator field, infers an `ApiError` schema from the +//! error examples, and builds a `oneOf` schema with a discriminator annotation. + +use openapiv3::{Discriminator, ReferenceOr, Schema, SchemaData, SchemaKind}; +use serde_json::Value; + +/// Configuration for envelope-based response splitting. +#[derive(Clone, Debug)] +pub struct EnvelopeConfig { + /// JSON field name used as the discriminator (e.g. `"success"`). 
+ pub discriminator_field: String, + /// Optional pre-defined error schema; skips inference when set. + pub error_shape: Option, + /// Suffix appended to component names (e.g. `"Success"`). + pub success_suffix: String, +} + +/// Group response bodies into (success, error) based on a discriminator field. +/// +/// Classification: only a JSON boolean `true` at `discriminator` counts as +/// success. Everything else — `false`, `null`, strings, numbers, or a missing +/// field — is classified as error. +pub fn group_bodies(bodies: &[Value], discriminator: &str) -> (Vec, Vec) { + let mut success = Vec::new(); + let mut error = Vec::new(); + for body in bodies { + if body.get(discriminator) == Some(&Value::Bool(true)) { + success.push(body.clone()); + } else { + error.push(body.clone()); + } + } + (success, error) +} + +/// Infer an `ApiError` schema from error body examples. +/// +/// If `config.error_shape` is set, returns that directly. +/// Otherwise merges all error bodies into a single schema using majority-vote +/// type selection per field. Falls back to an empty `Any` schema when no +/// examples exist. 
+pub fn infer_api_error(error_bodies: &[Value], config: &EnvelopeConfig) -> Schema {
+    match &config.error_shape {
+        // A hand-supplied shape always takes precedence over inference.
+        Some(custom) => custom.clone(),
+        // Nothing to infer from: emit an unconstrained schema.
+        None if error_bodies.is_empty() => Schema {
+            schema_data: SchemaData::default(),
+            schema_kind: SchemaKind::Any(openapiv3::AnySchema::default()),
+        },
+        None => {
+            let mut inferred = merge_error_body_schemas(error_bodies);
+            pin_discriminator_field(&mut inferred, &config.discriminator_field);
+            inferred
+        }
+    }
+}
+
+/// Set the `field_name` property of an object schema to a boolean schema
+/// whose only enumerated value is `false`.
+///
+/// Schemas that are not object-typed are left untouched.
+fn pin_discriminator_field(schema: &mut Schema, field_name: &str) {
+    let SchemaKind::Type(openapiv3::Type::Object(object)) = &mut schema.schema_kind else {
+        return;
+    };
+    let false_only = openapiv3::BooleanType {
+        enumeration: vec![Some(false)],
+    };
+    let pinned = Schema {
+        schema_data: SchemaData::default(),
+        schema_kind: SchemaKind::Type(openapiv3::Type::Boolean(false_only)),
+    };
+    object
+        .properties
+        .insert(field_name.to_string(), ReferenceOr::Item(Box::new(pinned)));
+}
+
+/// Merge multiple error body JSON values into a single schema.
+///
+/// For each field across all bodies, picks the representative value whose JSON
+/// type appears most frequently (majority vote), then converts the merged
+/// object to a schema.
+fn merge_error_body_schemas(bodies: &[Value]) -> Schema {
+    use std::collections::BTreeMap;
+
+    // Collect every observed value per field, keeping first-seen field order.
+    // Non-object bodies contribute nothing.
+    let mut field_values: indexmap::IndexMap<String, Vec<&Value>> = indexmap::IndexMap::new();
+    for body in bodies {
+        if let Value::Object(obj) = body {
+            for (key, val) in obj {
+                field_values.entry(key.clone()).or_default().push(val);
+            }
+        }
+    }
+
+    let mut merged = serde_json::Map::new();
+    for (key, values) in &field_values {
+        // Count samples per JSON type, remembering the first value seen for
+        // each type as its representative.
+        //
+        // A BTreeMap is used instead of a HashMap on purpose: HashMap
+        // iteration order differs between runs, which made the winner of a
+        // tie (e.g. one number sample vs. one string sample) vary from run to
+        // run. With ordered iteration and `max_by_key` returning the last
+        // maximum, a tie always goes to the type with the larger discriminant
+        // (object > array > string > number > bool > null).
+        let mut type_counts: BTreeMap<u8, (usize, &Value)> = BTreeMap::new();
+        for &val in values {
+            let disc = json_type_discriminant(val);
+            let entry = type_counts.entry(disc).or_insert((0, val));
+            entry.0 += 1;
+        }
+        if let Some((_, representative)) = type_counts.into_values().max_by_key(|(count, _)| *count)
+        {
+            merged.insert(key.clone(), representative.clone());
+        }
+    }
+
+    crate::schema::value_to_schema(&Value::Object(merged))
+}
+
+/// Map a JSON value to a small integer identifying its JSON type.
+///
+/// The numeric order (null < bool < number < string < array < object) is
+/// also the tie-break order used by `merge_error_body_schemas`.
+fn json_type_discriminant(val: &Value) -> u8 {
+    match val {
+        Value::Null => 0,
+        Value::Bool(_) => 1,
+        Value::Number(_) => 2,
+        Value::String(_) => 3,
+        Value::Array(_) => 4,
+        Value::Object(_) => 5,
+    }
+}
+
+/// Build a `oneOf` schema combining a success `$ref` and an error `$ref`,
+/// annotated with an OpenAPI discriminator.
+///
+/// The discriminator carries only `property_name`; its `mapping` is left empty.
+// NOTE(review): OpenAPI resolves discriminator values against schema names or
+// `mapping` keys; with a boolean property and no mapping, some generators may
+// not be able to dispatch on it. Confirm against the target generator.
+pub fn build_one_of_schema(
+    success_ref: &str,
+    error_ref: &str,
+    discriminator_field: &str,
+) -> ReferenceOr<Schema> {
+    let one_of = vec![ReferenceOr::ref_(success_ref), ReferenceOr::ref_(error_ref)];
+
+    ReferenceOr::Item(Schema {
+        schema_data: SchemaData {
+            discriminator: Some(Discriminator {
+                property_name: discriminator_field.to_string(),
+                mapping: indexmap::IndexMap::new(),
+                extensions: indexmap::IndexMap::new(),
+            }),
+            ..SchemaData::default()
+        },
+        schema_kind: SchemaKind::OneOf { one_of },
+    })
+}
+
+/// Derive a PascalCase component name for the success schema.
+///
+/// Prefers `operationId` when available (uppercasing the first letter),
+/// otherwise falls back to `Method` + path segments with each segment
+/// capitalised.
+pub fn success_component_name( + operation_id: Option<&str>, + path: &str, + method: &str, + suffix: &str, +) -> String { + if let Some(op_id) = operation_id { + let mut chars = op_id.chars(); + return match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{upper}{}{suffix}", chars.as_str()) + } + None => suffix.to_string(), + }; + } + + let path_part: String = path + .split('/') + .filter(|s| !s.is_empty()) + .map(|s| { + let s = s.trim_matches(|c: char| c == '{' || c == '}'); + let mut chars = s.chars(); + match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{upper}{}", chars.as_str()) + } + None => String::new(), + } + }) + .collect(); + + let method_part = { + let mut chars = method.chars(); + match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{upper}{}", chars.as_str().to_lowercase()) + } + None => String::new(), + } + }; + + format!("{method_part}{path_part}{suffix}") +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn group_by_discriminator() { + let bodies = vec![ + json!({"success": true, "data": {}}), + json!({"success": true, "data": {"price": 1.0}}), + json!({"success": true, "data": {"price": 2.0}}), + json!({"success": false, "code": 1, "message": "err"}), + ]; + let (success, error) = group_bodies(&bodies, "success"); + assert_eq!(success.len(), 3); + assert_eq!(error.len(), 1); + } + + #[test] + fn only_success_unchanged() { + let bodies = vec![json!({"success": true, "data": {}})]; + let (success, error) = group_bodies(&bodies, "success"); + assert_eq!(success.len(), 1); + assert!(error.is_empty()); + } + + #[test] + fn non_boolean_discriminator_is_error() { + let bodies = vec![ + json!({"success": 1}), + json!({"success": "yes"}), + json!({"success": null}), + ]; + let (success, error) = group_bodies(&bodies, "success"); + assert!(success.is_empty()); + assert_eq!(error.len(), 3); 
+ } + + #[test] + fn missing_discriminator_field_is_error() { + let bodies = vec![json!({"data": {}})]; + let (success, error) = group_bodies(&bodies, "success"); + assert!(success.is_empty()); + assert_eq!(error.len(), 1); + } + + #[test] + fn zero_error_bodies() { + let bodies = vec![json!({"success": true, "data": {}})]; + let (success, error) = group_bodies(&bodies, "success"); + assert_eq!(success.len(), 1); + assert!(error.is_empty()); + } + + #[test] + fn success_component_name_from_operation_id() { + let name = success_component_name( + Some("getFairPrice"), + "/api/v1/contract/fair_price/{symbol}", + "GET", + "Success", + ); + assert_eq!(name, "GetFairPriceSuccess"); + } + + #[test] + fn success_component_name_fallback() { + let name = success_component_name(None, "/api/v1/users/{id}", "GET", "Success"); + assert!(name.contains("Success")); + assert!(!name.is_empty()); + } + + #[test] + fn infer_api_error_merges_all_bodies_not_just_first() { + let bodies = vec![ + json!({"success": false, "code": 401, "msg": 0}), + json!({"success": false, "code": 401, "msg": "Not logged in"}), + json!({"success": false, "code": 401, "msg": "Please login first"}), + ]; + let config = EnvelopeConfig { + discriminator_field: "success".to_string(), + error_shape: None, + success_suffix: "Success".to_string(), + }; + let schema = infer_api_error(&bodies, &config); + let yaml = serde_yaml_ng::to_string(&schema).unwrap(); + assert!( + yaml.contains("msg:") + && (yaml.contains("type: string") || yaml.contains("- type: string")), + "msg must be string (or oneOf with string) when 2/3 samples are string:\n{yaml}" + ); + } + + #[test] + fn inferred_api_error_includes_discriminator_field_pinned_to_false() { + let bodies = vec![ + json!({"success": false, "code": 401, "msg": "Not logged in"}), + json!({"success": false, "code": 99999, "msg": "System busy"}), + ]; + let config = EnvelopeConfig { + discriminator_field: "success".to_string(), + error_shape: None, + success_suffix: 
"Success".to_string(), + }; + let schema = infer_api_error(&bodies, &config); + let yaml = serde_yaml_ng::to_string(&schema).unwrap(); + assert!( + yaml.contains("success:"), + "discriminator field must be in ApiError:\n{yaml}" + ); + assert!( + yaml.contains("enum:") && yaml.contains("- false"), + "discriminator field must be pinned with enum: [false]:\n{yaml}" + ); + } + + #[test] + fn build_one_of_schema_structure() { + let schema = build_one_of_schema( + "#/components/schemas/GetTickerSuccess", + "#/components/schemas/ApiError", + "success", + ); + if let ReferenceOr::Item(s) = schema { + match &s.schema_kind { + SchemaKind::OneOf { one_of } => { + assert_eq!(one_of.len(), 2); + } + other => panic!("Expected OneOf, got {other:?}"), + } + assert!(s.schema_data.discriminator.is_some()); + } else { + panic!("Expected Item, got Ref"); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index ef2d0bf..d6ae4d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,15 +20,18 @@ pub mod builder; pub mod cli; +pub mod envelope; pub mod error; pub mod har_reader; pub mod mitmproxy_reader; +pub mod operation_id; pub mod output; pub mod params; pub mod path_matching; pub mod redact; pub mod report; pub mod schema; +pub mod tag_rules; pub mod tnetstring; pub(crate) mod type_hints; pub mod types; diff --git a/src/main.rs b/src/main.rs index c0a810f..69207b1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,7 @@ use clap::Parser; use tracing::{debug, info, warn}; use mitm2openapi::builder::{self, OpenApiBuilder}; -use mitm2openapi::cli::{Cli, Command, InputFormat}; +use mitm2openapi::cli::{Cli, Command, InputFormat, OperationIdStrategyArg, TagStrategyArg}; use mitm2openapi::har_reader; use mitm2openapi::mitmproxy_reader; use mitm2openapi::output; @@ -127,6 +127,7 @@ fn run(cli: Cli) -> Result { check_strict(strict, &report) } Command::Generate(args) => { + let args = *args; info!(input = %args.input.display(), output = %args.output.display(), "Starting generation"); let strict = 
args.strict; @@ -162,6 +163,80 @@ fn run(cli: Cli) -> Result { info!(count = active_templates.len(), "Using active templates"); + let tag_strategy = match &args.tag_strategy { + TagStrategyArg::Legacy => mitm2openapi::tag_rules::TagStrategy::Legacy, + TagStrategyArg::None => mitm2openapi::tag_rules::TagStrategy::None, + TagStrategyArg::PathSegment => { + let index = args.tag_segment_index.unwrap_or(0); + mitm2openapi::tag_rules::TagStrategy::PathSegment { index } + } + TagStrategyArg::Rules => { + let rules_path = args + .tag_rules + .as_ref() + .context("--tag-rules path required when --tag-strategy=rules")?; + mitm2openapi::tag_rules::load_tag_rules(rules_path) + .context("failed to load tag rules")? + } + }; + + let tag_strategy = if let Some(rules_path) = &args.tag_rules { + if matches!(args.tag_strategy, TagStrategyArg::Legacy) { + mitm2openapi::tag_rules::load_tag_rules(rules_path) + .context("failed to load tag rules")? + } else { + tag_strategy + } + } else { + tag_strategy + }; + + let operation_id_overrides: std::collections::HashMap = + if let Some(overrides_path) = &args.operation_id_overrides { + mitm2openapi::operation_id::load_overrides(overrides_path) + .context("failed to load operationId overrides")? 
+ } else { + std::collections::HashMap::new() + }; + + let operation_id_strategy = match &args.operation_id_strategy { + OperationIdStrategyArg::None => { + mitm2openapi::operation_id::OperationIdStrategy::None + } + OperationIdStrategyArg::Path => { + mitm2openapi::operation_id::OperationIdStrategy::Path + } + OperationIdStrategyArg::Template => { + let tmpl = args.operation_id_template.clone().context( + "--operation-id-template required when --operation-id-strategy=template", + )?; + mitm2openapi::operation_id::OperationIdStrategy::Template(tmpl) + } + }; + + let envelope_config = if let Some(discriminator) = args.envelope_discriminator.clone() { + let error_shape = if let Some(shape_path) = &args.envelope_error_shape { + let content = std::fs::read_to_string(shape_path).with_context(|| { + format!( + "failed to read envelope error shape from {}", + shape_path.display() + ) + })?; + let schema: openapiv3::Schema = serde_yaml_ng::from_str(&content) + .with_context(|| "failed to parse envelope error shape YAML")?; + Some(schema) + } else { + None + }; + Some(mitm2openapi::envelope::EnvelopeConfig { + discriminator_field: discriminator, + error_shape, + success_suffix: args.envelope_success_component_suffix.clone(), + }) + } else { + None + }; + let config = Config { prefix: args.prefix.clone(), openapi_title: args.openapi_title.clone(), @@ -176,8 +251,23 @@ fn run(cli: Cli) -> Result { max_examples: args.max_examples, redact_patterns: args.redact_patterns.clone(), redact_fields: args.redact_fields.clone(), + tag_strategy, + operation_id_strategy, + operation_id_overrides, + envelope_config, }; + if !config.redact_patterns.is_empty() || !config.redact_fields.is_empty() { + if let Err(e) = mitm2openapi::redact::Redactor::new( + &config.redact_patterns, + &config.redact_fields, + ) { + let msg = format!("invalid redact pattern: {e}"); + warn!(error = %e, "Invalid redact pattern"); + *report.events.parse_error.entry(msg).or_insert(0) += 1; + } + } + let mut builder = 
OpenApiBuilder::new(&args.prefix, &config, active_templates); let mut count = 0usize; for req_result in req_iter { diff --git a/src/operation_id.rs b/src/operation_id.rs new file mode 100644 index 0000000..c3cfbd9 --- /dev/null +++ b/src/operation_id.rs @@ -0,0 +1,342 @@ +use anyhow::Result; +use std::collections::HashMap; +use std::path::Path; + +/// Strategy for generating operationId values. +#[derive(Clone, Debug, Default)] +pub enum OperationIdStrategy { + /// Do not generate operationId. + #[default] + None, + /// Derive operationId from HTTP method + path template. + Path, + /// Use a custom template string with `{method}` and `{path}` placeholders. + Template(String), +} + +/// Derive an operationId from method + template path. +/// +/// `template_path` is the OpenAPI path template (e.g. "/api/v1/users/{id}"). +pub fn derive_operation_id( + method: &str, + template_path: &str, + strategy: &OperationIdStrategy, +) -> Option { + match strategy { + OperationIdStrategy::None => None, + OperationIdStrategy::Template(tmpl) => Some( + tmpl.replace("{method}", method) + .replace("{path}", template_path), + ), + OperationIdStrategy::Path => derive_from_path(method, template_path), + } +} + +/// Resolve collisions in a set of operations. +/// +/// Input: mutable vec of `(path, method, Option)`. +/// Collision rule: sort by `(path, method)`, first keeps name, rest get `_2`, `_3`, etc. +pub fn resolve_collisions(operations: &mut [(String, String, Option)]) { + operations.sort_by(|a, b| (&a.0, &a.1).cmp(&(&b.0, &b.1))); + + let mut seen: HashMap = HashMap::new(); + for op in operations.iter_mut() { + if let Some(ref mut id) = op.2 { + let count = seen.entry(id.clone()).or_insert(0); + *count += 1; + if *count > 1 { + *id = format!("{}_{}", id, count); + } + } + } +} + +/// Load operationId overrides from a YAML file. 
+/// +/// YAML format: `"METHOD /path": operationId` +pub fn load_overrides(path: &Path) -> Result> { + let content = std::fs::read_to_string(path)?; + let map: HashMap = serde_yaml_ng::from_str(&content)?; + Ok(map) +} + +fn is_param(segment: &str) -> bool { + segment.starts_with('{') && segment.ends_with('}') +} + +fn derive_from_path(method: &str, template_path: &str) -> Option { + let segments: Vec<&str> = template_path.split('/').filter(|s| !s.is_empty()).collect(); + let is_item = segments.last().is_some_and(|s| is_param(s)); + let is_collection = !is_item; + + let non_params: Vec<&str> = segments.iter().filter(|s| !is_param(s)).copied().collect(); + + if method.eq_ignore_ascii_case("POST") && is_collection && non_params.len() >= 2 { + if let Some(&last) = non_params.last() { + if !last.ends_with('s') { + let verb = last; + let noun_idx = non_params.len().checked_sub(2)?; + let noun = non_params.get(noun_idx)?; + return Some(format!("{}{}", verb, to_pascal_case(noun))); + } + } + } + + let verb = method_to_verb(method, is_collection); + let mut name_segs = path_to_name_segments(&segments); + + if method.eq_ignore_ascii_case("POST") && is_collection { + if let Some(last) = name_segs.last_mut() { + *last = singularize_pascal(last); + } + } + + Some(to_camel_case(verb, &name_segs)) +} + +fn method_to_verb(method: &str, is_collection: bool) -> &'static str { + match method.to_ascii_uppercase().as_str() { + "GET" if is_collection => "list", + "GET" => "get", + "POST" => "create", + "PUT" => "update", + "DELETE" => "delete", + "PATCH" => "patch", + _ => "handle", + } +} + +fn path_to_name_segments(segments: &[&str]) -> Vec { + let last_non_param_pos = segments.iter().rposition(|s| !is_param(s)); + let last_param_pos = segments.iter().rposition(|s| is_param(s)); + + let mut result = Vec::new(); + + if let Some(param_pos) = last_param_pos { + if let Some(before_pos) = param_pos.checked_sub(1) { + if let Some(&before) = segments.get(before_pos) { + if 
!is_param(before) { + let singular = singularize(before); + result.push(to_pascal_case(&singular)); + + if let Some(lnp_pos) = last_non_param_pos { + if lnp_pos != before_pos { + if let Some(&last) = segments.get(lnp_pos) { + result.push(to_pascal_case(last)); + } + } + } + return result; + } + } + } + } + + if let Some(pos) = last_non_param_pos { + if let Some(&seg) = segments.get(pos) { + result.push(to_pascal_case(seg)); + } + } + + result +} + +fn to_pascal_case(s: &str) -> String { + s.split(['_', '-']) + .filter(|part| !part.is_empty()) + .map(|part| { + let mut chars = part.chars(); + match chars.next() { + Some(c) => { + let upper: String = c.to_uppercase().collect(); + format!("{}{}", upper, chars.as_str()) + } + None => String::new(), + } + }) + .collect() +} + +fn to_camel_case(verb: &str, segments: &[String]) -> String { + let mut result = verb.to_string(); + for seg in segments { + result.push_str(seg); + } + result +} + +fn singularize(word: &str) -> String { + if word.len() > 1 && word.ends_with('s') && !word.ends_with("ss") { + word[..word.len() - 1].to_string() + } else { + word.to_string() + } +} + +fn singularize_pascal(word: &str) -> String { + if word.len() > 1 && word.ends_with('s') && !word.ends_with("ss") { + word[..word.len() - 1].to_string() + } else { + word.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn get_collection() { + let result = derive_operation_id("GET", "/api/v1/users", &OperationIdStrategy::Path); + assert_eq!(result, Some("listUsers".to_string())); + } + + #[test] + fn get_item() { + let result = derive_operation_id("GET", "/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("getUser".to_string())); + } + + #[test] + fn post() { + let result = derive_operation_id("POST", "/api/v1/users", &OperationIdStrategy::Path); + assert_eq!(result, Some("createUser".to_string())); + } + + #[test] + fn put_item() { + let result = derive_operation_id("PUT", 
"/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("updateUser".to_string())); + } + + #[test] + fn delete_item() { + let result = + derive_operation_id("DELETE", "/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("deleteUser".to_string())); + } + + #[test] + fn patch_item() { + let result = derive_operation_id("PATCH", "/api/v1/users/{id}", &OperationIdStrategy::Path); + assert_eq!(result, Some("patchUser".to_string())); + } + + #[test] + fn nested_resource() { + let result = derive_operation_id( + "GET", + "/api/v1/users/{id}/orders", + &OperationIdStrategy::Path, + ); + assert_eq!(result, Some("listUserOrders".to_string())); + } + + #[test] + fn deep_path() { + let result = derive_operation_id( + "GET", + "/api/v1/contract/fair_price/{symbol}", + &OperationIdStrategy::Path, + ); + assert_eq!(result, Some("getFairPrice".to_string())); + } + + #[test] + fn deep_post() { + let result = derive_operation_id( + "POST", + "/api/v1/private/order/place", + &OperationIdStrategy::Path, + ); + assert_eq!(result, Some("placeOrder".to_string())); + } + + #[test] + fn strategy_none() { + let result = derive_operation_id("GET", "/api/v1/users", &OperationIdStrategy::None); + assert_eq!(result, None); + } + + #[test] + fn collision_resolution() { + let mut ops = vec![ + ( + "/api/v1/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ( + "/api/v2/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ]; + resolve_collisions(&mut ops); + + assert_eq!(ops.first().unwrap().2, Some("listUsers".to_string())); + assert_eq!(ops.get(1).unwrap().2, Some("listUsers_2".to_string())); + + let mut ops2 = vec![ + ( + "/api/v2/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ( + "/api/v1/users".to_string(), + "GET".to_string(), + Some("listUsers".to_string()), + ), + ]; + resolve_collisions(&mut ops2); + assert_eq!(ops2.first().unwrap().2, 
Some("listUsers".to_string())); + assert_eq!(ops2.get(1).unwrap().2, Some("listUsers_2".to_string())); + } + + #[test] + fn override_wins() { + let dir = tempfile::tempdir().unwrap(); + let file_path = dir.path().join("overrides.yaml"); + let mut f = std::fs::File::create(&file_path).unwrap(); + writeln!(f, "\"GET /api/v1/users\": getAllUsers").unwrap(); + drop(f); + + let overrides = load_overrides(&file_path).unwrap(); + let key = "GET /api/v1/users"; + assert_eq!(overrides.get(key), Some(&"getAllUsers".to_string())); + } + + #[test] + fn template_strategy() { + let result = derive_operation_id( + "GET", + "/api/v1/users", + &OperationIdStrategy::Template("{method}_{path}".to_string()), + ); + assert_eq!(result, Some("GET_/api/v1/users".to_string())); + } + + #[test] + fn pascal_case_snake() { + assert_eq!(to_pascal_case("fair_price"), "FairPrice"); + } + + #[test] + fn pascal_case_simple() { + assert_eq!(to_pascal_case("users"), "Users"); + } + + #[test] + fn singularize_plural() { + assert_eq!(singularize("users"), "user"); + assert_eq!(singularize("orders"), "order"); + } + + #[test] + fn singularize_already_singular() { + assert_eq!(singularize("place"), "place"); + assert_eq!(singularize("fair_price"), "fair_price"); + } +} diff --git a/src/tag_rules.rs b/src/tag_rules.rs new file mode 100644 index 0000000..1383e2f --- /dev/null +++ b/src/tag_rules.rs @@ -0,0 +1,228 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use serde::Deserialize; +use std::path::Path; + +#[derive(Debug, Clone)] +pub struct TagRule { + pub pattern: Regex, + pub tag: String, +} + +/// Strategy for assigning tags to API operations. +#[derive(Debug, Clone, Default)] +pub enum TagStrategy { + /// Default: the builder calls its own `extract_tag()` logic. + #[default] + Legacy, + /// Suppress all tags (empty `tags: []` on every operation). + None, + /// Extract the path segment at the given 0-based index. + /// Segments are split by `/` with empty segments removed. 
+ /// Index 0 = first segment after the leading `/`. + PathSegment { index: usize }, + /// First-match-wins regex rules with an optional default tag. + Rules { + rules: Vec, + default: Option, + }, +} + +#[derive(Deserialize)] +struct RawTagRule { + #[serde(rename = "match")] + match_pattern: String, + tag: String, +} + +#[derive(Deserialize)] +struct RawTagRules { + rules: Vec, + default: Option, +} + +/// Load tag rules from a YAML file. +/// Returns `Err` if the file can't be read or any regex is invalid. +pub fn load_tag_rules(path: &Path) -> Result { + let content = std::fs::read_to_string(path) + .with_context(|| format!("failed to read tag rules from {}", path.display()))?; + let raw: RawTagRules = serde_yaml_ng::from_str(&content) + .with_context(|| format!("failed to parse tag rules YAML from {}", path.display()))?; + let rules = raw + .rules + .into_iter() + .map(|r| { + let pattern = Regex::new(&r.match_pattern) + .with_context(|| format!("invalid regex in tag rule: {}", r.match_pattern))?; + Ok(TagRule { + pattern, + tag: r.tag, + }) + }) + .collect::>>()?; + Ok(TagStrategy::Rules { + rules, + default: raw.default, + }) +} + +/// Apply the strategy to a URL path, returning the resolved tag or `None`. +/// For `Legacy` and `None` strategies, always returns `None` — the builder +/// handles these cases directly. 
+pub fn resolve_tag(strategy: &TagStrategy, path: &str) -> Option { + match strategy { + TagStrategy::Legacy | TagStrategy::None => Option::None, + TagStrategy::PathSegment { index } => { + let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + segments.get(*index).map(|s| (*s).to_string()) + } + TagStrategy::Rules { rules, default } => { + for rule in rules { + if rule.pattern.is_match(path) { + return Some(rule.tag.clone()); + } + } + default.clone() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_rules(patterns: &[(&str, &str)], default: Option<&str>) -> TagStrategy { + TagStrategy::Rules { + rules: patterns + .iter() + .map(|(pat, tag)| TagRule { + pattern: Regex::new(pat).unwrap(), + tag: (*tag).to_string(), + }) + .collect(), + default: default.map(String::from), + } + } + + #[test] + fn match_first_wins() { + let strategy = make_rules( + &[ + ("^/api/v1/contract/", "Contract"), + ("^/api/v1/private/order", "Order"), + ], + Option::None, + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/contract/ticker"), + Some("Contract".to_string()), + ); + } + + #[test] + fn no_match_with_default() { + let strategy = make_rules(&[("^/api/v1/contract/", "Contract")], Some("Default")); + assert_eq!( + resolve_tag(&strategy, "/api/v1/other"), + Some("Default".to_string()), + ); + } + + #[test] + fn no_match_no_default() { + let strategy = make_rules(&[("^/api/v1/contract/", "Contract")], Option::None); + assert_eq!(resolve_tag(&strategy, "/api/v1/other"), Option::None); + } + + #[test] + fn regex_capture_groups() { + let strategy = make_rules(&[("^/api/v1/(private/)?account", "Account")], Option::None); + assert_eq!( + resolve_tag(&strategy, "/api/v1/account"), + Some("Account".to_string()), + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/private/account"), + Some("Account".to_string()), + ); + } + + #[test] + fn empty_rules() { + let with_default = make_rules(&[], Some("Fallback")); + assert_eq!( + 
resolve_tag(&with_default, "/anything"), + Some("Fallback".to_string()), + ); + + let without_default = make_rules(&[], Option::None); + assert_eq!(resolve_tag(&without_default, "/anything"), Option::None); + } + + #[test] + fn path_segment_strategy() { + let idx0 = TagStrategy::PathSegment { index: 0 }; + assert_eq!( + resolve_tag(&idx0, "/api/v1/contract/ticker"), + Some("api".to_string()), + ); + + let idx2 = TagStrategy::PathSegment { index: 2 }; + assert_eq!( + resolve_tag(&idx2, "/api/v1/contract/ticker"), + Some("contract".to_string()), + ); + } + + #[test] + fn path_segment_out_of_bounds() { + let strategy = TagStrategy::PathSegment { index: 10 }; + assert_eq!( + resolve_tag(&strategy, "/api/v1/contract/ticker"), + Option::None, + ); + } + + #[test] + fn load_tag_rules_from_yaml() { + let yaml = "\ +rules: + - match: \"^/api/v1/contract/\" + tag: Contract + - match: \"^/api/v1/private/order\" + tag: Order +default: Default +"; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("tags.yaml"); + std::fs::write(&path, yaml).unwrap(); + + let strategy = load_tag_rules(&path).unwrap(); + assert_eq!( + resolve_tag(&strategy, "/api/v1/contract/ticker"), + Some("Contract".to_string()), + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/private/order/123"), + Some("Order".to_string()), + ); + assert_eq!( + resolve_tag(&strategy, "/api/v1/other"), + Some("Default".to_string()), + ); + } + + #[test] + fn load_tag_rules_invalid_regex() { + let yaml = "\ +rules: + - match: \"[invalid\" + tag: Bad +"; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("tags.yaml"); + std::fs::write(&path, yaml).unwrap(); + + assert!(load_tag_rules(&path).is_err()); + } +} diff --git a/src/types.rs b/src/types.rs index 6ca62a8..a3ce422 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,3 +1,8 @@ +use crate::envelope::EnvelopeConfig; +use crate::operation_id::OperationIdStrategy; +use crate::tag_rules::TagStrategy; +use std::collections::HashMap; + 
/// Unified interface for captured HTTP requests from different sources (HAR, mitmproxy). pub trait CapturedRequest { fn get_url(&self) -> &str; @@ -12,7 +17,7 @@ pub trait CapturedRequest { } /// Configuration for OpenAPI generation, derived from CLI arguments. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct Config { pub prefix: String, pub openapi_title: Option, @@ -27,4 +32,8 @@ pub struct Config { pub max_examples: usize, pub redact_patterns: Vec, pub redact_fields: Vec, + pub tag_strategy: TagStrategy, + pub operation_id_strategy: OperationIdStrategy, + pub operation_id_overrides: HashMap, + pub envelope_config: Option, } diff --git a/tests/e2e_features.rs b/tests/e2e_features.rs new file mode 100644 index 0000000..3cb757c --- /dev/null +++ b/tests/e2e_features.rs @@ -0,0 +1,109 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn e2e_all_features() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("envelope_test.har").to_str().unwrap(), + "-t", + fixture("envelope_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + "--operation-id-strategy", + "path", + "--tag-rules", + fixture("tag-rules-test.yaml").to_str().unwrap(), + "--envelope-discriminator", + "success", + ]) + .assert() + .success(); + + let content = std::fs::read_to_string(&output).unwrap(); + + // 1. Every operation should have operationId (--operation-id-strategy path) + assert!( + content.contains("operationId:"), + "expected operationId in output:\n{content}" + ); + + // 2. 
Tags from rules: Contract and Private + assert!( + content.contains("- Contract"), + "expected 'Contract' tag:\n{content}" + ); + assert!( + content.contains("- Private"), + "expected 'Private' tag:\n{content}" + ); + + // 3. ApiError schema in components (from envelope detection) + assert!( + content.contains("ApiError:"), + "expected ApiError in components:\n{content}" + ); + + // 4. oneOf present (ticker + fair_price both had mixed bodies) + assert!( + content.contains("oneOf:"), + "expected oneOf schema:\n{content}" + ); + + // 5. Output is valid YAML + let parsed: serde_yaml_ng::Value = + serde_yaml_ng::from_str(&content).expect("generated spec must be valid YAML"); + assert!(parsed.get("openapi").is_some()); + assert!(parsed.get("paths").is_some()); + assert!(parsed.get("components").is_some()); +} + +#[test] +fn e2e_no_flags_backward_compat() { + // Without new flags: no operationId, no components + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("envelope_test.har").to_str().unwrap(), + "-t", + fixture("envelope_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + ]) + .assert() + .success(); + + let content = std::fs::read_to_string(&output).unwrap(); + assert!( + !content.contains("operationId:"), + "operationId should NOT appear without --operation-id-strategy:\n{content}" + ); + assert!( + !content.contains("components:"), + "components should NOT appear without --envelope-discriminator:\n{content}" + ); +} diff --git a/tests/expected/snapshot_baseline.yaml b/tests/expected/snapshot_baseline.yaml new file mode 100644 index 0000000..673a280 --- /dev/null +++ b/tests/expected/snapshot_baseline.yaml @@ -0,0 +1,98 @@ +openapi: '3.0.3' +info: + title: api.example.com API + version: 1.0.0 +servers: +- url: https://api.example.com +paths: + /api/v1/users: + get: + tags: + - api + summary: GET 
/api/v1/users + responses: + '200': + description: OK + content: + application/json: + schema: + type: object + properties: + items: + type: array + items: + type: object + properties: + id: + type: integer + name: + type: string + examples: + response_1: + value: + items: + - id: 1 + name: Alice + post: + tags: + - api + summary: POST /api/v1/users + requestBody: + content: + application/json: + schema: + type: object + properties: + name: + type: string + examples: + Bob: + value: + name: Bob + required: true + responses: + '201': + description: Created + content: + application/json: + schema: + type: object + properties: + id: + type: integer + name: + type: string + examples: + Bob: + value: + id: 2 + name: Bob + /api/v1/users/{id}: + get: + tags: + - api + summary: GET /api/v1/users/{id} + parameters: + - in: path + name: id + required: true + schema: + type: string + style: simple + responses: + '200': + description: OK + content: + application/json: + schema: + type: object + properties: + id: + type: integer + name: + type: string + examples: + Charlie: + value: + id: 42 + name: Charlie diff --git a/tests/fixtures/envelope_templates.yaml b/tests/fixtures/envelope_templates.yaml new file mode 100644 index 0000000..b6f868e --- /dev/null +++ b/tests/fixtures/envelope_templates.yaml @@ -0,0 +1,4 @@ +x-path-templates: + - /api/v1/contract/ticker + - /api/v1/contract/fair_price/{symbol} + - /api/v1/private/order/place diff --git a/tests/fixtures/envelope_test.har b/tests/fixtures/envelope_test.har new file mode 100644 index 0000000..a721c1d --- /dev/null +++ b/tests/fixtures/envelope_test.har @@ -0,0 +1,7 @@ +{"log":{"version":"1.2","creator":{"name":"test","version":"1.0"},"entries":[ + 
{"startedDateTime":"2025-01-20T10:00:00.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/ticker","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":60,"mimeType":"application/json","text":"{\"success\": true, \"data\": {\"price\": 42000.5}}"},"redirectURL":"","headersSize":-1,"bodySize":60},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-20T10:00:01.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/ticker","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":55,"mimeType":"application/json","text":"{\"success\": false, \"code\": 1, \"message\": \"error\"}"},"redirectURL":"","headersSize":-1,"bodySize":55},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-20T10:00:02.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/fair_price/BTC","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":65,"mimeType":"application/json","text":"{\"success\": true, \"data\": {\"fairPrice\": 42100.0}}"},"redirectURL":"","headersSize":-1,"bodySize":65},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + 
{"startedDateTime":"2025-01-20T10:00:03.000Z","time":50,"request":{"method":"GET","url":"https://api.example.com/api/v1/contract/fair_price/ETH","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":60,"mimeType":"application/json","text":"{\"success\": false, \"code\": 2, \"message\": \"symbol not found\"}"},"redirectURL":"","headersSize":-1,"bodySize":60},"cache":{},"timings":{"send":1,"wait":40,"receive":9}}, + {"startedDateTime":"2025-01-20T10:00:04.000Z","time":80,"request":{"method":"POST","url":"https://api.example.com/api/v1/private/order/place","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"queryString":[],"headersSize":-1,"bodySize":40,"postData":{"mimeType":"application/json","text":"{\"symbol\": \"BTC\", \"amount\": 0.1}"}},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":50,"mimeType":"application/json","text":"{\"success\": true, \"orderId\": \"ord_001\"}"},"redirectURL":"","headersSize":-1,"bodySize":50},"cache":{},"timings":{"send":1,"wait":70,"receive":9}} +]}} \ No newline at end of file diff --git a/tests/fixtures/snapshot_input.har b/tests/fixtures/snapshot_input.har new file mode 100644 index 0000000..26ddd76 --- /dev/null +++ b/tests/fixtures/snapshot_input.har @@ -0,0 +1,5 @@ +{"log":{"version":"1.2","creator":{"name":"test","version":"1.0"},"entries":[ + 
{"startedDateTime":"2025-01-15T10:00:00.000Z","time":100,"request":{"method":"GET","url":"https://api.example.com/api/v1/users","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":50,"mimeType":"application/json","text":"{\"items\": [{\"id\": 1, \"name\": \"Alice\"}]}"},"redirectURL":"","headersSize":-1,"bodySize":50},"cache":{},"timings":{"send":1,"wait":90,"receive":9}}, + {"startedDateTime":"2025-01-15T10:00:01.000Z","time":100,"request":{"method":"POST","url":"https://api.example.com/api/v1/users","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"queryString":[],"headersSize":-1,"bodySize":30,"postData":{"mimeType":"application/json","text":"{\"name\": \"Bob\"}"}},"response":{"status":201,"statusText":"Created","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":30,"mimeType":"application/json","text":"{\"id\": 2, \"name\": \"Bob\"}"},"redirectURL":"","headersSize":-1,"bodySize":30},"cache":{},"timings":{"send":1,"wait":90,"receive":9}}, + {"startedDateTime":"2025-01-15T10:00:02.000Z","time":100,"request":{"method":"GET","url":"https://api.example.com/api/v1/users/42","httpVersion":"HTTP/1.1","cookies":[],"headers":[],"queryString":[],"headersSize":-1,"bodySize":0},"response":{"status":200,"statusText":"OK","httpVersion":"HTTP/1.1","cookies":[],"headers":[{"name":"Content-Type","value":"application/json"}],"content":{"size":35,"mimeType":"application/json","text":"{\"id\": 42, \"name\": \"Charlie\"}"},"redirectURL":"","headersSize":-1,"bodySize":35},"cache":{},"timings":{"send":1,"wait":90,"receive":9}} +]}} diff --git a/tests/fixtures/snapshot_templates.yaml b/tests/fixtures/snapshot_templates.yaml new file mode 100644 index 
0000000..770db64 --- /dev/null +++ b/tests/fixtures/snapshot_templates.yaml @@ -0,0 +1,3 @@ +x-path-templates: + - /api/v1/users + - /api/v1/users/{id} diff --git a/tests/fixtures/tag-rules-test.yaml b/tests/fixtures/tag-rules-test.yaml new file mode 100644 index 0000000..67543b2 --- /dev/null +++ b/tests/fixtures/tag-rules-test.yaml @@ -0,0 +1,6 @@ +rules: + - match: "^/api/v1/contract/" + tag: Contract + - match: "^/api/v1/private/" + tag: Private +default: Default diff --git a/tests/merge_responses.rs b/tests/merge_responses.rs index 31418f8..f08364d 100644 --- a/tests/merge_responses.rs +++ b/tests/merge_responses.rs @@ -80,18 +80,9 @@ impl CapturedRequest for MockRequest { fn test_config() -> Config { Config { prefix: "https://api.example.com".to_string(), - openapi_title: None, openapi_version: "1.0.0".to_string(), - exclude_headers: vec![], - exclude_cookies: vec![], - include_headers: false, - ignore_images: false, - suppress_params: false, - tags_overrides: None, - skip_options: false, max_examples: 5, - redact_patterns: vec![], - redact_fields: vec![], + ..Default::default() } } diff --git a/tests/redact_patterns.rs b/tests/redact_patterns.rs new file mode 100644 index 0000000..d7d6a55 --- /dev/null +++ b/tests/redact_patterns.rs @@ -0,0 +1,72 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn redact_pattern_with_quantifier_comma_is_not_split() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + // {8,64} quantifier contains a comma — clap must not split on it + let cmd = Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("snapshot_input.har").to_str().unwrap(), + "-t", + fixture("snapshot_templates.yaml").to_str().unwrap(), + "-o", + 
output.to_str().unwrap(), + "-p", + PREFIX, + "--redact-patterns", + "TOKEN[a-f0-9]{8,64}", + "--strict", + ]) + .output() + .unwrap(); + + let stderr = String::from_utf8_lossy(&cmd.stderr); + assert!( + !stderr.contains("unclosed counted repetition"), + "regex was truncated by clap comma-split:\n{stderr}" + ); + assert!( + cmd.status.success(), + "should succeed — pattern is valid:\n{stderr}" + ); +} + +#[test] +fn invalid_redact_pattern_fails_under_strict() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("openapi.yaml"); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + fixture("snapshot_input.har").to_str().unwrap(), + "-t", + fixture("snapshot_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + "-p", + PREFIX, + "--redact-patterns", + "[unclosed", + "--strict", + ]) + .assert() + .failure(); +} diff --git a/tests/snapshot_compat.rs b/tests/snapshot_compat.rs new file mode 100644 index 0000000..92f05a6 --- /dev/null +++ b/tests/snapshot_compat.rs @@ -0,0 +1,91 @@ +use assert_cmd::Command; +use tempfile::TempDir; + +fn har_fixture(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +fn expected_file(name: &str) -> std::path::PathBuf { + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("expected") + .join(name) +} + +const PREFIX: &str = "https://api.example.com"; + +#[test] +fn snapshot_compat() { + let dir = TempDir::new().unwrap(); + let output = dir.path().join("snapshot_output.yaml"); + + std::fs::create_dir_all( + std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("expected"), + ) + .unwrap(); + + Command::cargo_bin("mitm2openapi") + .unwrap() + .args([ + "generate", + "-i", + har_fixture("snapshot_input.har").to_str().unwrap(), + "-t", + har_fixture("snapshot_templates.yaml").to_str().unwrap(), + "-o", + output.to_str().unwrap(), + 
"-p", + PREFIX, + ]) + .assert() + .success(); + + let actual = std::fs::read_to_string(&output).unwrap(); + let baseline_path = expected_file("snapshot_baseline.yaml"); + let expected = std::fs::read_to_string(&baseline_path).unwrap_or_else(|e| { + panic!( + "Could not read baseline file {}: {}", + baseline_path.display(), + e + ) + }); + + // Normalize line endings for cross-platform comparison (CRLF → LF on Windows CI) + let actual = actual.replace("\r\n", "\n"); + let expected = expected.replace("\r\n", "\n"); + + if actual != expected { + let actual_lines: Vec<&str> = actual.lines().collect(); + let expected_lines: Vec<&str> = expected.lines().collect(); + let max_lines = actual_lines.len().max(expected_lines.len()); + + eprintln!("=== SNAPSHOT DIFF (expected vs actual) ==="); + for i in 0..max_lines { + let exp_line = expected_lines.get(i).copied().unwrap_or(""); + let act_line = actual_lines.get(i).copied().unwrap_or(""); + if exp_line != act_line { + eprintln!("Line {:>4}: expected: {:?}", i + 1, exp_line); + eprintln!(" actual: {:?}", act_line); + } + } + eprintln!("=== END DIFF ==="); + panic!( + "Snapshot mismatch: generate output differs from baseline.\n\ + If this change is intentional, regenerate the baseline with:\n\ + cargo run -- generate -i tests/fixtures/snapshot_input.har \\\n\ + -t tests/fixtures/snapshot_templates.yaml \\\n\ + -o tests/expected/snapshot_baseline.yaml \\\n\ + -p https://api.example.com" + ); + } + + println!( + "Snapshot test passed: output matches baseline ({} bytes)", + actual.len() + ); +}