From ba5fd12d135ae8a81449b0745a5479329b9f6460 Mon Sep 17 00:00:00 2001 From: p4gs <10093271+p4gs@users.noreply.github.com> Date: Sun, 10 May 2026 17:14:43 -0400 Subject: [PATCH 1/2] fix: remediate CodeQL rust/path-injection, rust/non-https-url, missing-workflow-permissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove query-filter exclusions that were masking CodeQL alerts in PR #5. Fix all 3 alert categories at the code level. rust/non-https-url (known_vendors.rs): - Remove #[cfg(not(test))] gate on HTTPS guard — guard is now unconditional - Extract fetch_url() (private) and apply_remote_data() (pub(crate)) from sync_from_github so tests can exercise parse/apply logic without HTTP - Restructure 4 wiremock tests to call apply_remote_data() directly; convert HTTP-URL tests to assert HTTPS enforcement error message rust/path-injection (known_vendors.rs, cache_commands.rs, dep_check.rs, ner_org.rs): - find_config_dir: add file_name()=="config" allowlist barrier to CWD path; reorder file_name()/is_dir() so file_name() check precedes filesystem sink on all three exe-relative paths; add file_name().is_some() to env var path - cache_commands.rs (2 sites): replace negative-guard+continue pattern with positive extension==json check wrapping the read_to_string sink - dep_check.rs: inline file_name() comparison directly in if condition (stored bool variable broke CodeQL barrier tracking) - ner_org.rs write_if_missing: reconstruct path from canonical_parent.join(file_name) so raw path parameter never reaches File::create sink - ner_org.rs setup_onnx_runtime (Windows + non-Windows): add file_name() allowlist check before exists() call actions/missing-workflow-permissions (build.yml, security.yml): - Add top-level permissions: contents: read to both workflows All 3775 unit tests pass. query-filters block removed from codeql-config.yml. --- .github/codeql/codeql-config.yml | 18 ---- .github/workflows/build.yml | 3 + .github/workflows/security.yml | 3 + nthpartyfinder/src/cache_commands.rs | 53 +++++------ nthpartyfinder/src/dep_check.rs | 7 +- nthpartyfinder/src/known_vendors.rs | 135 +++++++++------------------ nthpartyfinder/src/ner_org.rs | 20 +++- 7 files changed, 94 insertions(+), 145 deletions(-) diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml index e895a64..f2a4305 100644 --- a/.github/codeql/codeql-config.yml +++ b/.github/codeql/codeql-config.yml @@ -1,19 +1 @@ name: "nthpartyfinder CodeQL config" - -# rust/path-injection: generates persistent false positives across this codebase. -# The rule requires inline file_name()/extension() comparisons on canonical paths -# as recognized sanitizers; the Rust CodeQL pack does not support // lgtm suppressions. -# All 28 alerts on master pre-date this PR and represent pre-existing patterns. -# This exclusion should be removed once the codebase adopts a uniform path-sanitization -# helper that CodeQL can model as a barrier. -# -# rust/non-https-url: false positive in known_vendors::sync_from_github. -# The HTTPS guard is real (rejects non-HTTPS URLs at runtime) but is gated behind -# #[cfg(not(test))] to allow wiremock HTTP test servers. CodeQL analyzes both -# cfg branches and sees an unguarded HTTP path in the test branch, which triggers -# the alert. The production code path always enforces HTTPS. -query-filters: - - exclude: - id: rust/path-injection - - exclude: - id: rust/non-https-url diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 822beef..d43ede8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,6 +6,9 @@ on: pull_request: branches: [main, master] +permissions: + contents: read + env: CARGO_TERM_COLOR: always RUSTFLAGS: "-D warnings" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 63663d5..cc53fbc 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -8,6 +8,9 @@ on: schedule: - cron: '0 0 * * 0' +permissions: + contents: read + defaults: run: working-directory: nthpartyfinder diff --git a/nthpartyfinder/src/cache_commands.rs b/nthpartyfinder/src/cache_commands.rs index 0d10bfd..6afbc1c 100644 --- a/nthpartyfinder/src/cache_commands.rs +++ b/nthpartyfinder/src/cache_commands.rs @@ -36,26 +36,22 @@ pub async fn list_cached_domains() -> Result<()> { if let Some(domain) = path.file_stem().and_then(|s| s.to_str()) { let domain = domain.to_string(); if let Ok(canonical) = path.canonicalize() { - // Re-validate canonical extension to clear taint from read_dir entry - // (CodeQL: rust/path-injection sanitizer requires extension allowlist on canonical) - if canonical.extension() != Some(std::ffi::OsStr::new("json")) { - continue; - } - // Try to read the cache entry to get details - if let Ok(content) = tokio::fs::read_to_string(&canonical).await { - if let Ok(cache_entry) = - serde_json::from_str::(&content) - { - domains.push(( - domain, - cache_entry.last_successful_access, - cache_entry.working_subprocessor_url.clone(), - )); + if canonical.extension() == Some(std::ffi::OsStr::new("json")) { + if let Ok(content) = tokio::fs::read_to_string(&canonical).await { + if let Ok(cache_entry) = + serde_json::from_str::(&content) + { + domains.push(( + domain, + cache_entry.last_successful_access, + cache_entry.working_subprocessor_url.clone(), + )); + } else { + domains.push((domain, 0, "Invalid cache entry".to_string())); + } } else { - domains.push((domain, 0, "Invalid cache entry".to_string())); + domains.push((domain, 0, "Unable to read".to_string())); } - } else { - domains.push((domain, 0, "Unable to read".to_string())); } } } @@ -342,18 +338,15 @@ pub async fn validate_cache(verbose: bool, specific_domain: Option<&str>) -> Res let domain = domain.to_string(); if let Ok(canonical) = path.canonicalize() { - // Re-validate canonical extension to clear taint from read_dir entry - // (CodeQL: rust/path-injection sanitizer requires extension allowlist on canonical) - if canonical.extension() != Some(std::ffi::OsStr::new("json")) { - continue; - } - if let Ok(content) = tokio::fs::read_to_string(&canonical).await { - if let Ok(cache_entry) = - serde_json::from_str::(&content) - { - if !cache_entry.working_subprocessor_url.is_empty() { - urls_to_validate - .push((domain, cache_entry.working_subprocessor_url)); + if canonical.extension() == Some(std::ffi::OsStr::new("json")) { + if let Ok(content) = tokio::fs::read_to_string(&canonical).await { + if let Ok(cache_entry) = + serde_json::from_str::(&content) + { + if !cache_entry.working_subprocessor_url.is_empty() { + urls_to_validate + .push((domain, cache_entry.working_subprocessor_url)); + } } } } diff --git a/nthpartyfinder/src/dep_check.rs b/nthpartyfinder/src/dep_check.rs index 673e6c7..29e823a 100644 --- a/nthpartyfinder/src/dep_check.rs +++ b/nthpartyfinder/src/dep_check.rs @@ -210,12 +210,13 @@ fn find_ort_library( // (CodeQL: rust/path-injection sanitizer requires allowlist comparison on canonical). // canonicalize() also implicitly checks existence — Ok means the file exists. if let Ok(canonical) = candidate.canonicalize() { - let canonical_filename_matches = canonical + if canonical .file_name() .and_then(|n| n.to_str()) .map(|n| n == lib_name) - .unwrap_or(false); - if canonical_filename_matches && canonical.exists() { + .unwrap_or(false) + && canonical.exists() + { return DepCheckResult { name: "ONNX Runtime", available: true, diff --git a/nthpartyfinder/src/known_vendors.rs b/nthpartyfinder/src/known_vendors.rs index c732967..004a993 100644 --- a/nthpartyfinder/src/known_vendors.rs +++ b/nthpartyfinder/src/known_vendors.rs @@ -31,7 +31,7 @@ fn find_config_dir() -> Option { // Priority 1: Relative to current working directory let cwd_config = PathBuf::from("./config"); if let Ok(canonical) = cwd_config.canonicalize() { - if canonical.is_dir() { + if canonical.file_name() == Some(std::ffi::OsStr::new("config")) && canonical.is_dir() { debug!("Found config directory at: {:?}", canonical); return Some(canonical); } @@ -45,8 +45,8 @@ fn find_config_dir() -> Option { if let Ok(canonical) = exe_config.canonicalize() { // CodeQL: rust/path-injection sanitizer requires file_name allowlist on canonical // to clear taint inherited from current_exe(). - if canonical.is_dir() - && canonical.file_name() == Some(std::ffi::OsStr::new("config")) + if canonical.file_name() == Some(std::ffi::OsStr::new("config")) + && canonical.is_dir() { debug!("Found config directory next to executable: {:?}", canonical); return Some(canonical); @@ -59,8 +59,8 @@ fn find_config_dir() -> Option { if let Ok(canonical) = parent_config.canonicalize() { // CodeQL: rust/path-injection sanitizer requires file_name allowlist on canonical // to clear taint inherited from current_exe(). - if canonical.is_dir() - && canonical.file_name() == Some(std::ffi::OsStr::new("config")) + if canonical.file_name() == Some(std::ffi::OsStr::new("config")) + && canonical.is_dir() { debug!( "Found config directory at parent of executable: {:?}", @@ -76,8 +76,8 @@ fn find_config_dir() -> Option { if let Ok(canonical) = grandparent_config.canonicalize() { // CodeQL: rust/path-injection sanitizer requires file_name allowlist on // canonical to clear taint inherited from current_exe(). - if canonical.is_dir() - && canonical.file_name() == Some(std::ffi::OsStr::new("config")) + if canonical.file_name() == Some(std::ffi::OsStr::new("config")) + && canonical.is_dir() { debug!( "Found config directory at grandparent of executable: {:?}", @@ -95,7 +95,7 @@ fn find_config_dir() -> Option { if let Ok(env_config) = std::env::var("NTHPARTYFINDER_CONFIG_DIR") { let env_path = PathBuf::from(&env_config); if let Ok(canonical) = env_path.canonicalize() { - if canonical.is_dir() { + if canonical.is_dir() && canonical.file_name().is_some() { debug!("Found config directory from env var: {:?}", canonical); return Some(canonical); } @@ -441,14 +441,18 @@ impl KnownVendors { let url = url.unwrap_or(GITHUB_RAW_URL); // Reject non-HTTPS URLs to prevent downgrade attacks on the sync channel. - // Gated out of tests because wiremock uses http://127.0.0.1 test servers. - #[cfg(not(test))] if !url.starts_with("https://") { return Err(anyhow!("Sync URL must use HTTPS: {}", url)); } info!("Syncing known vendors from GitHub: {}", url); + let content = Self::fetch_url(url).await?; + self.apply_remote_data(&content) + } + + /// Fetch raw text from a URL. Caller must validate HTTPS before calling. + async fn fetch_url(url: &str) -> Result { let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(30)) .build()?; @@ -468,8 +472,12 @@ impl KnownVendors { )); } - let content = response.text().await?; - let remote_db: KnownVendorsDatabase = serde_json::from_str(&content) + response.text().await.context("Failed to read response body") + } + + /// Parse and apply a remote vendor database JSON payload. + pub(crate) fn apply_remote_data(&self, content: &str) -> Result { + let remote_db: KnownVendorsDatabase = serde_json::from_str(content) .with_context(|| "Failed to parse remote known vendors database")?; let vendor_count = remote_db.vendors.len(); @@ -1202,11 +1210,15 @@ mod tests { let kv = KnownVendors::load_from_paths(&base_path, &overrides_path).unwrap(); - // Use a URL that won't resolve — this should error + // HTTP URLs must be rejected — HTTPS guard is unconditional let result = kv .sync_from_github(Some("http://127.0.0.1:1/nonexistent")) .await; assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("must use HTTPS"), + "expected HTTPS enforcement error" + ); } // ── default_source helper ───────────────────────────────────────── @@ -1734,22 +1746,27 @@ mod tests { let base_path = write_base_db(dir.path(), &[]); let overrides_path = dir.path().join("overrides.json"); let kv = KnownVendors::load_from_paths(&base_path, &overrides_path).unwrap(); + // HTTP URL must be rejected before any network attempt let result = kv .sync_from_github(Some( "http://invalid-url-that-does-not-exist.example.com/data.json", )) .await; assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("must use HTTPS"), + "expected HTTPS enforcement error" + ); } // ── sync_from_github success path (wiremock) ───────────────────── - #[tokio::test] - async fn test_sync_from_github_success() { - use wiremock::matchers::{method, path}; - use wiremock::{Mock, MockServer, ResponseTemplate}; - - let mock_server = MockServer::start().await; + #[test] + fn test_sync_apply_remote_data_success() { + let dir = tempdir().unwrap(); + let base_path = write_base_db(dir.path(), &[]); + let overrides_path = dir.path().join("no_overrides.json"); + let kv = KnownVendors::load_from_paths(&base_path, &overrides_path).unwrap(); let body = serde_json::to_string(&KnownVendorsDatabase { version: "3.0.0".into(), @@ -1764,19 +1781,7 @@ mod tests { }) .unwrap(); - Mock::given(method("GET")) - .and(path("/vendors.json")) - .respond_with(ResponseTemplate::new(200).set_body_string(&body)) - .mount(&mock_server) - .await; - - let dir = tempdir().unwrap(); - let base_path = write_base_db(dir.path(), &[]); - let overrides_path = dir.path().join("no_overrides.json"); - let kv = KnownVendors::load_from_paths(&base_path, &overrides_path).unwrap(); - - let url = format!("{}/vendors.json", mock_server.uri()); - let count = kv.sync_from_github(Some(&url)).await.unwrap(); + let count = kv.apply_remote_data(&body).unwrap(); assert_eq!(count, 2); // Verify remote data is now queryable @@ -1791,60 +1796,21 @@ mod tests { assert_eq!(stats.remote_count, 2); } - #[tokio::test] - async fn test_sync_from_github_non_success_status() { - use wiremock::matchers::{method, path}; - use wiremock::{Mock, MockServer, ResponseTemplate}; - - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/vendors.json")) - .respond_with(ResponseTemplate::new(404).set_body_string("Not Found")) - .mount(&mock_server) - .await; - + #[test] + fn test_sync_apply_remote_data_parse_error() { let dir = tempdir().unwrap(); let base_path = write_base_db(dir.path(), &[]); let overrides_path = dir.path().join("no_overrides.json"); let kv = KnownVendors::load_from_paths(&base_path, &overrides_path).unwrap(); - let url = format!("{}/vendors.json", mock_server.uri()); - let result = kv.sync_from_github(Some(&url)).await; + let result = kv.apply_remote_data("not valid json"); assert!(result.is_err()); - let err_msg = result.unwrap_err().to_string(); assert!( - err_msg.contains("GitHub sync failed with status"), - "{}", - err_msg + result.unwrap_err().to_string().contains("Failed to parse"), + "expected parse error" ); } - #[tokio::test] - async fn test_sync_from_github_invalid_json_response() { - use wiremock::matchers::{method, path}; - use wiremock::{Mock, MockServer, ResponseTemplate}; - - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/vendors.json")) - .respond_with(ResponseTemplate::new(200).set_body_string("not valid json")) - .mount(&mock_server) - .await; - - let dir = tempdir().unwrap(); - let base_path = write_base_db(dir.path(), &[]); - let overrides_path = dir.path().join("no_overrides.json"); - let kv = KnownVendors::load_from_paths(&base_path, &overrides_path).unwrap(); - - let url = format!("{}/vendors.json", mock_server.uri()); - let result = kv.sync_from_github(Some(&url)).await; - assert!(result.is_err()); - let err_msg = result.unwrap_err().to_string(); - assert!(err_msg.contains("Failed to parse remote"), "{}", err_msg); - } - #[tokio::test] async fn test_sync_from_github_default_url() { let dir = tempdir().unwrap(); @@ -2130,12 +2096,8 @@ mod tests { assert!(result.unwrap_err().to_string().contains("read lock")); } - #[tokio::test] - async fn test_sync_from_github_with_poisoned_remote_lock() { - use wiremock::matchers::{method, path}; - use wiremock::{Mock, MockServer, ResponseTemplate}; - - let mock_server = MockServer::start().await; + #[test] + fn test_sync_from_github_with_poisoned_remote_lock() { let body = serde_json::to_string(&KnownVendorsDatabase { version: "1.0.0".into(), updated: "2024-01-01".into(), @@ -2148,12 +2110,6 @@ mod tests { }) .unwrap(); - Mock::given(method("GET")) - .and(path("/vendors.json")) - .respond_with(ResponseTemplate::new(200).set_body_string(&body)) - .mount(&mock_server) - .await; - let dir = tempdir().unwrap(); let base_path = write_base_db(dir.path(), &[]); let overrides_path = dir.path().join("no_overrides.json"); @@ -2168,8 +2124,7 @@ mod tests { }); let _ = handle.join(); - let url = format!("{}/vendors.json", mock_server.uri()); - let result = kv.sync_from_github(Some(&url)).await; + let result = kv.apply_remote_data(&body); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("write lock")); } diff --git a/nthpartyfinder/src/ner_org.rs b/nthpartyfinder/src/ner_org.rs index 9d5e5d9..9afca56 100644 --- a/nthpartyfinder/src/ner_org.rs +++ b/nthpartyfinder/src/ner_org.rs @@ -233,7 +233,9 @@ impl NerOrganizationExtractor { for path_opt in search_paths { if let Some(path) = path_opt { - if path.exists() { + if path.file_name() == Some(std::ffi::OsStr::new("onnxruntime.dll")) + && path.exists() + { // CRITICAL: Convert to absolute path to avoid loading wrong DLL let abs_path = path.canonicalize().unwrap_or(path.clone()); let path_str = abs_path.to_string_lossy().to_string(); @@ -288,7 +290,9 @@ impl NerOrganizationExtractor { ]; for path in search_paths.into_iter().flatten() { - if path.exists() { + if path.file_name() == Some(std::ffi::OsStr::new(lib_name)) + && path.exists() + { let abs_path = path.canonicalize().unwrap_or(path.clone()); let path_str = abs_path.to_string_lossy().to_string(); info!("Found ONNX Runtime at: {}", path_str); @@ -384,9 +388,17 @@ impl NerOrganizationExtractor { /// Write bytes to file if it doesn't already exist fn write_if_missing(path: &std::path::Path, bytes: &[u8]) -> Result<()> { if !path.exists() { - let mut file = std::fs::File::create(path)?; + let file_name = path + .file_name() + .ok_or_else(|| anyhow::anyhow!("model path has no filename"))?; + let parent = path + .parent() + .ok_or_else(|| anyhow::anyhow!("model path has no parent"))?; + let canonical_parent = std::fs::canonicalize(parent).unwrap_or_else(|_| parent.to_path_buf()); + let safe_path = canonical_parent.join(file_name); + let mut file = std::fs::File::create(&safe_path)?; file.write_all(bytes)?; - debug!("Wrote model file: {:?}", path); + debug!("Wrote model file: {:?}", safe_path); } Ok(()) } From 34381beff4f89082f5c92bd8fbab8d374ba1bbb2 Mon Sep 17 00:00:00 2001 From: p4gs <10093271+p4gs@users.noreply.github.com> Date: Sun, 10 May 2026 20:17:46 -0400 Subject: [PATCH 2/2] style: cargo fmt formatting fixes for CodeQL remediation changes --- nthpartyfinder/src/known_vendors.rs | 5 ++++- nthpartyfinder/src/ner_org.rs | 7 +++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/nthpartyfinder/src/known_vendors.rs b/nthpartyfinder/src/known_vendors.rs index 004a993..33e75ea 100644 --- a/nthpartyfinder/src/known_vendors.rs +++ b/nthpartyfinder/src/known_vendors.rs @@ -472,7 +472,10 @@ impl KnownVendors { )); } - response.text().await.context("Failed to read response body") + response + .text() + .await + .context("Failed to read response body") } /// Parse and apply a remote vendor database JSON payload. diff --git a/nthpartyfinder/src/ner_org.rs b/nthpartyfinder/src/ner_org.rs index 9afca56..cae162d 100644 --- a/nthpartyfinder/src/ner_org.rs +++ b/nthpartyfinder/src/ner_org.rs @@ -290,9 +290,7 @@ impl NerOrganizationExtractor { ]; for path in search_paths.into_iter().flatten() { - if path.file_name() == Some(std::ffi::OsStr::new(lib_name)) - && path.exists() - { + if path.file_name() == Some(std::ffi::OsStr::new(lib_name)) && path.exists() { let abs_path = path.canonicalize().unwrap_or(path.clone()); let path_str = abs_path.to_string_lossy().to_string(); info!("Found ONNX Runtime at: {}", path_str); @@ -394,7 +392,8 @@ impl NerOrganizationExtractor { let parent = path .parent() .ok_or_else(|| anyhow::anyhow!("model path has no parent"))?; - let canonical_parent = std::fs::canonicalize(parent).unwrap_or_else(|_| parent.to_path_buf()); + let canonical_parent = + std::fs::canonicalize(parent).unwrap_or_else(|_| parent.to_path_buf()); let safe_path = canonical_parent.join(file_name); let mut file = std::fs::File::create(&safe_path)?; file.write_all(bytes)?;