Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,12 @@ JSONL and SARIF:
```

Key flags:
- `--min-confidence 0.9`: filter low-confidence hits
- `--threads N`: set thread pool size
- `--max-file-size MB`: skip large files (default 2)
- `--patterns PATH`: specify patterns file (default: `patterns.toml`)
- `--progress`: show progress bar during scanning
- `--include-glob GLOB` / `--exclude-glob GLOB`
- `--allow LIB` / `--deny LIB`
- `--deterministic`: stable output ordering
- `--fail-on-find`: exit 2 if findings exist
- `--print-config`: print loaded `patterns.toml`
- `--dry-run`: list files to be scanned

Expand All @@ -48,7 +45,7 @@ Rust | RustCrypto | 2 | src/main.rs:12 aes_gcm::Aes256Gcm
JSONL example:

```json
{"language":"Rust","library":"RustCrypto","file":"src/main.rs","span":{"line":12,"column":5},"symbol":"aes_gcm::Aes256Gcm","snippet":"use aes_gcm::Aes256Gcm;","confidence":0.99,"detector_id":"detector-rust"}
{"language":"Rust","library":"RustCrypto","file":"src/main.rs","span":{"line":12,"column":5},"symbol":"aes_gcm::Aes256Gcm","snippet":"use aes_gcm::Aes256Gcm;","detector_id":"detector-rust"}
```

SARIF snippet:
Expand Down
22 changes: 0 additions & 22 deletions crates/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ struct Args {
#[arg(long, value_name = "FILE")]
sarif: Option<PathBuf>,

/// Minimum confidence required
#[arg(long, value_name = "FLOAT")]
min_confidence: Option<f32>,

/// Number of threads
#[arg(long, value_name = "N")]
threads: Option<usize>,
Expand All @@ -42,22 +38,10 @@ struct Args {
#[arg(long, value_name = "GLOB")]
exclude_glob: Vec<String>,

/// Allow only these libraries
#[arg(long, value_name = "LIB")]
allow: Vec<String>,

/// Deny these libraries
#[arg(long, value_name = "LIB")]
deny: Vec<String>,

/// Deterministic output ordering
#[arg(long, action = ArgAction::SetTrue)]
deterministic: bool,

/// Fail with code 2 if findings are present
#[arg(long, action = ArgAction::SetTrue)]
fail_on_find: bool,

/// Print merged patterns/config and exit
#[arg(long, action = ArgAction::SetTrue)]
print_config: bool,
Expand Down Expand Up @@ -155,11 +139,8 @@ fn main() -> Result<()> {
];

let mut cfg = Config {
min_confidence: args.min_confidence,
include_globs: args.include_glob.clone(),
exclude_globs: args.exclude_glob.clone(),
allow_libs: args.allow.clone(),
deny_libs: args.deny.clone(),
deterministic: args.deterministic,
..Default::default()
};
Expand Down Expand Up @@ -214,9 +195,6 @@ fn main() -> Result<()> {
fs::write(sarif_path, serde_json::to_vec_pretty(&sarif)?)?;
}

if args.fail_on_find && !findings.is_empty() {
std::process::exit(2);
}
Ok(())
}

Expand Down
100 changes: 100 additions & 0 deletions crates/cli/tests/anchors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use scanner_core::*;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};

/// Writes `contents` to the file at `dir/rel`, creating missing parent directories first.
///
/// # Panics
///
/// Panics if a parent directory cannot be created or the file cannot be written.
fn write_file(dir: &Path, rel: &str, contents: &str) {
    let destination = dir.join(rel);
    // A path without a parent component has no directory to create.
    destination
        .parent()
        .map_or(Ok(()), fs::create_dir_all)
        .unwrap();
    fs::write(&destination, contents).unwrap();
}

/// Creates a fresh scratch directory under the system temp dir and returns its path.
///
/// The directory is named `cipherscope_test_<prefix>_<pid>_<nanos>_<seq>`, where `<seq>` is a
/// per-process counter. The counter keeps two calls with the same `prefix` apart even when the
/// clock returns the same reading twice; without it they would silently share one directory,
/// because `create_dir_all` succeeds on an existing path.
///
/// The directory is not removed by this function.
///
/// # Panics
///
/// Panics if the system clock is before the UNIX epoch or the directory cannot be created.
fn tmp_dir(prefix: &str) -> PathBuf {
    // Only uniqueness matters here, not ordering with other memory operations.
    static NEXT_SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock is set before the UNIX epoch")
        .as_nanos();
    let pid = std::process::id();
    let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

    let base = std::env::temp_dir().join(format!(
        "cipherscope_test_{}_{}_{}_{}",
        prefix, pid, ts, seq
    ));
    fs::create_dir_all(&base).unwrap();
    base
}

/// Google Tink must be reported only when a Java file contains both the import anchor and an
/// API token; either one alone must not produce a finding.
#[test]
fn tink_requires_import_and_api() {
    const TINK: &str = "Google Tink (Java)";

    // Load the workspace-level patterns.toml (two directories above this crate's manifest).
    let patterns_file = Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("../..")
        .join("patterns.toml");
    let toml_text = fs::read_to_string(patterns_file).unwrap();
    let registry = Arc::new(PatternRegistry::load(&toml_text).unwrap());

    let java_detector = PatternDetector::new("detector-java", &[Language::Java], registry.clone());
    let detectors: Vec<Box<dyn Detector>> = vec![Box::new(java_detector)];
    let scanner = Scanner::new(&registry, detectors, Config::default());

    // Scans a single root directory and tells whether any finding names Tink.
    let tink_reported = |root: &PathBuf| {
        scanner
            .run(std::slice::from_ref(root))
            .unwrap()
            .iter()
            .any(|finding| finding.library == TINK)
    };

    // Case 1: the import is present but no API token is used.
    let import_only_root = tmp_dir("tink_import_only");
    write_file(
        &import_only_root,
        "src/ImportOnly.java",
        r#"package test;
import com.google.crypto.tink.aead.AeadConfig; // import present
public class ImportOnly {
public static void main(String[] args) { System.out.println("hello"); }
}
"#,
    );
    assert!(
        !tink_reported(&import_only_root),
        "Tink should not be reported with import only"
    );

    // Case 2: API tokens are mentioned but the import is missing.
    let api_only_root = tmp_dir("tink_api_only");
    write_file(
        &api_only_root,
        "src/ApiOnly.java",
        r#"package test;
public class ApiOnly {
public static void main(String[] args) {
// Mention API symbol without import
String s = "Aead Mac HybridEncrypt"; // matches pattern by word, but no import
System.out.println(s);
}
}
"#,
    );
    assert!(
        !tink_reported(&api_only_root),
        "Tink should not be reported with API mentions only"
    );

    // Case 3: import and API token together.
    let both_root = tmp_dir("tink_both");
    write_file(
        &both_root,
        "src/Both.java",
        r#"package test;
import com.google.crypto.tink.aead.AeadConfig; // import present
public class Both {
public static void main(String[] args) {
// Include an API token
String s = "Aead";
System.out.println(s);
}
}
"#,
    );
    assert!(
        tink_reported(&both_root),
        "Tink should be reported when import and API are present"
    );
}
201 changes: 201 additions & 0 deletions crates/cli/tests/filtering.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
use scanner_core::*;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};

/// Writes `contents` to `dir/rel`, first making sure the file's parent directory exists.
///
/// # Panics
///
/// Panics if the parent directory cannot be created or the write fails.
fn write_file(dir: &Path, rel: &str, contents: &str) {
    let target = dir.join(rel);
    // Nothing needs creating when the joined path has no parent component.
    let parent_ready = match target.parent() {
        Some(parent) => fs::create_dir_all(parent),
        None => Ok(()),
    };
    parent_ready.unwrap();
    fs::write(target, contents).unwrap();
}

/// Creates a fresh scratch directory under the system temp dir and returns its path.
///
/// The directory is named `cipherscope_test_<prefix>_<pid>_<nanos>_<seq>`, where `<seq>` is a
/// per-process counter. The counter keeps two calls with the same `prefix` apart even when the
/// clock returns the same reading twice; without it they would silently share one directory,
/// because `create_dir_all` succeeds on an existing path.
///
/// The directory is not removed by this function.
///
/// # Panics
///
/// Panics if the system clock is before the UNIX epoch or the directory cannot be created.
fn tmp_dir(prefix: &str) -> PathBuf {
    // Only uniqueness matters here, not ordering with other memory operations.
    static NEXT_SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system clock is set before the UNIX epoch")
        .as_nanos();
    let pid = std::process::id();
    let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

    let base = std::env::temp_dir().join(format!(
        "cipherscope_test_{}_{}_{}_{}",
        prefix, pid, ts, seq
    ));
    fs::create_dir_all(&base).unwrap();
    base
}

fn load_registry() -> Arc<PatternRegistry> {
let workspace = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
let patterns_path = workspace.join("patterns.toml");
let patterns = fs::read_to_string(patterns_path).unwrap();
Arc::new(PatternRegistry::load(&patterns).unwrap())
}

/// A Java import that appears only inside a line comment must not count as an anchor, so the
/// API call alone must not produce a "Java JCA/JCE" finding.
#[test]
fn commented_import_does_not_trigger_anchor_java() {
    const JCA: &str = "Java JCA/JCE";

    let registry = load_registry();
    let java_detector = PatternDetector::new("detector-java", &[Language::Java], registry.clone());
    let detectors: Vec<Box<dyn Detector>> = vec![Box::new(java_detector)];
    let scanner = Scanner::new(&registry, detectors, Config::default());

    let root = tmp_dir("commented_import_java");
    write_file(
        &root,
        "src/Main.java",
        r#"package test;
// import javax.crypto.Cipher; // commented anchor
public class Main {
public static void main(String[] args) throws Exception {
javax.crypto.Cipher.getInstance("AES/GCM/NoPadding"); // API present
}
}
"#,
    );

    let findings = scanner.run(std::slice::from_ref(&root)).unwrap();
    let jca_absent = findings.iter().all(|finding| finding.library != JCA);
    assert!(
        jca_absent,
        "JCA/JCE should not be reported when import is commented"
    );
}

/// A PHP file that calls `openssl_encrypt` with no import statement must still yield an
/// "OpenSSL (PHP)" finding.
#[test]
fn php_api_only_reports_openssl() {
    const PHP_OPENSSL: &str = "OpenSSL (PHP)";

    let registry = load_registry();
    let php_detector = PatternDetector::new("detector-php", &[Language::Php], registry.clone());
    let detectors: Vec<Box<dyn Detector>> = vec![Box::new(php_detector)];
    let scanner = Scanner::new(&registry, detectors, Config::default());

    let root = tmp_dir("php_openssl_api_only");
    write_file(
        &root,
        "web/index.php",
        r#"<?php
// No imports for PHP OpenSSL detector; API use is enough
$ciphertext = openssl_encrypt("data", "aes-256-cbc", "key", 0, "1234567890123456");
echo $ciphertext;
"#,
    );

    let findings = scanner.run(std::slice::from_ref(&root)).unwrap();
    let openssl_present = findings.iter().any(|finding| finding.library == PHP_OPENSSL);
    assert!(
        openssl_present,
        "OpenSSL (PHP) should be reported on API use only"
    );
}

/// With both the Java and PHP detectors active, `include_globs` must restrict findings to files
/// whose paths match the glob: `**/*.java` yields only JCA/JCE, `**/*.php` only OpenSSL (PHP).
#[test]
fn include_glob_filters_file_types() {
    const JCA: &str = "Java JCA/JCE";
    const PHP_OPENSSL: &str = "OpenSSL (PHP)";

    let registry = load_registry();
    // Each scanner takes ownership of its detectors, so build a fresh Java + PHP pair per scan.
    let both_detectors = || -> Vec<Box<dyn Detector>> {
        vec![
            Box::new(PatternDetector::new(
                "detector-java",
                &[Language::Java],
                registry.clone(),
            )),
            Box::new(PatternDetector::new(
                "detector-php",
                &[Language::Php],
                registry.clone(),
            )),
        ]
    };

    let root = tmp_dir("include_glob_filters");
    // Java fixture: import anchor plus API call.
    write_file(
        &root,
        "src/Main.java",
        r#"package test;
import java.security.MessageDigest;
public class Main {
public static void main(String[] args) throws Exception {
java.security.KeyFactory.getInstance("RSA");
}
}
"#,
    );
    // PHP fixture: API call only.
    write_file(
        &root,
        "web/index.php",
        r#"<?php
echo openssl_encrypt("data", "aes-256-cbc", "key", 0, "1234567890123456");
"#,
    );

    // Pass 1: restrict the scan to Java sources.
    let java_only_cfg = Config {
        include_globs: vec!["**/*.java".to_string()],
        ..Default::default()
    };
    let java_scanner = Scanner::new(&registry, both_detectors(), java_only_cfg);
    let java_findings = java_scanner.run(std::slice::from_ref(&root)).unwrap();
    assert!(java_findings.iter().any(|finding| finding.library == JCA));
    assert!(
        java_findings.iter().all(|finding| finding.library != PHP_OPENSSL),
        "PHP findings should be excluded by include_glob"
    );

    // Pass 2: restrict the scan to PHP sources.
    let php_only_cfg = Config {
        include_globs: vec!["**/*.php".to_string()],
        ..Default::default()
    };
    let php_scanner = Scanner::new(&registry, both_detectors(), php_only_cfg);
    let php_findings = php_scanner.run(std::slice::from_ref(&root)).unwrap();
    assert!(php_findings.iter().any(|finding| finding.library == PHP_OPENSSL));
    assert!(
        php_findings.iter().all(|finding| finding.library != JCA),
        "Java findings should be excluded by include_glob"
    );
}

/// A file larger than `Config::max_file_size` must be skipped by the scanner.
///
/// The fixture combines an import with an API call (the same pair that
/// `include_glob_filters_file_types` expects to be reported as "Java JCA/JCE"), and a control
/// scan with a generous limit checks that the file is detectable at all. That way the empty
/// result under the small limit is attributable to the size limit rather than to a fixture that
/// never matches.
#[test]
fn max_file_size_skips_large_files() {
    const JCA: &str = "Java JCA/JCE";
    const PADDING_LINE: &str = "// padding padding padding padding padding padding\n";

    let reg = load_registry();
    // Each scanner takes ownership of its detectors, so build a fresh list per scan.
    let java_detectors = || -> Vec<Box<dyn Detector>> {
        vec![Box::new(PatternDetector::new(
            "detector-java",
            &[Language::Java],
            reg.clone(),
        ))]
    };

    let dir = tmp_dir("max_file_size");
    // Java file with import anchor + API call, padded well past the 512-byte limit used below.
    let mut content = String::from(
        r#"package test;
import java.security.MessageDigest;
public class Big {
public static void main(String[] args) throws Exception {
java.security.KeyFactory.getInstance("RSA");
}
}
"#,
    );
    content.push_str(&PADDING_LINE.repeat(5000));
    write_file(&dir, "src/Big.java", &content);

    // Control: with a limit far above the file size, the fixture must be reported.
    let cfg_large_limit = Config {
        max_file_size: 10 * 1024 * 1024, // bytes
        ..Default::default()
    };
    let control_scanner = Scanner::new(&reg, java_detectors(), cfg_large_limit);
    let control_findings = control_scanner.run(std::slice::from_ref(&dir)).unwrap();
    assert!(
        control_findings.iter().any(|f| f.library == JCA),
        "Fixture should be reported when it is under max_file_size"
    );

    // Actual check: the same file must be skipped once the limit is below its size.
    let cfg_small_limit = Config {
        max_file_size: 512, // bytes
        ..Default::default()
    };
    let scanner = Scanner::new(&reg, java_detectors(), cfg_small_limit);
    let findings = scanner.run(std::slice::from_ref(&dir)).unwrap();
    assert!(
        findings.is_empty(),
        "Large file should be skipped by max_file_size"
    );
}
Loading
Loading