Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions unified/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@ This is a CodeQL extractor based on tree-sitter.
## Building
To build the extractor, run `scripts/create-extractor-pack.sh`

## Testing
- If you changed the extractor code, always rebuild it before running tests.
## Extractor Testing
- To run extractor tests, run `cargo test` in the `extractor` directory.

- To run all tests, run `codeql test run --search-path extractor-pack ql/test`
- Do not edit the printed ASTs in `extractor/tests/corpus` directly. To regenerate the ASTs, run tests with the environment variable `YEAST_UPDATE_CORPUS=1`.

## CodeQL Testing
- If you changed the extractor code, always rebuild it before running CodeQL tests.

- To run all CodeQL tests, run `codeql test run --search-path extractor-pack ql/test`

- Do not edit `.expected` files manually. To update the expected output, pass `--learn` to the `codeql test run` command.

Expand Down
8 changes: 2 additions & 6 deletions unified/extractor/src/extractor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ use std::path::PathBuf;

use codeql_extractor::extractor::simple;
use codeql_extractor::trap;

#[path = "languages/swift/swift.rs"]
mod swift;
use crate::languages;

#[derive(Args)]
pub struct Options {
Expand All @@ -27,9 +25,7 @@ pub fn run(options: Options) -> std::io::Result<()> {

let extractor = simple::Extractor {
prefix: "unified".to_string(),
languages: vec![
swift::language_spec(),
],
languages: languages::all_language_specs(),
trap_dir: options.output_dir,
trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"),
source_archive_dir: options.source_archive_dir,
Expand Down
8 changes: 8 additions & 0 deletions unified/extractor/src/languages/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use codeql_extractor::extractor::simple;

#[path = "swift/swift.rs"]
mod swift;

/// Returns the language specs for every language module declared in this file.
///
/// At present that is only the Swift spec.
pub fn all_language_specs() -> Vec<simple::LanguageSpec> {
    let swift_spec = swift::language_spec();
    Vec::from([swift_spec])
}
1 change: 1 addition & 0 deletions unified/extractor/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use clap::Parser;
mod autobuilder;
mod extractor;
mod generator;
mod languages;

#[derive(Parser)]
#[command(author, version, about)]
Expand Down
23 changes: 23 additions & 0 deletions unified/extractor/tests/corpus/swift/desugar.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
===
Additive expression is desugared
===

1 + 2

---

source_file
simple_identifier "blah"


===
Another additive expression is desugared
===

foo + bar

---

source_file
simple_identifier "blah"

182 changes: 182 additions & 0 deletions unified/extractor/tests/corpus_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
use std::fs;
use std::path::Path;

use codeql_extractor::extractor::simple;
use yeast::{dump::dump_ast, Runner};

#[path = "../src/languages/mod.rs"]
mod languages;

/// One test case read from a corpus file.
#[derive(Debug)]
struct CorpusCase {
    /// Title written between the two `===` header rules.
    name: String,
    /// Source text found between the header and the `---` separator.
    input: String,
    /// Expected AST dump found after the `---` separator.
    expected: String,
}

/// Reports whether corpus files should be regenerated instead of checked.
///
/// Returns `true` only when the `YEAST_UPDATE_CORPUS` environment variable is
/// readable and, compared case-insensitively, equals `1`, `true`, `yes` or
/// `on`. An unset or unreadable variable counts as disabled.
fn update_mode_enabled() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    match std::env::var("YEAST_UPDATE_CORPUS") {
        Ok(raw) => TRUTHY.contains(&raw.to_ascii_lowercase().as_str()),
        Err(_) => false,
    }
}

/// Returns `true` when `line`, ignoring surrounding whitespace, consists of
/// three or more `=` characters and nothing else.
fn is_header_rule(line: &str) -> bool {
    let rule = line.trim();
    if rule.len() < 3 {
        return false;
    }
    // `=` is ASCII, so a byte-wise comparison is equivalent to a char-wise one.
    rule.bytes().all(|b| b == b'=')
}

/// Splits the text of a corpus file into its individual test cases.
///
/// Each case has the layout
///
/// ```text
/// ===
/// case name
/// ===
/// input source
/// ---
/// expected output
/// ```
///
/// Blank lines between cases are skipped. The input keeps its leading
/// whitespace and loses only trailing whitespace; the expected section is
/// trimmed on both sides. The expected section runs until the next header
/// (a rule, a non-blank line, a rule) or the end of the file.
///
/// # Panics
///
/// Panics when a case does not start with a header rule, when the name or
/// the closing rule is missing, or when no `---` separator follows the input.
fn parse_corpus(content: &str) -> Vec<CorpusCase> {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let mut parsed = Vec::new();
    let mut pos = 0;

    loop {
        // Skip the blank lines that separate cases.
        pos += lines[pos..]
            .iter()
            .take_while(|line| line.trim().is_empty())
            .count();
        if pos >= total {
            break;
        }

        // Opening rule of the header.
        assert!(
            is_header_rule(lines[pos]),
            "Expected header delimiter at line {}",
            pos + 1
        );
        pos += 1;

        // Case name.
        assert!(pos < total, "Missing test name at line {}", pos + 1);
        let name = lines[pos].trim().to_string();
        pos += 1;

        // Closing rule of the header.
        assert!(
            lines.get(pos).is_some_and(|line| is_header_rule(line)),
            "Missing closing header delimiter for case {name}"
        );
        pos += 1;

        // Input: everything up to the `---` separator.
        let input_start = pos;
        pos += lines[pos..]
            .iter()
            .take_while(|line| line.trim() != "---")
            .count();
        assert!(pos < total, "Missing --- separator for case {name}");
        let input = lines[input_start..pos].join("\n").trim_end().to_string();
        pos += 1;

        // Expected output: everything up to the next full header, i.e. the
        // first three-line window of rule / non-blank name / rule.
        let expected_start = pos;
        pos = lines[expected_start..]
            .windows(3)
            .position(|w| {
                is_header_rule(w[0]) && !w[1].trim().is_empty() && is_header_rule(w[2])
            })
            .map_or(total, |offset| expected_start + offset);
        let expected = lines[expected_start..pos].join("\n").trim().to_string();

        parsed.push(CorpusCase {
            name,
            input,
            expected,
        });
    }

    parsed
}

/// Returns `text` without leading blank lines and without trailing
/// whitespace, keeping the indentation of the first non-blank line.
fn strip_blank_edges(text: &str) -> &str {
    let text = text.trim_end();
    let leading_blank_bytes: usize = text
        .split_inclusive('\n')
        .take_while(|line| line.trim().is_empty())
        .map(str::len)
        .sum();
    &text[leading_blank_bytes..]
}

/// Serializes corpus cases back into the corpus file format.
///
/// Every case is written as
///
/// ```text
/// ===
/// <name>
/// ===
///
/// <input>
///
/// ---
///
/// <expected>
///
/// ```
///
/// with one extra blank line between consecutive cases. The name and the
/// expected output are trimmed on both sides. The input only loses leading
/// blank lines and trailing whitespace: the indentation of its first line is
/// kept, so regenerating a corpus file never alters the source text that is
/// fed to the parser.
fn render_corpus(cases: &[CorpusCase]) -> String {
    let mut out = String::new();

    for (idx, case) in cases.iter().enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        out.push_str("===\n");
        out.push_str(case.name.trim());
        out.push_str("\n===\n\n");
        // Not `trim()`: that would also strip the first line's indentation.
        out.push_str(strip_blank_edges(&case.input));
        out.push_str("\n\n---\n\n");
        out.push_str(case.expected.trim());
        out.push_str("\n\n");
    }

    out
}

/// Runs yeast over `input` for the given language and returns the dumped AST.
///
/// When the language spec has a desugaring config the runner is built from
/// it; otherwise the runner is created with an empty slice as its second
/// argument.
///
/// # Panics
///
/// Panics when the runner cannot be created from the config or when running
/// it on `input` fails.
fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> String {
    let ts_language = lang.ts_language.clone();
    let runner = if let Some(config) = lang.desugar.as_ref() {
        Runner::from_config(ts_language, config)
            .expect("Failed to create yeast runner from desugaring config")
    } else {
        Runner::new(ts_language, &[])
    };

    let ast = match runner.run(input) {
        Ok(ast) => ast,
        Err(e) => panic!("Failed to parse corpus input: {e}"),
    };
    dump_ast(&ast, ast.get_root(), input)
}

/// Checks every `*.txt` corpus file under `tests/corpus/<language prefix>`.
///
/// For each case the input is run through `run_desugaring` and the trimmed
/// result is compared with the trimmed expected text. When update mode is
/// enabled (see `update_mode_enabled`) nothing is compared: the expected text
/// of every case is replaced by the actual output and the file is rewritten.
/// Languages without a corpus directory are skipped.
#[test]
fn test_corpus() {
    let regenerate = update_mode_enabled();
    let corpus_root = Path::new("tests/corpus");

    for lang in languages::all_language_specs() {
        let lang_dir = corpus_root.join(&lang.prefix);
        if !lang_dir.exists() {
            continue;
        }

        // Collect the corpus files of this language in a stable order.
        let dir_entries = match fs::read_dir(&lang_dir) {
            Ok(entries) => entries,
            Err(e) => panic!(
                "Failed to read corpus directory {}: {e}",
                lang_dir.display()
            ),
        };
        let mut txt_files = Vec::new();
        for entry in dir_entries {
            let path = entry.expect("Failed to read corpus entry").path();
            if path.extension().is_some_and(|ext| ext == "txt") {
                txt_files.push(path);
            }
        }
        txt_files.sort();

        for corpus_path in txt_files {
            let content = match fs::read_to_string(&corpus_path) {
                Ok(text) => text,
                Err(e) => panic!("Failed to read {}: {e}", corpus_path.display()),
            };
            let mut cases = parse_corpus(&content);
            assert!(
                !cases.is_empty(),
                "No corpus cases found in {}",
                corpus_path.display()
            );

            for case in cases.iter_mut() {
                let actual = run_desugaring(&lang, &case.input);
                if !regenerate {
                    assert_eq!(
                        case.expected.trim(),
                        actual.trim(),
                        "Corpus case failed in {}: {}",
                        corpus_path.display(),
                        case.name
                    );
                    continue;
                }
                case.expected = actual.trim().to_string();
            }

            if regenerate {
                let updated = render_corpus(&cases);
                if let Err(e) = fs::write(&corpus_path, updated) {
                    panic!(
                        "Failed to update corpus file {}: {e}",
                        corpus_path.display()
                    );
                }
            }
        }
    }
}
Loading