diff --git a/.gitignore b/.gitignore
index 26b21af..0a8a625 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@ prodigal
 *.faa
 *.gff
 *.csv
+*.out
+/baseline
diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh
new file mode 100755
index 0000000..fd544ab
--- /dev/null
+++ b/scripts/benchmark.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+set -euo pipefail
+
+# Benchmark script for FragGeneScanRs using hyperfine
+# Runs benchmarks on all example files with at least 10 iterations
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+BINARY="$PROJECT_ROOT/target/release/FragGeneScanRs"
+
+# Check for hyperfine
+if ! command -v hyperfine &> /dev/null; then
+    echo "Error: hyperfine is not installed." >&2
+    echo "Install with: brew install hyperfine or similar" >&2
+    exit 1
+fi
+
+# Build release binary
+echo "Building release binary..."
+cargo build --release --manifest-path "$PROJECT_ROOT/Cargo.toml"
+
+# Create temp directory for outputs
+TEMP_DIR=$(mktemp -d)
+trap 'rm -rf "$TEMP_DIR"' EXIT
+
+# Run one hyperfine benchmark of the release binary.
+# Arguments:
+#   $1 - label printed before the run
+#   $2 - minimum number of timed runs (--min-runs)
+#   $3 - input file, relative to the project root (-s)
+#   $4 - training file name (-t)
+#   $5 - whole-genome flag (-w)
+#   $6 - output prefix name, created inside $TEMP_DIR (-o)
+run_benchmark() {
+    local label="$1"
+    local min_runs="$2"
+    local input="$3"
+    local train="$4"
+    local whole="$5"
+    local name="$6"
+    local cmd
+
+    # hyperfine passes the command string to a shell, so shell-quote every
+    # word; otherwise a project or temp path containing spaces would be
+    # split into several arguments.
+    printf -v cmd '%q ' \
+        "$BINARY" \
+        -s "$PROJECT_ROOT/$input" \
+        -t "$train" \
+        -w "$whole" \
+        -o "$TEMP_DIR/$name"
+
+    echo ""
+    echo "Benchmark: $label"
+    hyperfine \
+        --warmup 3 \
+        --min-runs "$min_runs" \
+        "$cmd"
+}
+
+echo ""
+echo "Running benchmarks (minimum 10 runs, 3 warmup runs each)..."
+echo "============================================================"
+
+# Benchmark 1: Short reads (NC_000913-454.fna with 454_10 training)
+run_benchmark "Short reads (NC_000913-454.fna)" 20 \
+    example/NC_000913-454.fna 454_10 0 NC_000913-454
+
+# Benchmark 2: Complete genome (NC_000913.fna with complete training)
+run_benchmark "Complete genome (NC_000913.fna)" 20 \
+    example/NC_000913.fna complete 1 NC_000913
+
+# Benchmark 3: Long reads (contigs.fna with complete training)
+run_benchmark "Long reads (contigs.fna)" 10 \
+    example/contigs.fna complete 1 contigs
+
+echo ""
+echo "Benchmarks complete!"
diff --git a/scripts/validate.sh b/scripts/validate.sh
new file mode 100755
index 0000000..73e5598
--- /dev/null
+++ b/scripts/validate.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+set -euo pipefail
+
+# Validation script for FragGeneScanRs
+# Usage:
+#   ./scripts/validate.sh --baseline    Generate baseline output files
+#   ./scripts/validate.sh --check       Compare current output against baseline (default)
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+BASELINE_DIR="$PROJECT_ROOT/baseline"
+BINARY="$PROJECT_ROOT/target/release/FragGeneScanRs"
+
+# Scratch directory used by --check; empty until one has been created.
+TEMP_DIR=""
+
+# Example files and their configurations
+# Format: "input_file:training_file:whole_genome_flag:output_name"
+EXAMPLES=(
+    "example/NC_000913-454.fna:454_10:0:NC_000913-454"
+    "example/NC_000913.fna:complete:1:NC_000913"
+    "example/contigs.fna:complete:1:contigs"
+)
+
+# Print the command-line help to stderr and exit with status 1.
+usage() {
+    {
+        echo "Usage: $0 [--baseline|--check]"
+        echo "  --baseline    Generate baseline output files"
+        echo "  --check       Compare current output against baseline (default)"
+    } >&2
+    exit 1
+}
+
+# EXIT trap handler: remove the scratch directory if one was created.
+cleanup() {
+    if [[ -n "$TEMP_DIR" ]]; then
+        rm -rf "$TEMP_DIR"
+    fi
+}
+
+# Build the release binary that the examples are run with.
+build_release() {
+    echo "Building release binary..."
+    cargo build --release --manifest-path "$PROJECT_ROOT/Cargo.toml"
+}
+
+# Run the release binary on one example.
+# Arguments:
+#   $1 - input file, relative to the project root (-s)
+#   $2 - training file name (-t)
+#   $3 - whole-genome flag (-w)
+#   $4 - output prefix (-o)
+run_example() {
+    local input="$1"
+    local train="$2"
+    local whole="$3"
+    local output_prefix="$4"
+
+    "$BINARY" \
+        -s "$PROJECT_ROOT/$input" \
+        -t "$train" \
+        -w "$whole" \
+        -o "$output_prefix"
+}
+
+# Run every example and write its output files under $BASELINE_DIR.
+generate_baseline() {
+    local example input train whole name
+
+    echo "Generating baseline outputs..."
+    mkdir -p "$BASELINE_DIR"
+
+    for example in "${EXAMPLES[@]}"; do
+        IFS=':' read -r input train whole name <<< "$example"
+        echo "  Processing $name..."
+        run_example "$input" "$train" "$whole" "$BASELINE_DIR/$name"
+    done
+
+    echo "Baseline generated in $BASELINE_DIR"
+}
+
+# Run every example into a scratch directory and compare its .out, .faa and
+# .ffn files with the baseline. Exits 0 when every compared file matches and
+# 1 when a file differs or no baseline file was available to compare.
+check_against_baseline() {
+    local example input train whole name ext baseline_file current_file
+    local failed=0
+    local compared=0
+
+    if [[ ! -d "$BASELINE_DIR" ]]; then
+        echo "Error: Baseline directory not found. Run with --baseline first." >&2
+        exit 1
+    fi
+
+    # The trap reads a global instead of a function local so that cleanup
+    # stays valid under `set -u` wherever the script happens to exit.
+    TEMP_DIR=$(mktemp -d)
+    trap cleanup EXIT
+
+    echo "Running current version and comparing against baseline..."
+
+    for example in "${EXAMPLES[@]}"; do
+        IFS=':' read -r input train whole name <<< "$example"
+        echo "  Processing $name..."
+        run_example "$input" "$train" "$whole" "$TEMP_DIR/$name"
+
+        for ext in out faa ffn; do
+            baseline_file="$BASELINE_DIR/$name.$ext"
+            current_file="$TEMP_DIR/$name.$ext"
+
+            if [[ ! -f "$baseline_file" ]]; then
+                echo "    Warning: Baseline file $baseline_file not found" >&2
+                continue
+            fi
+
+            compared=$((compared + 1))
+            if diff -q "$baseline_file" "$current_file" > /dev/null 2>&1; then
+                echo "    ✓ $name.$ext matches"
+            else
+                echo "    ✗ $name.$ext DIFFERS"
+                failed=1
+            fi
+        done
+    done
+
+    # With nothing compared, "all outputs match" would be a false positive.
+    if [[ $compared -eq 0 ]]; then
+        echo "Error: No baseline files found to compare against. Run with --baseline first." >&2
+        exit 1
+    fi
+
+    if [[ $failed -eq 0 ]]; then
+        echo "All outputs match baseline!"
+        exit 0
+    else
+        echo "Some outputs differ from baseline!"
+        exit 1
+    fi
+}
+
+# Parse arguments
+MODE="check"
+if [[ $# -gt 0 ]]; then
+    case "$1" in
+        --baseline)
+            MODE="baseline"
+            ;;
+        --check)
+            MODE="check"
+            ;;
+        *)
+            usage
+            ;;
+    esac
+fi
+
+# Main
+build_release
+
+case "$MODE" in
+    baseline)
+        generate_baseline
+        ;;
+    check)
+        check_against_baseline
+        ;;
+esac