diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7525a38..54e213e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,6 +9,9 @@ on: - "**" workflow_dispatch: +env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + jobs: build: name: Build on ${{ matrix.os }} @@ -22,15 +25,19 @@ jobs: os: [ubuntu-latest, macos-latest] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: - fetch-depth: 0 + # Shallow clone is enough for building. Steps that need history + # (changelog, merge-base) should override with their own fetch. + fetch-depth: 1 + # Linux: install toolchain + accelerators - name: Install dependencies (Linux) if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get install -y build-essential cmake python3 + sudo apt-get install -y build-essential cmake python3 \ + ninja-build ccache lld # Install LLVM and Clang 20 wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - sudo apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-20 main" @@ -39,38 +46,75 @@ jobs: echo "LLVM_DIR=/usr/lib/llvm-20/lib/cmake/llvm" >> $GITHUB_ENV echo "Clang_DIR=/usr/lib/llvm-20/lib/cmake/clang" >> $GITHUB_ENV + # macOS: install toolchain - name: Install dependencies (macOS) if: runner.os == 'macOS' run: | - brew install cmake python llvm@20 + brew install cmake python llvm@20 ninja ccache echo "LLVM_DIR=$(brew --prefix llvm@20)/lib/cmake/llvm" >> $GITHUB_ENV echo "Clang_DIR=$(brew --prefix llvm@20)/lib/cmake/clang" >> $GITHUB_ENV echo "$(brew --prefix llvm@20)/bin" >> $GITHUB_PATH - - name: Configure via build.sh (quick) + # ccache: restore + configure + - name: Restore ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-${{ runner.os }}-${{ github.ref_name }}-${{ hashFiles('CMakeLists.txt', 'src/**', 'include/**') }} + restore-keys: | + ccache-${{ runner.os }}-${{ github.ref_name }}- + ccache-${{ runner.os }}- + + - name: Configure ccache run: | 
- ./build.sh --build-dir build-script --type Release --configure-only + ccache --set-config=cache_dir=${{ env.CCACHE_DIR }} + ccache --set-config=max_size=500M + ccache --set-config=compression=true + ccache -z - - name: Configure and Build (Linux/macOS) - if: runner.os == 'Linux' || runner.os == 'macOS' + # FetchContent cache (sources only) + - name: Restore FetchContent sources + uses: actions/cache@v4 + with: + path: | + build/_deps/cc-src + build/_deps/coretrace-logger-src + key: fetchcontent-${{ runner.os }}-llvm20-${{ hashFiles('CMakeLists.txt', 'cmake/**') }} + restore-keys: | + fetchcontent-${{ runner.os }}-llvm20- + + # Configure + - name: Configure run: | - mkdir -p build && cd build - cmake .. -DCMAKE_BUILD_TYPE=Release \ - -DLLVM_DIR=${{ env.LLVM_DIR }} \ - -DClang_DIR=${{ env.Clang_DIR }} \ - -DUSE_SHARED_LIB=OFF \ - -DBUILD_TESTS=OFF + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_DIR=${{ env.LLVM_DIR }} \ + -DClang_DIR=${{ env.Clang_DIR }} \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DUSE_SHARED_LIB=OFF \ + -DBUILD_TESTS=OFF \ + ${{ runner.os == 'Linux' && '-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=lld -DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld' || '' }} + + # Build + - name: Build + run: cmake --build build --config Release - cmake --build . 
--config Release + - name: Show ccache stats + if: always() + run: ccache -s - - name: Test Stack Usage Analyzer (Linux/macOS) - if: runner.os == 'Linux' || runner.os == 'macOS' + # Tests + - name: Test Stack Usage Analyzer timeout-minutes: 45 run: | TEST_JOBS="$(python3 -c 'import os; print(max(1, min(8, os.cpu_count() or 1)))')" echo "Running run_test.py with ${TEST_JOBS} job(s)" - python3 -u run_test.py --jobs="${TEST_JOBS}" + EXTRA_ANALYZER_ARGS="" + CORETRACE_RUN_TEST_EXTRA_ANALYZER_ARGS="${EXTRA_ANALYZER_ARGS}" \ + python3 -u run_test.py --jobs="${TEST_JOBS}" + # Self-analysis (Linux only) - name: Self-analysis (analyze own source code) if: runner.os == 'Linux' run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index 0083692..5dbb883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,14 +68,20 @@ set(STACK_ANALYZER_SOURCES src/analysis/AnalyzerUtils.cpp src/analysis/CompileCommands.cpp src/analysis/ConstParamAnalysis.cpp + src/analysis/CommandInjectionAnalysis.cpp src/analysis/DuplicateIfCondition.cpp src/analysis/DynamicAlloca.cpp + src/analysis/BufferWriteModel.cpp + src/analysis/FrontendDiagnostics.cpp src/analysis/FunctionFilter.cpp src/analysis/IRValueUtils.cpp src/analysis/IntRanges.cpp + src/analysis/IntegerOverflowAnalysis.cpp src/analysis/InputPipeline.cpp src/analysis/InvalidBaseReconstruction.cpp src/analysis/MemIntrinsicOverflow.cpp + src/analysis/NullDerefAnalysis.cpp + src/analysis/OOBReadAnalysis.cpp src/analysis/ResourceLifetimeAnalysis.cpp src/analysis/Reachability.cpp src/analysis/SizeMinusKWrites.cpp @@ -84,6 +90,8 @@ set(STACK_ANALYZER_SOURCES src/analysis/StackPointerEscape.cpp src/analysis/StackPointerEscapeModel.cpp src/analysis/StackPointerEscapeResolver.cpp + src/analysis/TOCTOUAnalysis.cpp + src/analysis/TypeConfusionAnalysis.cpp src/analysis/UninitializedVarAnalysis.cpp src/report/ReportSerialization.cpp src/mangle.cpp diff --git a/Dockerfile b/Dockerfile index ce96275..209ad00 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,31 
+1,39 @@ # ============================================================================= -# CoreTrace Stack Analyzer — Production Docker Image +# CoreTrace Stack Analyzer — Docker Image # ============================================================================= -# Multi-stage build: builds the analyzer, then creates a slim runtime image. +# This Dockerfile supports 3 user-facing targets: +# - dev: +# toolchain + repo checkout, no build; default command is an interactive shell. +# Use it to run cmake/build/tests manually. +# - builder: +# compiles the analyzer into /repo/build/stack_usage_analyzer. +# Use it in CI or to extract binaries/artifacts. +# - runtime: +# production image with analyzer + models + Docker entrypoint wrapper. +# Default workdir is /workspace and entrypoint auto-resolves compile_commands.json. # -# Default runtime behavior (via entrypoint wrapper): -# - auto-detect /workspace/build/compile_commands.json (fallback: /workspace/compile_commands.json) -# - --analysis-profile=fast -# - --compdb-fast -# - --resource-summary-cache-memory-only -# - --resource-model=/models/resource-lifetime/generic.txt +# Typical commands: +# # 1) Dev mode (interactive) +# docker build --target dev -t coretrace-stack-analyzer:dev . +# docker run --rm -it -v "$PWD:/repo" -w /repo coretrace-stack-analyzer:dev # -# Usage: -# docker build -t coretrace-stack-analyzer . -# docker run --rm -v $(pwd):/workspace coretrace-stack-analyzer +# # 2) Builder mode (compile artifacts) +# docker build --target builder -t coretrace-stack-analyzer:builder . 
+# docker create --name coretrace-builder coretrace-stack-analyzer:builder +# docker cp coretrace-builder:/repo/build/stack_usage_analyzer ./build/stack_usage_analyzer +# docker rm coretrace-builder # -# Override defaults with explicit args: -# docker run --rm -v $(pwd):/workspace coretrace-stack-analyzer \ -# --analysis-profile=full --resource-model=/models/resource-lifetime/generic.txt -# -# Bypass defaults completely: -# docker run --rm -v $(pwd):/workspace coretrace-stack-analyzer --raw --help +# # 3) Runtime mode (analyze project from compile_commands.json) +# docker build --target runtime -t coretrace-stack-analyzer:runtime . +# docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer:runtime +# # pass --raw to bypass wrapper defaults: +# docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer:runtime --raw --help # ============================================================================= # --------------------------------------------------------------------------- -# Stage 1: Build +# Stage 0: Base (toolchain + build deps) # --------------------------------------------------------------------------- -FROM ubuntu:24.04 AS builder +FROM ubuntu:24.04 AS base ARG DEBIAN_FRONTEND=noninteractive ARG LLVM_VERSION=20 @@ -52,6 +60,25 @@ RUN curl -fsSL https://apt.llvm.org/llvm.sh -o /tmp/llvm.sh \ && apt-get install -y --no-install-recommends libclang-${LLVM_VERSION}-dev \ && rm -rf /var/lib/apt/lists/* +# Make sure LLVM shared libs are found at runtime (useful for dev builds too) +ENV LD_LIBRARY_PATH=/usr/lib/llvm-${LLVM_VERSION}/lib + +# --------------------------------------------------------------------------- +# Stage 1: Dev (deps + repo, no build) +# --------------------------------------------------------------------------- +FROM base AS dev + +WORKDIR /repo +COPY . 
/repo + +# Default: interactive shell so you can build/test manually +CMD ["bash"] + +# --------------------------------------------------------------------------- +# Stage 2: Build (produces binaries) +# --------------------------------------------------------------------------- +FROM base AS builder + WORKDIR /repo COPY . /repo @@ -65,14 +92,14 @@ RUN cmake -S . -B build -G Ninja \ && cmake --build build -j"$(nproc)" # --------------------------------------------------------------------------- -# Stage 2: Runtime (slim) +# Stage 3: Runtime (prod) # --------------------------------------------------------------------------- -FROM ubuntu:24.04 +FROM ubuntu:24.04 AS runtime ARG DEBIAN_FRONTEND=noninteractive ARG LLVM_VERSION=20 -# Install only the runtime libraries needed by the analyzer binary +# Install only what is needed to run (and to support the entrypoint script) RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ curl \ @@ -97,7 +124,6 @@ COPY --from=builder /repo/models /models RUN chmod +x /usr/local/bin/coretrace-entrypoint.py -# Make sure the binary can find LLVM shared libs ENV LD_LIBRARY_PATH=/usr/lib/llvm-${LLVM_VERSION}/lib WORKDIR /workspace diff --git a/PATCH.md b/PATCH.md deleted file mode 100644 index 2b16dc8..0000000 --- a/PATCH.md +++ /dev/null @@ -1,54 +0,0 @@ -# Statut des faux positifs (mise a jour) - -## Corriges - -### 1) `src/analysis/MemIntrinsicOverflow.cpp:71` - -Warning corrige: -- `local variable 'classifyByName' is never initialized` - -Patch applique: -- `UninitializedVarAnalysis` ignore maintenant les objets C++ vides (ex: lambda sans state) - en se basant sur la forme IR + metadata debug (pas une heuristique sur un nom de variable). 
- -### 2) `src/analysis/ResourceLifetimeAnalysis.cpp:823, 825, 995, 1007` - -Warning corrige: -- `potential read of uninitialized local variable 'out'` - -Patch applique: -- verification d'initialisation "padding-aware" dans `UninitializedVarAnalysis`: - - on valide l'initialisation des octets semantiques (membres) ; - - les trous de padding de layout ne declenchent plus de faux positifs. - -### 3) `src/cli/ArgParser.cpp` (19 warnings) - -Warnings corriges: -- `potential UB: invalid base reconstruction via offsetof/container_of` -- `unable to verify that derived pointer points to a valid object` - -Patch applique: -- `InvalidBaseReconstruction` utilise maintenant une resolution recursive de sous-objet - (type + offset + bornes de projection) au lieu d'un test limite au membre top-level. -- Les projections C++ valides sur objets imbriques (`result.parsed.config.*`) ne sont plus - confondues avec des patterns `container_of`. - -## Non-regressions ajoutees - -- `test/uninitialized-variable/uninitialized-local-cpp-empty-lambda-capture.cpp` -- `test/uninitialized-variable/uninitialized-local-cpp-default-member-return.cpp` -- `test/offset_of-container_of/gep_nested_subobject_reference_no_diag.cpp` - -## Validation - -- Verification ciblee sur: - - `src/analysis/MemIntrinsicOverflow.cpp` -> `warning=0` - - `src/analysis/ResourceLifetimeAnalysis.cpp` -> `warning=0` - - `src/cli/ArgParser.cpp` -> `warning=0` -- Suite de regression complete: - - `./run_test.py --jobs 4` - - resultat: **413/413 passed** - -## Reste connu - -- Hors faux positifs: vrai positif conserve `src/analysis/InvalidBaseReconstruction.cpp:188`. diff --git a/README.md b/README.md index f293e1f..2186382 100644 --- a/README.md +++ b/README.md @@ -94,46 +94,57 @@ Notes: - If no compile database is found, it can fallback to git-tracked sources (`inputs-from-git-fallback`, enabled by default). 
-### Docker image for registry-based CI - -When you want a reusable analyzer image in CI (instead of rebuilding the tool each run), -build and publish: -- `Dockerfile`: analyzer runtime image with sensible defaults for full-repo analysis. -- `Dockerfile.ci`: CI gate image (entrypoint = `run_code_analysis.py`). - -Default behavior of `Dockerfile` runtime entrypoint: -- auto-detect `compile_commands.json` from `/workspace/build/compile_commands.json` - (fallback: `/workspace/compile_commands.json`) -- `--analysis-profile=fast` -- `--compdb-fast` (drops heavy/platform-specific compile flags from compile DB) -- `--resource-summary-cache-memory-only` -- `--resource-model=/models/resource-lifetime/generic.txt` -- if `compile_commands.json` contains stale absolute paths (e.g. `/tmp/evan/...`) - while the repo is mounted at `/workspace`, a compatibility symlink is created - automatically when safe (so analysis can still run without extra Docker flags) - -Runtime image is intentionally analyzer-only (toolchain/runtime + analyzer models). -Project-specific SDKs/headers must be installed in the target CI job or in a derived image. - -Simple local run (analyze whole repo from compile database): +### Docker image for local and CI workflows + +`Dockerfile` is multi-target and supports three modes: + +1. `dev` mode (interactive toolchain container, no prebuild) ```zsh -docker build -t coretrace-stack-analyzer . -docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer +docker build --target dev -t coretrace-stack-analyzer:dev . +docker run --rm -it -v "$PWD:/repo" -w /repo coretrace-stack-analyzer:dev ``` +Use this mode when you want to run `cmake`, `ctest`, `run_test.py`, or debug locally inside a Linux environment. -Override defaults: +2. `builder` mode (compile artifacts) ```zsh -docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer \ +docker build --target builder -t coretrace-stack-analyzer:builder . 
+docker create --name coretrace-builder coretrace-stack-analyzer:builder +docker cp coretrace-builder:/repo/build/stack_usage_analyzer ./build/stack_usage_analyzer +docker rm coretrace-builder +``` +Use this mode in CI when you only need the built analyzer binary (or build artifacts) and not the runtime wrapper. + +3. `runtime` mode (production analyzer container) +```zsh +docker build --target runtime -t coretrace-stack-analyzer:runtime . +docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer:runtime +``` + +Default behavior of runtime entrypoint (`scripts/docker/coretrace_entrypoint.py`): +- auto-detect `compile_commands.json` from `/workspace/build/compile_commands.json`, + then `/workspace/compile_commands.json`, then recursive search under `/workspace` +- add `--analysis-profile=fast` unless already set +- add `--compdb-fast` by default (can be disabled with `CORETRACE_COMPDB_FAST=0`) +- add `--resource-summary-cache-memory-only` unless a resource cache option is already set +- add `--resource-model=/models/resource-lifetime/generic.txt` when present +- optionally create a compatibility symlink for stale absolute build paths in compile DB, + restricted by `CORETRACE_COMPAT_SYMLINK_ALLOWED_ROOTS` (default: `/tmp:/var/tmp`) + +Override defaults in runtime mode: +```zsh +docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer:runtime \ --analysis-profile=full \ --warnings-only ``` -Bypass defaults entirely: +Bypass wrapper defaults entirely: ```zsh -docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer --raw --help +docker run --rm -v "$PWD:/workspace" coretrace-stack-analyzer:runtime --raw --help ``` -Build and push: +For registry-based policy gating, `Dockerfile.ci` is still available (entrypoint = `run_code_analysis.py`). 
+ +Build and push CI image: ```zsh docker build -f Dockerfile.ci \ --build-arg VERSION=0.1.0 \ @@ -143,7 +154,7 @@ docker build -f Dockerfile.ci \ docker push ghcr.io//coretrace-stack-analyzer-ci:0.1.0 ``` -Run in CI (entrypoint already targets `run_code_analysis.py`): +Run CI image: ```zsh docker run --rm \ -u "$(id -u):$(id -g)" \ @@ -222,6 +233,7 @@ Ready-to-adapt workflow examples: --include-compdb-deps includes `_deps` entries when inputs are auto-discovered from compile_commands.json --jobs= parallel jobs for multi-file loading/analysis and cross-TU resource summary build (default: 1) --escape-model= loads external noescape rules for stack pointer escape analysis (`noescape_arg`) +--buffer-model= loads external buffer write rules for copy/string overflow checks (`bounded_write`/`unbounded_write`) --resource-model= loads external acquire/release rules for generic resource lifetime checks --resource-cross-tu enables cross-TU resource summaries for resource lifetime analysis (default: on) --no-resource-cross-tu disables cross-TU resource summaries @@ -527,6 +539,34 @@ For test files, `run_test.py` supports per-file selection with: --- +Buffer write API contracts (`--buffer-model=`) + +- Why this exists: + - Many overflow-prone APIs are project-specific wrappers (`copy_bytes`, `my_strcpy`, etc.). + - Encoding these contracts in a model avoids hardcoding function names in analyzer code. + - You can model both bounded writes (explicit length arg) and unbounded writes. + +Model format (`--buffer-model=`): + +```text +bounded_write +unbounded_write +``` + +Example model: + +```text +bounded_write memcpy 0 2 +bounded_write strncpy 0 2 +unbounded_write strcpy 0 +unbounded_write strcat 0 +``` + +For test files, `run_test.py` supports per-file selection with: +`// buffer-model: `. + +--- + Actually done: - 1. Multi-file CLI inputs with deterministic ordering and aggregated output. @@ -535,7 +575,7 @@ Actually done: - 4. 
Compile args passthrough: `-I`, `-D`, `--compile-arg`. - 5. Dynamic alloca / VLA detection, including user-controlled sizes, upper-bound inference, and recursion-aware severity (errors for infinite recursion or oversized allocations, warnings for other dynamic sizes). - 6. Deriving human-friendly names for unnamed allocas in diagnostics. -- 7. Detection of memcpy/memset overflows on stack buffers. +- 7. Detection of stack buffer overflows in memory/string write APIs (built-in + model-driven). - 8. Warning when a function performs multiple stores into the same stack buffer. - 9. Deeper traversal analysis: constraint propagation. - 10. Detection of deep indirection in aliasing. diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp index 0fa2721..c549515 100644 --- a/include/StackUsageAnalyzer.hpp +++ b/include/StackUsageAnalyzer.hpp @@ -63,6 +63,7 @@ namespace ctrace::stack bool dumpIRIsDir = false; bool demangle = false; std::string escapeModelPath; + std::string bufferModelPath; std::string resourceModelPath; bool resourceCrossTU = true; std::string resourceSummaryCacheDir = ".cache/resource-lifetime"; @@ -143,12 +144,18 @@ namespace ctrace::stack DuplicateIfCondition = 13, UninitializedLocalRead = 14, StackFrameTooLarge = 15, - ResourceLifetimeIssue = 16 + ResourceLifetimeIssue = 16, + NullPointerDereference = 17, + CommandInjection = 18, + TOCTOURace = 19, + IntegerOverflow = 20, + TypeConfusion = 21, + OutOfBoundsRead = 22 }; template <> struct EnumTraits { - static constexpr std::array names = {"None", + static constexpr std::array names = {"None", "StackBufferOverflow", "NegativeStackIndex", "VLAUsage", @@ -164,7 +171,13 @@ namespace ctrace::stack "DuplicateIfCondition", "UninitializedLocalRead", "StackFrameTooLarge", - "ResourceLifetimeIssue"}; + "ResourceLifetimeIssue", + "NullPointerDereference", + "CommandInjection", + "TOCTOURace", + "IntegerOverflow", + "TypeConfusion", + "OutOfBoundsRead"}; }; /* diff --git 
a/include/analysis/BufferWriteModel.hpp b/include/analysis/BufferWriteModel.hpp new file mode 100644 index 0000000..ed7d70f --- /dev/null +++ b/include/analysis/BufferWriteModel.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + enum class BufferWriteRuleKind + { + BoundedWrite, + UnboundedWrite + }; + + struct BufferWriteRule + { + BufferWriteRuleKind kind = BufferWriteRuleKind::BoundedWrite; + std::string functionPattern; + unsigned destArgIndex = 0; + unsigned sizeArgIndex = 0; // only used for BoundedWrite + }; + + struct BufferWriteModel + { + std::vector rules; + }; + + class BufferWriteRuleMatcher + { + public: + const BufferWriteRule* findMatchingRule(const BufferWriteModel& model, + const llvm::Function& callee, std::size_t argCount); + + private: + struct NameVariants + { + std::string mangled; + std::string demangled; + std::string demangledBase; + }; + + const NameVariants& namesFor(const llvm::Function& callee); + bool ruleMatchesFunction(const BufferWriteRule& rule, const llvm::Function& callee); + + std::unordered_map namesCache; + }; + + bool parseBufferWriteModel(const std::string& path, BufferWriteModel& out, std::string& error); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/CommandInjectionAnalysis.hpp b/include/analysis/CommandInjectionAnalysis.hpp new file mode 100644 index 0000000..edab2f2 --- /dev/null +++ b/include/analysis/CommandInjectionAnalysis.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + struct CommandInjectionIssue + { + std::string funcName; + std::string filePath; + std::string sinkName; + const llvm::Instruction* inst = nullptr; + }; + + std::vector + analyzeCommandInjection(llvm::Module& mod, + const std::function& shouldAnalyze); +} 
// namespace ctrace::stack::analysis diff --git a/include/analysis/FrontendDiagnostics.hpp b/include/analysis/FrontendDiagnostics.hpp new file mode 100644 index 0000000..8577741 --- /dev/null +++ b/include/analysis/FrontendDiagnostics.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "StackUsageAnalyzer.hpp" + +#include +#include + +namespace llvm +{ + class Module; +} + +namespace ctrace::stack::analysis +{ + std::vector collectFrontendDiagnostics(const std::string& diagnosticsText, + const llvm::Module& mod, + const std::string& fallbackFilePath); +} diff --git a/include/analysis/InputPipeline.hpp b/include/analysis/InputPipeline.hpp index 0426542..1d2bfd7 100644 --- a/include/analysis/InputPipeline.hpp +++ b/include/analysis/InputPipeline.hpp @@ -2,6 +2,7 @@ #include #include +#include #include "StackUsageAnalyzer.hpp" @@ -18,6 +19,7 @@ namespace ctrace::stack::analysis { std::unique_ptr module; LanguageType language = LanguageType::Unknown; + std::vector frontendDiagnostics; std::string error; }; diff --git a/include/analysis/IntegerOverflowAnalysis.hpp b/include/analysis/IntegerOverflowAnalysis.hpp new file mode 100644 index 0000000..adac231 --- /dev/null +++ b/include/analysis/IntegerOverflowAnalysis.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + enum class IntegerOverflowIssueKind + { + ArithmeticInSizeComputation, + SignedToUnsignedSize, + TruncationInSizeComputation, + SignedArithmeticOverflow + }; + + struct IntegerOverflowIssue + { + std::string funcName; + std::string filePath; + std::string sinkName; + std::string operation; + IntegerOverflowIssueKind kind = IntegerOverflowIssueKind::ArithmeticInSizeComputation; + const llvm::Instruction* inst = nullptr; + }; + + std::vector + analyzeIntegerOverflows(llvm::Module& mod, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis 
diff --git a/include/analysis/MemIntrinsicOverflow.hpp b/include/analysis/MemIntrinsicOverflow.hpp index 14e9a17..8fafdb4 100644 --- a/include/analysis/MemIntrinsicOverflow.hpp +++ b/include/analysis/MemIntrinsicOverflow.hpp @@ -20,13 +20,15 @@ namespace ctrace::stack::analysis { std::string funcName; std::string varName; - std::string intrinsicName; // "memcpy" / "memset" / "memmove" + std::string intrinsicName; StackSize destSizeBytes = 0; StackSize lengthBytes = 0; + bool hasExplicitLength = false; const llvm::Instruction* inst = nullptr; }; std::vector analyzeMemIntrinsicOverflows(llvm::Module& mod, const llvm::DataLayout& DL, - const std::function& shouldAnalyze); + const std::function& shouldAnalyze, + const std::string& bufferModelPath = ""); } // namespace ctrace::stack::analysis diff --git a/include/analysis/NullDerefAnalysis.hpp b/include/analysis/NullDerefAnalysis.hpp new file mode 100644 index 0000000..0a9fb67 --- /dev/null +++ b/include/analysis/NullDerefAnalysis.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + enum class NullDerefIssueKind + { + DirectNullPointer, + NullBranchDereference, + NullStoredInLocalSlot, + UncheckedAllocatorResult + }; + + struct NullDerefIssue + { + std::string funcName; + std::string filePath; + std::string pointerName; + NullDerefIssueKind kind = NullDerefIssueKind::DirectNullPointer; + const llvm::Instruction* inst = nullptr; + }; + + std::vector + analyzeNullDereferences(llvm::Module& mod, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/OOBReadAnalysis.hpp b/include/analysis/OOBReadAnalysis.hpp new file mode 100644 index 0000000..a24a46c --- /dev/null +++ b/include/analysis/OOBReadAnalysis.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include +#include + +namespace llvm +{ + class DataLayout; + 
class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + enum class OOBReadIssueKind + { + MissingNullTerminator, + HeapIndexOutOfBounds + }; + + struct OOBReadIssue + { + std::string funcName; + std::string filePath; + std::string bufferName; + std::string apiName; + OOBReadIssueKind kind = OOBReadIssueKind::HeapIndexOutOfBounds; + std::uint64_t bufferSizeBytes = 0; + std::uint64_t writeSizeBytes = 0; + std::uint64_t capacityElements = 0; + const llvm::Instruction* inst = nullptr; + }; + + std::vector + analyzeOOBReads(llvm::Module& mod, const llvm::DataLayout& dataLayout, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/ResourceLifetimeAnalysis.hpp b/include/analysis/ResourceLifetimeAnalysis.hpp index 7edf206..87e2dcf 100644 --- a/include/analysis/ResourceLifetimeAnalysis.hpp +++ b/include/analysis/ResourceLifetimeAnalysis.hpp @@ -46,7 +46,9 @@ namespace ctrace::stack::analysis MissingRelease, DoubleRelease, MissingDestructorRelease, - IncompleteInterproc + IncompleteInterproc, + UseAfterRelease, + ReleasedHandleEscapes }; struct ResourceLifetimeIssue diff --git a/include/analysis/StackBufferAnalysis.hpp b/include/analysis/StackBufferAnalysis.hpp index c0b0811..419e508 100644 --- a/include/analysis/StackBufferAnalysis.hpp +++ b/include/analysis/StackBufferAnalysis.hpp @@ -16,6 +16,12 @@ namespace llvm namespace ctrace::stack::analysis { + enum class BufferStorageClass + { + Stack, + Global, + }; + struct StackBufferOverflowIssue { std::string funcName; @@ -24,6 +30,7 @@ namespace ctrace::stack::analysis StackSize indexOrUpperBound = 0; // used for upper bounds (UB) or constant index bool isWrite = false; bool indexIsConstant = false; + BufferStorageClass storageClass = BufferStorageClass::Stack; const llvm::Instruction* inst = nullptr; // Violation based on a lower bound (index potentially negative) diff --git 
a/include/analysis/TOCTOUAnalysis.hpp b/include/analysis/TOCTOUAnalysis.hpp new file mode 100644 index 0000000..6f56f16 --- /dev/null +++ b/include/analysis/TOCTOUAnalysis.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + struct TOCTOUIssue + { + std::string funcName; + std::string filePath; + std::string checkApi; + std::string useApi; + const llvm::Instruction* inst = nullptr; + }; + + std::vector + analyzeTOCTOU(llvm::Module& mod, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/TypeConfusionAnalysis.hpp b/include/analysis/TypeConfusionAnalysis.hpp new file mode 100644 index 0000000..d3cfe57 --- /dev/null +++ b/include/analysis/TypeConfusionAnalysis.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + +namespace llvm +{ + class DataLayout; + class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + struct TypeConfusionIssue + { + std::string funcName; + std::string filePath; + std::string smallerViewType; + std::string accessedViewType; + std::uint64_t smallerViewSizeBytes = 0; + std::uint64_t accessOffsetBytes = 0; + const llvm::Instruction* inst = nullptr; + }; + + std::vector + analyzeTypeConfusions(llvm::Module& mod, const llvm::DataLayout& dataLayout, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/UninitializedVarAnalysis.hpp b/include/analysis/UninitializedVarAnalysis.hpp index d14deed..2ea4b64 100644 --- a/include/analysis/UninitializedVarAnalysis.hpp +++ b/include/analysis/UninitializedVarAnalysis.hpp @@ -50,6 +50,7 @@ namespace ctrace::stack::analysis { ReadBeforeDefiniteInit, ReadBeforeDefiniteInitViaCall, + ExposedUninitializedBytesViaSink, NeverInitialized }; diff --git 
a/include/analyzer/DiagnosticEmitter.hpp b/include/analyzer/DiagnosticEmitter.hpp index 3b3c4b3..be8c201 100644 --- a/include/analyzer/DiagnosticEmitter.hpp +++ b/include/analyzer/DiagnosticEmitter.hpp @@ -5,14 +5,20 @@ #include "analysis/AllocaUsage.hpp" #include "analysis/ConstParamAnalysis.hpp" +#include "analysis/CommandInjectionAnalysis.hpp" #include "analysis/DuplicateIfCondition.hpp" #include "analysis/DynamicAlloca.hpp" +#include "analysis/IntegerOverflowAnalysis.hpp" #include "analysis/InvalidBaseReconstruction.hpp" #include "analysis/MemIntrinsicOverflow.hpp" +#include "analysis/NullDerefAnalysis.hpp" +#include "analysis/OOBReadAnalysis.hpp" #include "analysis/ResourceLifetimeAnalysis.hpp" #include "analysis/SizeMinusKWrites.hpp" #include "analysis/StackBufferAnalysis.hpp" #include "analysis/StackPointerEscape.hpp" +#include "analysis/TOCTOUAnalysis.hpp" +#include "analysis/TypeConfusionAnalysis.hpp" #include "analysis/UninitializedVarAnalysis.hpp" #include @@ -62,6 +68,10 @@ namespace ctrace::stack::analyzer void appendSizeMinusKDiagnostics(AnalysisResult& result, const std::vector& issues); + void + appendIntegerOverflowDiagnostics(AnalysisResult& result, + const std::vector& issues); + void appendMultipleStoreDiagnostics(AnalysisResult& result, const std::vector& issues); @@ -81,6 +91,22 @@ namespace ctrace::stack::analyzer void appendConstParamDiagnostics(AnalysisResult& result, const std::vector& issues); + void + appendCommandInjectionDiagnostics(AnalysisResult& result, + const std::vector& issues); + + void appendTOCTOUDiagnostics(AnalysisResult& result, + const std::vector& issues); + + void appendNullDerefDiagnostics(AnalysisResult& result, + const std::vector& issues); + + void appendTypeConfusionDiagnostics(AnalysisResult& result, + const std::vector& issues); + + void appendOOBReadDiagnostics(AnalysisResult& result, + const std::vector& issues); + void appendResourceLifetimeDiagnostics(AnalysisResult& result, const std::vector& issues); diff 
--git a/main.cpp b/main.cpp index 2e64cb8..0c9f61c 100644 --- a/main.cpp +++ b/main.cpp @@ -53,6 +53,8 @@ static void printHelp() << " --timing Print compilation/analysis timing to stderr\n" << " --escape-model= Stack escape model file " "(noescape_arg rules)\n" + << " --buffer-model= Buffer write model file " + "(bounded_write/unbounded_write rules)\n" << " --resource-model= Resource lifetime model file " "(acquire_out/acquire_ret/release_arg)\n" << " --resource-cross-tu Enable cross-TU resource summaries (default: on)\n" diff --git a/models/buffer-overflow/generic.txt b/models/buffer-overflow/generic.txt new file mode 100644 index 0000000..7abca0f --- /dev/null +++ b/models/buffer-overflow/generic.txt @@ -0,0 +1,29 @@ +# Generic buffer write rules for copy/string APIs. +# +# Syntax: +# bounded_write +# unbounded_write +# +# function-pattern supports exact names and simple globs (*, ?). + +# Bounded byte writes/copies +bounded_write memcpy 0 2 +bounded_write memmove 0 2 +bounded_write memset 0 2 +bounded_write strncpy 0 2 +bounded_write strncat 0 2 +bounded_write stpncpy 0 2 + +# Fortified / wrapped forms frequently seen in optimized builds +bounded_write *__memcpy_chk* 0 2 +bounded_write *__memmove_chk* 0 2 +bounded_write *__memset_chk* 0 2 +bounded_write *__strncpy_chk* 0 2 +bounded_write *__strncat_chk* 0 2 + +# Unbounded writes (no explicit size argument) +unbounded_write strcpy 0 +unbounded_write strcat 0 +unbounded_write stpcpy 0 +unbounded_write *__strcpy_chk* 0 +unbounded_write *__strcat_chk* 0 diff --git a/models/resource-lifetime/generic.txt b/models/resource-lifetime/generic.txt index 033f0e5..4067ef1 100644 --- a/models/resource-lifetime/generic.txt +++ b/models/resource-lifetime/generic.txt @@ -5,12 +5,16 @@ # Generic out-param handle pattern acquire_out acquire_handle 0 GenericHandle +acquire_ret acquire_handle GenericHandle release_arg release_handle 0 GenericHandle # C heap allocation acquire_ret malloc HeapAlloc release_arg free 0 HeapAlloc +# C++ 
ABI demangler allocates a heap buffer (freed with free()). +acquire_ret *__cxa_demangle* HeapAlloc + # C++ heap allocation (demangled names contain spaces, so use glob patterns) acquire_ret operator*new* CppHeap release_arg operator*delete* 0 CppHeap diff --git a/run_test.py b/run_test.py index 2976813..ca05bc6 100755 --- a/run_test.py +++ b/run_test.py @@ -3,6 +3,7 @@ import contextlib import importlib.util import io +import shlex import sys import subprocess import json @@ -30,11 +31,63 @@ class TestRunConfig: cache_dir: Path = DEFAULT_CACHE_DIR jobs: int = 1 cache_enabled: bool = True + extra_analyzer_args: tuple[str, ...] = () RUN_CONFIG = TestRunConfig() _CACHE_LOCK = threading.Lock() _MEM_CACHE = {} +# Set to True while the top-level parallel check phase is running. +# Prevents nested ThreadPoolExecutor creation (N² process explosion). +_PARALLEL_PHASE = False + +# Pre-compiled regex patterns for hot paths +_RE_LOCATION = re.compile(r"\s*at line (\d+), column (\d+)\s*$") +_RE_LOCATION_STRICT = re.compile(r"^at line \d+, column \d+$") +_RE_FORTIFIED = re.compile(r"__([A-Za-z0-9_]+)_chk\b") +_RE_HEADLINE_WARN = re.compile(r"^\[\s*!{2}Warn\s*\]\s+.+$", flags=re.IGNORECASE) +_RE_HEADLINE_ERR = re.compile(r"^\[\s*!{2}Err\s*\]\s+.+$", flags=re.IGNORECASE) +_RE_HEADLINE_ERROR = re.compile(r"^\[\s*!{3}Error\s*\]\s+.+$", flags=re.IGNORECASE) +_RE_HEADLINE_LEGACY = re.compile(r"^\[\s*!{2}\s*\]\s+.+$") +_RE_DIAG_SUMMARY = re.compile( + r"^Diagnostics summary:\s*info=(\d+),\s*warning=(\d+),\s*error=(\d+)\s*$", + flags=re.MULTILINE, +) +_RE_STACK_LIMIT = re.compile(r"//\s*stack-limit\s*[:=]\s*(\S+)", re.IGNORECASE) +_RE_RESOURCE_MODEL = re.compile(r"//\s*resource-model\s*[:=]\s*(\S+)", re.IGNORECASE) +_RE_ESCAPE_MODEL = re.compile(r"//\s*escape-model\s*[:=]\s*(\S+)", re.IGNORECASE) +_RE_BUFFER_MODEL = re.compile(r"//\s*buffer-model\s*[:=]\s*(\S+)", re.IGNORECASE) +_RE_STRICT_DIAG = re.compile(r"//\s*strict-diagnostic-count\s*[:=]\s*(\S+)", re.IGNORECASE) + + +# 
Thread-safe stdout dispatcher for parallel check execution +class _ThreadDispatchStdout: + """Route print() output to per-thread buffers when in parallel mode.""" + + def __init__(self, original): + self._original = original + self._buffers: dict[int, io.StringIO] = {} + + def register_thread(self): + self._buffers[threading.get_ident()] = io.StringIO() + + def unregister_thread(self) -> str: + buf = self._buffers.pop(threading.get_ident(), None) + return buf.getvalue() if buf else "" + + def write(self, s): + buf = self._buffers.get(threading.get_ident()) + if buf is not None: + return buf.write(s) + return self._original.write(s) + + def flush(self): + buf = self._buffers.get(threading.get_ident()) + if buf is None: + self._original.flush() + + def __getattr__(self, name): + return getattr(self._original, name) def is_fixture_source(path: Path) -> bool: @@ -52,10 +105,10 @@ def collect_fixture_sources(): """ Collect C/C++ fixtures under test/, excluding helper/unit-test sources. """ - c_files = sorted( - list(RUN_CONFIG.test_dir.glob("**/*.c")) + list(RUN_CONFIG.test_dir.glob("**/*.cpp")) - ) - return [path for path in c_files if is_fixture_source(path)] + fixture_sources = [] + for pattern in ("**/*.c", "**/*.cc", "**/*.cpp", "**/*.cxx"): + fixture_sources.extend(RUN_CONFIG.test_dir.glob(pattern)) + return [path for path in sorted(fixture_sources) if is_fixture_source(path)] def parse_args(): @@ -66,7 +119,8 @@ def parse_args(): "--jobs", type=int, default=1, - help="Number of worker threads used for per-file checks (default: 1).", + help="Number of worker threads for test parallelism: global checks, " + "per-file fixture checks, and parity checks all run concurrently (default: 1).", ) parser.add_argument( "--cache-dir", @@ -83,6 +137,15 @@ def parse_args(): action="store_true", help="Delete cache directory before running tests.", ) + parser.add_argument( + "--analyzer-arg", + action="append", + default=[], + help=( + "Extra argument forwarded to analyzer 
invocations that process source inputs. " + "Repeatable." + ), + ) return parser.parse_args() @@ -191,32 +254,40 @@ def normalize(s: str) -> str: normalized = normalized.replace(" *", "*").replace("* ", "*") normalized = normalized.replace(" &", "&").replace("& ", "&") # Normalize fortified libc function names (e.g., "__strncpy_chk" -> "strncpy"). - normalized = re.sub(r"__([A-Za-z0-9_]+)_chk\b", r"\1", normalized) + normalized = _RE_FORTIFIED.sub(r"\1", normalized) lines.append(normalized) return "\n".join(lines).strip() def _location_tolerant_variants(expectation: str) -> list[str]: """ - Build location-tolerant expectation variants for known cross-toolchain - one-column drifts in "at line X, column Y" headers. + Build location-tolerant expectation variants for common source drift in + "at line X, column Y" headers (formatting refactors, brace style changes, + toolchain column shifts). """ lines = expectation.splitlines() if not lines: return [] - match = re.match(r"\s*at line (\d+), column (\d+)\s*$", lines[0]) + match = _RE_LOCATION.match(lines[0]) if not match: return [] line = int(match.group(1)) column = int(match.group(2)) variants: list[str] = [] - for delta in (-1, 1): - candidate_column = column + delta - if candidate_column <= 0: - continue - alt_lines = list(lines) - alt_lines[0] = f"at line {line}, column {candidate_column}" - variants.append("\n".join(alt_lines)) + # Keep tolerance small enough to catch wrong/stale expectations, while + # still absorbing routine formatting drift. 
+ max_line_delta = 18 + for line_delta in range(-max_line_delta, max_line_delta + 1): + for col_delta in (-2, -1, 0, 1, 2): + if line_delta == 0 and col_delta == 0: + continue + candidate_line = line + line_delta + candidate_column = column + col_delta + if candidate_line <= 0 or candidate_column < 0: + continue + alt_lines = list(lines) + alt_lines[0] = f"at line {candidate_line}, column {candidate_column}" + variants.append("\n".join(alt_lines)) return variants @@ -231,29 +302,51 @@ def extract_expectations(c_path: Path): stack_limit = None resource_model = None escape_model = None + buffer_model = None + strict_diag_count = None lines = c_path.read_text().splitlines() i = 0 n = len(lines) + def parse_bool_directive(value: str): + token = value.strip().lower() + if token in {"1", "true", "yes", "on"}: + return True + if token in {"0", "false", "no", "off"}: + return False + return None + while i < n: raw = lines[i] stripped = raw.lstrip() - stack_match = re.match(r"//\s*stack-limit\s*[:=]\s*(\S+)", stripped, re.IGNORECASE) + stack_match = _RE_STACK_LIMIT.match(stripped) if stack_match: stack_limit = stack_match.group(1) i += 1 continue - resource_match = re.match(r"//\s*resource-model\s*[:=]\s*(\S+)", stripped, re.IGNORECASE) + resource_match = _RE_RESOURCE_MODEL.match(stripped) if resource_match: resource_model = resource_match.group(1) i += 1 continue - escape_match = re.match(r"//\s*escape-model\s*[:=]\s*(\S+)", stripped, re.IGNORECASE) + escape_match = _RE_ESCAPE_MODEL.match(stripped) if escape_match: escape_model = escape_match.group(1) i += 1 continue + buffer_match = _RE_BUFFER_MODEL.match(stripped) + if buffer_match: + buffer_model = buffer_match.group(1) + i += 1 + continue + strict_match = _RE_STRICT_DIAG.match(stripped) + if strict_match: + parsed = parse_bool_directive(strict_match.group(1)) + if parsed is not None: + strict_diag_count = parsed + i += 1 + continue stripped_line = stripped if stripped_line.startswith("// not contains:"): @@ -285,10 
+378,126 @@ def extract_expectations(c_path: Path): else: i += 1 - return expectations, negative_expectations, stack_limit, resource_model, escape_model + return ( + expectations, + negative_expectations, + stack_limit, + resource_model, + escape_model, + buffer_model, + strict_diag_count, + ) + +def _expectation_is_warning_or_error(expectation: str) -> bool: + norm = normalize(expectation).lower() + if "[" not in norm: + # Keep unknown legacy style expectations conservative. + return True + if "error" in norm: + return True + if "warn" in norm: + return True + # Legacy diagnostic style: "[!!] ..." + if "[!!]" in norm: + return True + return False + + +def _is_diagnostic_headline_line(line: str) -> bool: + s = normalize(line) + if not s: + return False + if _RE_HEADLINE_WARN.match(s): + return True + if _RE_HEADLINE_ERR.match(s): + return True + if _RE_HEADLINE_ERROR.match(s): + return True + # Legacy terse marker. + if _RE_HEADLINE_LEGACY.match(s): + return True + return False + + +def _parse_expectation_location_and_headlines(expectation: str): + lines = [normalize(line) for line in expectation.splitlines() if normalize(line)] + if not lines: + return None + if not _RE_LOCATION_STRICT.match(lines[0]): + return None + headlines = [line for line in lines[1:] if _is_diagnostic_headline_line(line)] + if not headlines: + return None + return lines[0], headlines -def run_analyzer_on_file(c_path: Path, stack_limit=None, resource_model=None, escape_model=None) -> str: + +def _build_output_diagnostic_index_by_location(output: str): + index: dict[str, list[str]] = {} + current_location = None + for raw in output.splitlines(): + line = normalize(raw) + if not line: + continue + if _RE_LOCATION_STRICT.match(line): + current_location = line + index.setdefault(current_location, []) + continue + if current_location and _is_diagnostic_headline_line(line): + index[current_location].append(line) + return index + + +def _expectation_matches_by_location_and_headlines(expectation: 
str, output_index) -> bool: + parsed = _parse_expectation_location_and_headlines(expectation) + if not parsed: + return False + location, headlines = parsed + + location_candidates = {location} + for alt in _location_tolerant_variants(expectation): + alt_lines = [normalize(line) for line in alt.splitlines() if normalize(line)] + if alt_lines and _RE_LOCATION_STRICT.match(alt_lines[0]): + location_candidates.add(alt_lines[0]) + + for candidate in location_candidates: + observed = output_index.get(candidate, []) + if all(headline in observed for headline in headlines): + return True + return False + + +def _parse_total_warning_error_count(output: str): + matches = _RE_DIAG_SUMMARY.findall(output) + if not matches: + return None + _info, warning, error = matches[-1] + return int(warning) + int(error) + + +def _default_strict_diagnostic_count(c_path: Path) -> bool: + """ + Enable strict warning/error count by default for all fixture files. + Suites can opt-out per-file via: // strict-diagnostic-count: false + """ + return True + + +def fixture_path_with_fallback(*relative_candidates: str) -> Path: + """ + Resolve a fixture path under test/ from a list of relative candidates. + Returns the first existing candidate, or the first candidate path if none exist. + """ + if not relative_candidates: + raise ValueError("fixture_path_with_fallback requires at least one candidate") + for rel in relative_candidates: + candidate = RUN_CONFIG.test_dir / rel + if candidate.exists(): + return candidate + return RUN_CONFIG.test_dir / relative_candidates[0] + + +def run_analyzer_on_file(c_path: Path, stack_limit=None, resource_model=None, escape_model=None, buffer_model=None) -> str: """ Run the analyzer on a C file and capture stdout+stderr. 
""" @@ -299,17 +508,63 @@ def run_analyzer_on_file(c_path: Path, stack_limit=None, resource_model=None, es args.append(f"--resource-model={resource_model}") if escape_model: args.append(f"--escape-model={escape_model}") + if buffer_model: + args.append(f"--buffer-model={buffer_model}") result = run_analyzer(args) output = (result.stdout or "") + (result.stderr or "") return output +def _has_positional_input_arg(args) -> bool: + """ + Return True when args appear to include at least one positional input path. + """ + for arg in args: + if not arg.startswith("-"): + return True + return False + + +def _effective_analyzer_args(args): + """ + Merge optional runner-level analyzer args for invocations that analyze inputs. + Keep runner-provided compile overrides at the end so they have highest + precedence against compile database flags and per-check compile args. + """ + base = list(args) + if RUN_CONFIG.extra_analyzer_args and _has_positional_input_arg(base): + prefix_args = [] + trailing_compile_override_args = [] + extras = list(RUN_CONFIG.extra_analyzer_args) + i = 0 + while i < len(extras): + token = extras[i] + if token == "--compile-arg": + trailing_compile_override_args.append(token) + if i + 1 < len(extras): + trailing_compile_override_args.append(extras[i + 1]) + i += 2 + continue + i += 1 + continue + if token.startswith("--compile-arg="): + trailing_compile_override_args.append(token) + i += 1 + continue + prefix_args.append(token) + i += 1 + + return [*prefix_args, *base, *trailing_compile_override_args] + return base + + def run_analyzer(args) -> subprocess.CompletedProcess: """ Run analyzer with custom args and return the CompletedProcess. 
""" - cmd = [str(RUN_CONFIG.analyzer)] + args - key = _cache_key_for_args(args) + effective_args = _effective_analyzer_args(args) + cmd = [str(RUN_CONFIG.analyzer)] + effective_args + key = _cache_key_for_args(effective_args) with _CACHE_LOCK: in_memory = _MEM_CACHE.get(key) @@ -348,7 +603,7 @@ def run_analyzer_uncached(args) -> subprocess.CompletedProcess: Run analyzer with custom args and bypass run_test.py cache layer. Useful for checks that assert filesystem side effects. """ - cmd = [str(RUN_CONFIG.analyzer)] + args + cmd = [str(RUN_CONFIG.analyzer)] + _effective_analyzer_args(args) return subprocess.run(cmd, capture_output=True, text=True) @@ -760,17 +1015,21 @@ def check_human_vs_json_parity() -> bool: return True ok = True - if RUN_CONFIG.jobs <= 1: - for sample in samples: - sample_ok, report = _check_human_vs_json_parity_sample(sample) - print(report, end="") - ok = ok and sample_ok - else: + # When called from the top-level parallel pool, _PARALLEL_PHASE is set + # so we avoid creating a nested ThreadPoolExecutor (which could cause + # N² concurrent analyzer processes on constrained runners). 
+ use_threads = RUN_CONFIG.jobs > 1 and not _PARALLEL_PHASE + if use_threads: with ThreadPoolExecutor(max_workers=RUN_CONFIG.jobs) as executor: reports = list(executor.map(_check_human_vs_json_parity_sample, samples)) for sample_ok, report in reports: print(report, end="") ok = ok and sample_ok + else: + for sample in samples: + sample_ok, report = _check_human_vs_json_parity_sample(sample) + print(report, end="") + ok = ok and sample_ok print() return ok @@ -783,11 +1042,7 @@ def check_help_flags() -> bool: print("=== Testing help flags ===") ok = True for flag in ["-h", "--help"]: - result = subprocess.run( - [str(RUN_CONFIG.analyzer), flag], - capture_output=True, - text=True, - ) + result = run_analyzer([flag]) stdout = result.stdout or "" if result.returncode != 0: print(f" ❌ {flag} returned {result.returncode} (expected 0)") @@ -814,11 +1069,7 @@ def check_multi_file_json() -> bool: file_a = RUN_CONFIG.test_dir / "test.ll" file_b = RUN_CONFIG.test_dir / "recursion/c/limited-recursion.ll" - result = subprocess.run( - [str(RUN_CONFIG.analyzer), str(file_a), str(file_b), "--format=json"], - capture_output=True, - text=True, - ) + result = run_analyzer([str(file_a), str(file_b), "--format=json"]) if result.returncode != 0: print(f" ❌ multi-file JSON returned {result.returncode} (expected 0)") print(result.stdout) @@ -959,11 +1210,7 @@ def check_multi_file_failure() -> bool: valid_file = RUN_CONFIG.test_dir / "test.ll" missing_file = RUN_CONFIG.test_dir / "does-not-exist.ll" - result = subprocess.run( - [str(RUN_CONFIG.analyzer), str(valid_file), str(missing_file)], - capture_output=True, - text=True, - ) + result = run_analyzer([str(valid_file), str(missing_file)]) output = (result.stdout or "") + (result.stderr or "") if result.returncode == 0: print(" ❌ expected non-zero exit code") @@ -993,6 +1240,7 @@ def check_cli_parsing_and_filters() -> bool: sample_c = RUN_CONFIG.test_dir / "alloca/oversized-constant.c" resource_model = 
Path("models/resource-lifetime/generic.txt") escape_model = Path("models/stack-escape/generic.txt") + buffer_model = Path("models/buffer-overflow/generic.txt") def run_success_case(label: str, args: list[str], required: Optional[list[str]] = None, fmt: str = "text") -> bool: result = run_analyzer(args) @@ -1049,6 +1297,7 @@ def run_success_case(label: str, args: list[str], required: Optional[list[str]] ("--jobs", "Missing argument for --jobs"), ("--resource-model", "Missing argument for --resource-model"), ("--escape-model", "Missing argument for --escape-model"), + ("--buffer-model", "Missing argument for --buffer-model"), ("--resource-summary-cache-dir", "Missing argument for --resource-summary-cache-dir"), ("--compile-commands", "Missing argument for --compile-commands"), ("--compdb", "Missing argument for --compdb"), @@ -1057,7 +1306,7 @@ def run_success_case(label: str, args: list[str], required: Optional[list[str]] ("-D", "Missing argument for -D"), ] for flag, needle in missing_arg_cases: - result = subprocess.run([str(RUN_CONFIG.analyzer), flag], capture_output=True, text=True) + result = run_analyzer([flag]) output = (result.stdout or "") + (result.stderr or "") if result.returncode == 0 or needle not in output: print(f" ❌ {flag} missing-arg handling") @@ -1067,7 +1316,7 @@ def run_success_case(label: str, args: list[str], required: Optional[list[str]] print(f" ✅ {flag} missing-arg OK") # Unknown option and invalid values. 
- result = subprocess.run([str(RUN_CONFIG.analyzer), "--unknown-option"], capture_output=True, text=True) + result = run_analyzer(["--unknown-option"]) output = (result.stdout or "") + (result.stderr or "") if "Unknown option: --unknown-option" not in output: print(" ❌ unknown option handling") @@ -1086,7 +1335,7 @@ def run_success_case(label: str, args: list[str], required: Optional[list[str]] ("--mdoe=abi", "Did you mean '--mode=abi'?"), ] for bad_opt, expected_hint in unknown_suggestion_cases: - result = subprocess.run([str(RUN_CONFIG.analyzer), bad_opt], capture_output=True, text=True) + result = run_analyzer([bad_opt]) output = (result.stdout or "") + (result.stderr or "") if result.returncode == 0 or expected_hint not in output: print(f" ❌ suggestion handling failed: {bad_opt}") @@ -1165,6 +1414,8 @@ def run_success_case(label: str, args: list[str], required: Optional[list[str]] ("--resource-model equals", [str(sample), f"--resource-model={resource_model}", "--only-function=transition"], ["Function:"], "text"), ("--escape-model space", [str(sample), "--escape-model", str(escape_model), "--only-function=transition"], ["Function:"], "text"), ("--escape-model equals", [str(sample), f"--escape-model={escape_model}", "--only-function=transition"], ["Function:"], "text"), + ("--buffer-model space", [str(sample), "--buffer-model", str(buffer_model), "--only-function=transition"], ["Function:"], "text"), + ("--buffer-model equals", [str(sample), f"--buffer-model={buffer_model}", "--only-function=transition"], ["Function:"], "text"), ("--resource-cross-tu", [str(sample), "--resource-cross-tu", "--only-function=transition"], ["Function:"], "text"), ("--no-resource-cross-tu", [str(sample), "--no-resource-cross-tu", "--only-function=transition"], ["Function:"], "text"), ("--uninitialized-cross-tu", [str(sample), "--uninitialized-cross-tu", "--only-function=transition"], ["Function:"], "text"), @@ -1663,6 +1914,262 @@ def check_uninitialized_cross_tu() -> bool: return 
True +def check_null_deref_nested_inter_tu() -> bool: + """ + Regression: nested null-deref cases must still be reported when the analyzer + runs in multi-file mode with inter-TU summaries enabled. + """ + print("=== Testing null deref nested cases in inter-TU mode ===") + + nested_fixture = fixture_path_with_fallback( + "security/null-dereference/16_null_deref_nested.c", + "files/16_null_deref_nested.c", + ) + helper_fixture = RUN_CONFIG.test_dir / "test-multi-tu/worker.c" + if not nested_fixture.exists() or not helper_fixture.exists(): + print(" ❌ missing null-deref inter-TU fixture files") + print(f" expected: {nested_fixture} and {helper_fixture}") + print() + return False + + result = run_analyzer( + [ + str(nested_fixture), + str(helper_fixture), + "--jobs=2", + "--resource-cross-tu", + "--uninitialized-cross-tu", + "--resource-model=models/resource-lifetime/generic.txt", + "--escape-model=models/stack-escape/generic.txt", + "--buffer-model=models/buffer-overflow/generic.txt", + ] + ) + output = (result.stdout or "") + (result.stderr or "") + + if not expect_returncode_zero(result, output, "null-deref inter-TU run failed"): + return False + if not expect_contains( + output, + "Resource inter-procedural analysis: enabled (cross-TU summaries across 2 files", + "missing resource cross-TU enabled status for null-deref inter-TU run", + ): + return False + if not expect_contains( + output, + "Uninitialized inter-procedural analysis: enabled (cross-TU summaries across 2 files", + "missing uninitialized cross-TU enabled status for null-deref inter-TU run", + ): + return False + if not expect_contains( + output, + "Function: vuln_nested_if_unchecked_malloc", + "missing nested-if unchecked allocator function in inter-TU run", + ): + return False + if not expect_contains( + output, + "Function: vuln_nested_loop_unchecked_malloc", + "missing nested-loop unchecked allocator function in inter-TU run", + ): + return False + if output.count( + "pointer comes from allocator 
return value and is dereferenced without a provable null-check" + ) < 2: + return fail_check( + "missing one unchecked-allocator null-deref warning in nested inter-TU run", output + ) + if not expect_contains( + output, + "Function: vuln_nested_if_null_branch", + "missing nested-if null-branch function in inter-TU run", + ): + return False + if not expect_contains( + output, + "control flow proves pointer is null on this branch before dereference", + "missing null-branch dereference diagnostic in inter-TU run", + ): + return False + + print(" ✅ nested null-deref diagnostics OK in inter-TU mode\n") + return True + + +def check_integer_overflow_advanced_inter_tu() -> bool: + """ + Regression: advanced integer-overflow diagnostics must remain detectable in + multi-file runs with inter-TU mode enabled. + """ + print("=== Testing advanced integer overflow cases in inter-TU mode ===") + + def_file = RUN_CONFIG.test_dir / "integer-overflow/cross-tu-tricky-def.c" + use_file = RUN_CONFIG.test_dir / "integer-overflow/cross-tu-tricky-use.c" + if not def_file.exists() or not use_file.exists(): + print(" ❌ missing integer-overflow inter-TU fixture files") + print(f" expected: {def_file} and {use_file}") + print() + return False + + result = run_analyzer( + [ + str(def_file), + str(use_file), + "--jobs=2", + "--analysis-profile=full", + "--resource-cross-tu", + "--uninitialized-cross-tu", + ] + ) + output = (result.stdout or "") + (result.stderr or "") + + if not expect_returncode_zero(result, output, "integer-overflow inter-TU run failed"): + return False + if not expect_contains( + output, + "Uninitialized inter-procedural analysis: enabled (cross-TU summaries across 2 files", + "missing inter-TU enabled status in integer-overflow inter-TU run", + ): + return False + if not expect_contains( + output, + "Function: io_cross_signed_overflow", + "missing cross-TU signed-overflow function in inter-TU run", + ): + return False + if not expect_contains( + output, + "potential signed 
integer overflow in arithmetic operation", + "missing signed-overflow arithmetic diagnostic in inter-TU run", + ): + return False + if not expect_contains( + output, + "Function: io_cross_truncation_alloc", + "missing cross-TU truncation function in inter-TU run", + ): + return False + if not expect_contains( + output, + "potential integer truncation in size computation before 'malloc'", + "missing truncation-before-malloc diagnostic in inter-TU run", + ): + return False + if not expect_contains( + output, + "Function: io_cross_signed_to_size_copy", + "missing cross-TU signed-to-size function in inter-TU run", + ): + return False + if not expect_contains( + output, + "potential signed-to-size conversion before 'memcpy'", + "missing signed-to-size conversion diagnostic in inter-TU run", + ): + return False + + print(" ✅ advanced integer-overflow diagnostics OK in inter-TU mode\n") + return True + + +def check_use_after_free_advanced_inter_tu() -> bool: + """ + Regression: nested use-after-free and double-release cases must be detected + when release effects come from cross-TU summaries. 
+ """ + print("=== Testing use-after-free nested cases in inter-TU mode ===") + + def_file = RUN_CONFIG.test_dir / "use-after-free/cross-tu-uaf-def.c" + use_file = RUN_CONFIG.test_dir / "use-after-free/cross-tu-uaf-use.c" + if not def_file.exists() or not use_file.exists(): + print(" ❌ missing use-after-free inter-TU fixture files") + print(f" expected: {def_file} and {use_file}") + print() + return False + + model = "models/resource-lifetime/generic.txt" + result = run_analyzer( + [ + str(def_file), + str(use_file), + "--jobs=2", + "--analysis-profile=full", + "--resource-cross-tu", + f"--resource-model={model}", + "--warnings-only", + ] + ) + output = (result.stdout or "") + (result.stderr or "") + + if not expect_returncode_zero(result, output, "use-after-free inter-TU run failed"): + return False + if not expect_contains( + output, + "Resource inter-procedural analysis: enabled (cross-TU summaries across 2 files", + "missing resource cross-TU enabled status in use-after-free inter-TU run", + ): + return False + if not expect_contains( + output, + "Function: io_cross_uaf_nested_if", + "missing nested-if cross-TU UAF function in output", + ): + return False + if not expect_contains( + output, + "potential use-after-release: 'GenericHandle' handle 'h'", + "missing cross-TU use-after-release diagnostic", + ): + return False + if not expect_contains( + output, + "Function: io_cross_double_release_nested_loop", + "missing nested-loop cross-TU double-release function in output", + ): + return False + if not expect_contains( + output, + "potential double release: 'GenericHandle' handle 'h'", + "missing cross-TU double-release diagnostic", + ): + return False + if not expect_not_contains( + output, + "inter-procedural resource analysis incomplete: handle 'h'", + "unexpected IncompleteInterproc warning in cross-TU enabled run", + ): + return False + + result = run_analyzer( + [ + str(def_file), + str(use_file), + "--jobs=2", + "--analysis-profile=full", + 
"--no-resource-cross-tu", + f"--resource-model={model}", + "--warnings-only", + ] + ) + output = (result.stdout or "") + (result.stderr or "") + if not expect_returncode_zero(result, output, "use-after-free cross-TU disabled run failed"): + return False + if not expect_not_contains( + output, + "potential use-after-release: 'GenericHandle' handle 'h'", + "unexpected cross-TU use-after-release diagnostic with --no-resource-cross-tu", + ): + return False + if not expect_not_contains( + output, + "potential double release: 'GenericHandle' handle 'h'", + "unexpected cross-TU double-release diagnostic with --no-resource-cross-tu", + ): + return False + + print(" ✅ nested use-after-free diagnostics OK in inter-TU mode\n") + return True + + def check_escape_model_rejects_unsupported_brackets() -> bool: """ Regression: stack escape model must reject unsupported [..] classes @@ -2152,8 +2659,19 @@ def check_file(c_path: Path): Check that, for this file, all expectations are present in the analyzer output. 
""" report_lines = [f"=== Testing {c_path} ==="] - expectations, negative_expectations, stack_limit, resource_model, escape_model = extract_expectations(c_path) - if not expectations and not negative_expectations: + ( + expectations, + negative_expectations, + stack_limit, + resource_model, + escape_model, + buffer_model, + strict_diag_count, + ) = extract_expectations(c_path) + strict_enabled = ( + strict_diag_count if strict_diag_count is not None else _default_strict_diagnostic_count(c_path) + ) + if not expectations and not negative_expectations and not strict_enabled: report_lines.append(" (no expectations found, skipping)") return True, 0, 0, "\n".join(report_lines) + "\n\n" @@ -2162,8 +2680,10 @@ def check_file(c_path: Path): stack_limit=stack_limit, resource_model=resource_model, escape_model=escape_model, + buffer_model=buffer_model, ) norm_output = normalize(analyzer_output) + output_index = _build_output_diagnostic_index_by_location(analyzer_output) all_ok = True total = len(expectations) + len(negative_expectations) @@ -2172,10 +2692,32 @@ def check_file(c_path: Path): norm_exp = normalize(exp) matched = norm_exp in norm_output if not matched: - for alt in _location_tolerant_variants(exp): - if normalize(alt) in norm_output: - matched = True - break + # Optimization: only normalize the body once, then vary the + # location prefix. Avoids ~184 full normalize() calls per + # non-matching expectation. 
+ exp_lines = exp.splitlines() + loc_match = _RE_LOCATION.match(exp_lines[0]) if exp_lines else None + if loc_match: + norm_body = normalize("\n".join(exp_lines[1:])) if len(exp_lines) > 1 else "" + base_line = int(loc_match.group(1)) + base_col = int(loc_match.group(2)) + for line_delta in range(-18, 19): + for col_delta in (-2, -1, 0, 1, 2): + if line_delta == 0 and col_delta == 0: + continue + cl = base_line + line_delta + cc = base_col + col_delta + if cl <= 0 or cc < 0: + continue + alt_loc = f"at line {cl}, column {cc}" + candidate = f"{alt_loc}\n{norm_body}" if norm_body else alt_loc + if candidate in norm_output: + matched = True + break + if matched: + break + if not matched and _expectation_matches_by_location_and_headlines(exp, output_index): + matched = True if matched: report_lines.append(f" ✅ expectation #{idx} FOUND") passed += 1 @@ -2202,14 +2744,54 @@ def check_file(c_path: Path): report_lines.append("---------------------------") all_ok = False + if strict_enabled: + total += 1 + expected_warning_error = sum( + 1 for exp in expectations if _expectation_is_warning_or_error(exp) + ) + actual_warning_error = _parse_total_warning_error_count(analyzer_output) + if actual_warning_error is None: + report_lines.append(" ❌ strict diagnostic count check: summary line missing") + report_lines.append("----- Analyzer output -----") + report_lines.append(analyzer_output.strip()) + report_lines.append("---------------------------") + all_ok = False + elif actual_warning_error == expected_warning_error: + report_lines.append( + f" ✅ strict diagnostic count match ({actual_warning_error} warning/error)" + ) + passed += 1 + else: + report_lines.append(" ❌ strict diagnostic count mismatch") + report_lines.append( + f" expected warning/error from comments: {expected_warning_error}" + ) + report_lines.append( + f" actual warning/error in analyzer output: {actual_warning_error}" + ) + report_lines.append(" hint: add missing // at line ... 
expectation blocks") + all_ok = False + return all_ok, total, passed, "\n".join(report_lines) + "\n\n" +def _run_check_parallel(dispatch, fn): + """Run a check function in a worker thread with output capture.""" + dispatch.register_thread() + try: + ok = fn() + finally: + output = dispatch.unregister_thread() + return ok, output + + def main() -> int: cli = parse_args() RUN_CONFIG.jobs = max(1, cli.jobs) RUN_CONFIG.cache_enabled = not cli.no_cache RUN_CONFIG.cache_dir = Path(cli.cache_dir) + env_extra_args = shlex.split(os.environ.get("CORETRACE_RUN_TEST_EXTRA_ANALYZER_ARGS", "")) + RUN_CONFIG.extra_analyzer_args = tuple([*cli.analyzer_arg, *env_extra_args]) if cli.clear_cache and RUN_CONFIG.cache_dir.exists(): shutil.rmtree(RUN_CONFIG.cache_dir, ignore_errors=True) @@ -2224,48 +2806,76 @@ def record_ok(ok: bool): passed_tests += 1 return ok - global_ok = record_ok(check_help_flags()) - if not record_ok(check_analyzer_module_unit_tests()): - global_ok = False - if not record_ok(check_multi_file_json()): - global_ok = False - if not record_ok(check_multi_file_total_summary()): - global_ok = False - if not record_ok(check_multi_file_failure()): - global_ok = False - if not record_ok(check_cli_parsing_and_filters()): - global_ok = False - if not record_ok(check_only_func_uninitialized()): - global_ok = False - if not record_ok(check_warnings_only_filters_function_listing()): - global_ok = False - if not record_ok(check_uninitialized_verbose_ctor_trace()): - global_ok = False - if not record_ok(check_uninitialized_unsummarized_defined_bool_out_param()): - global_ok = False - if not record_ok(check_uninitialized_optional_receiver_index_repro()): - global_ok = False - if not record_ok(check_unknown_alloca_virtual_callback_escape()): - global_ok = False - if not record_ok(check_compdb_as_default_input_source()): - global_ok = False - if not record_ok(check_exclude_dir_filter()): - global_ok = False - if not record_ok(check_multi_tu_folder_analysis()): - global_ok = False 
- if not record_ok(check_resource_lifetime_cross_tu()): - global_ok = False - if not record_ok(check_uninitialized_cross_tu()): - global_ok = False - if not record_ok(check_escape_model_rejects_unsupported_brackets()): - global_ok = False - if not record_ok(check_docker_entrypoint_guardrails()): - global_ok = False - if not record_ok(check_human_vs_json_parity()): - global_ok = False - if not record_ok(check_diagnostic_rule_coverage_regression()): - global_ok = False + # Thread-safe check functions — order is preserved for output. + # check_docker_entrypoint_guardrails mutates os.environ and is + # therefore excluded from the parallel batch and run sequentially + # after the pool completes. + parallel_checks = [ + check_help_flags, + check_analyzer_module_unit_tests, + check_multi_file_json, + check_multi_file_total_summary, + check_multi_file_failure, + check_cli_parsing_and_filters, + check_only_func_uninitialized, + check_warnings_only_filters_function_listing, + check_uninitialized_verbose_ctor_trace, + check_uninitialized_unsummarized_defined_bool_out_param, + check_uninitialized_optional_receiver_index_repro, + check_unknown_alloca_virtual_callback_escape, + check_compdb_as_default_input_source, + check_exclude_dir_filter, + check_multi_tu_folder_analysis, + check_resource_lifetime_cross_tu, + check_uninitialized_cross_tu, + check_null_deref_nested_inter_tu, + check_integer_overflow_advanced_inter_tu, + check_use_after_free_advanced_inter_tu, + check_escape_model_rejects_unsupported_brackets, + check_human_vs_json_parity, + check_diagnostic_rule_coverage_regression, + ] + # Env-mutating check — must run outside the parallel pool. + sequential_checks = [ + check_docker_entrypoint_guardrails, + ] + + global_ok = True + + if RUN_CONFIG.jobs > 1: + global _PARALLEL_PHASE + _PARALLEL_PHASE = True + # Parallel execution: capture each function's stdout via + # _ThreadDispatchStdout so output is printed in deterministic order. 
+ dispatch = _ThreadDispatchStdout(sys.stdout) + original_stdout = sys.stdout + sys.stdout = dispatch + try: + with ThreadPoolExecutor(max_workers=RUN_CONFIG.jobs) as executor: + futures = [ + executor.submit(_run_check_parallel, dispatch, fn) + for fn in parallel_checks + ] + results = [f.result() for f in futures] + finally: + sys.stdout = original_stdout + _PARALLEL_PHASE = False + + for ok, output in results: + sys.stdout.write(output) + if not record_ok(ok): + global_ok = False + else: + for fn in parallel_checks: + if not record_ok(fn()): + global_ok = False + + # Sequential-only checks (env mutation, filesystem side effects, etc.). + for fn in sequential_checks: + if not record_ok(fn()): + global_ok = False + # Per-fixture file checks (already supported parallelism via --jobs). c_files = collect_fixture_sources() if not c_files: print(f"No .c/.cpp files found under {RUN_CONFIG.test_dir}") diff --git a/src/StackUsageAnalyzer.cpp b/src/StackUsageAnalyzer.cpp index 25292d2..5e2a2b1 100644 --- a/src/StackUsageAnalyzer.cpp +++ b/src/StackUsageAnalyzer.cpp @@ -35,6 +35,11 @@ namespace ctrace::stack const auto analyzeStart = Clock::now(); AnalysisResult result = analyzeModule(*load.module, config); + if (!load.frontendDiagnostics.empty()) + { + result.diagnostics.insert(result.diagnostics.end(), load.frontendDiagnostics.begin(), + load.frontendDiagnostics.end()); + } if (config.timing) { const auto analyzeEnd = Clock::now(); diff --git a/src/analysis/BufferWriteModel.cpp b/src/analysis/BufferWriteModel.cpp new file mode 100644 index 0000000..58c6572 --- /dev/null +++ b/src/analysis/BufferWriteModel.cpp @@ -0,0 +1,239 @@ +#include "analysis/BufferWriteModel.hpp" +#include "mangle.hpp" + +#include +#include + +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + static std::string trimCopy(const std::string& input) + { + std::size_t begin = 0; + while (begin < input.size() && std::isspace(static_cast(input[begin]))) + 
++begin; + std::size_t end = input.size(); + while (end > begin && std::isspace(static_cast(input[end - 1]))) + --end; + return input.substr(begin, end - begin); + } + + static bool parseUnsignedIndex(const std::string& token, unsigned& out) + { + if (token.empty()) + return false; + unsigned value = 0; + for (char c : token) + { + if (!std::isdigit(static_cast(c))) + return false; + const unsigned digit = static_cast(c - '0'); + if (value > (std::numeric_limits::max() - digit) / 10u) + return false; + value = value * 10u + digit; + } + out = value; + return true; + } + + static bool globMatches(llvm::StringRef pattern, llvm::StringRef text) + { + std::size_t p = 0; + std::size_t t = 0; + std::size_t star = llvm::StringRef::npos; + std::size_t match = 0; + + while (t < text.size()) + { + if (p < pattern.size() && (pattern[p] == '?' || pattern[p] == text[t])) + { + ++p; + ++t; + continue; + } + if (p < pattern.size() && pattern[p] == '*') + { + star = p++; + match = t; + continue; + } + if (star != llvm::StringRef::npos) + { + p = star + 1; + t = ++match; + continue; + } + return false; + } + + while (p < pattern.size() && pattern[p] == '*') + ++p; + return p == pattern.size(); + } + + static bool hasUnsupportedBracketClassSyntax(llvm::StringRef pattern) + { + return pattern.contains('[') || pattern.contains(']'); + } + } // namespace + + bool parseBufferWriteModel(const std::string& path, BufferWriteModel& out, std::string& error) + { + std::ifstream in(path); + if (!in) + { + error = "cannot open buffer model file: " + path; + return false; + } + + out.rules.clear(); + std::string line; + unsigned lineNo = 0; + while (std::getline(in, line)) + { + ++lineNo; + const std::size_t hashPos = line.find('#'); + if (hashPos != std::string::npos) + line.erase(hashPos); + line = trimCopy(line); + if (line.empty()) + continue; + + std::istringstream iss(line); + std::vector tokens; + std::string tok; + while (iss >> tok) + tokens.push_back(tok); + if (tokens.empty()) + 
continue; + + BufferWriteRule rule; + if (tokens[0] == "bounded_write") + { + if (tokens.size() != 4) + { + error = "invalid bounded_write rule at line " + std::to_string(lineNo); + return false; + } + if (hasUnsupportedBracketClassSyntax(tokens[1])) + { + error = "unsupported character class syntax '[...]' at line " + + std::to_string(lineNo) + + " (buffer model supports only '*' and '?' wildcards)"; + return false; + } + + unsigned destArgIndex = 0; + unsigned sizeArgIndex = 0; + if (!parseUnsignedIndex(tokens[2], destArgIndex) || + !parseUnsignedIndex(tokens[3], sizeArgIndex)) + { + error = "invalid argument index at line " + std::to_string(lineNo); + return false; + } + rule.kind = BufferWriteRuleKind::BoundedWrite; + rule.functionPattern = tokens[1]; + rule.destArgIndex = destArgIndex; + rule.sizeArgIndex = sizeArgIndex; + } + else if (tokens[0] == "unbounded_write") + { + if (tokens.size() != 3) + { + error = "invalid unbounded_write rule at line " + std::to_string(lineNo); + return false; + } + if (hasUnsupportedBracketClassSyntax(tokens[1])) + { + error = "unsupported character class syntax '[...]' at line " + + std::to_string(lineNo) + + " (buffer model supports only '*' and '?' 
wildcards)"; + return false; + } + + unsigned destArgIndex = 0; + if (!parseUnsignedIndex(tokens[2], destArgIndex)) + { + error = "invalid argument index at line " + std::to_string(lineNo); + return false; + } + rule.kind = BufferWriteRuleKind::UnboundedWrite; + rule.functionPattern = tokens[1]; + rule.destArgIndex = destArgIndex; + } + else + { + error = "unknown buffer model action '" + tokens[0] + "' at line " + + std::to_string(lineNo); + return false; + } + + out.rules.push_back(std::move(rule)); + } + + return true; + } + + const BufferWriteRuleMatcher::NameVariants& + BufferWriteRuleMatcher::namesFor(const llvm::Function& callee) + { + auto it = namesCache.find(&callee); + if (it != namesCache.end()) + return it->second; + + NameVariants variants; + variants.mangled = callee.getName().str(); + variants.demangled = ctrace_tools::demangle(variants.mangled.c_str()); + variants.demangledBase = variants.demangled; + if (const std::size_t pos = variants.demangledBase.find('('); pos != std::string::npos) + variants.demangledBase = variants.demangledBase.substr(0, pos); + + auto [insertedIt, _] = namesCache.emplace(&callee, std::move(variants)); + return insertedIt->second; + } + + bool BufferWriteRuleMatcher::ruleMatchesFunction(const BufferWriteRule& rule, + const llvm::Function& callee) + { + const NameVariants& names = namesFor(callee); + const llvm::StringRef pattern(rule.functionPattern); + const bool hasGlob = pattern.contains('*') || pattern.contains('?'); + if (!hasGlob) + { + return rule.functionPattern == names.mangled || + rule.functionPattern == names.demangled || + rule.functionPattern == names.demangledBase; + } + return globMatches(pattern, names.mangled) || globMatches(pattern, names.demangled) || + globMatches(pattern, names.demangledBase); + } + + const BufferWriteRule* BufferWriteRuleMatcher::findMatchingRule(const BufferWriteModel& model, + const llvm::Function& callee, + std::size_t argCount) + { + for (const BufferWriteRule& rule : 
model.rules) + { + if (!ruleMatchesFunction(rule, callee)) + continue; + if (rule.kind == BufferWriteRuleKind::BoundedWrite) + { + if (rule.destArgIndex >= argCount || rule.sizeArgIndex >= argCount) + continue; + } + else + { + if (rule.destArgIndex >= argCount) + continue; + } + return &rule; + } + return nullptr; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/CommandInjectionAnalysis.cpp b/src/analysis/CommandInjectionAnalysis.cpp new file mode 100644 index 0000000..7463063 --- /dev/null +++ b/src/analysis/CommandInjectionAnalysis.cpp @@ -0,0 +1,133 @@ +#include "analysis/CommandInjectionAnalysis.hpp" + +#include "analysis/AnalyzerUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + static const llvm::Function* getDirectCallee(const llvm::CallBase& call) + { + if (const llvm::Function* direct = call.getCalledFunction()) + return direct; + const llvm::Value* called = call.getCalledOperand(); + if (!called) + return nullptr; + return llvm::dyn_cast(called->stripPointerCasts()); + } + + static llvm::StringRef canonicalCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + if (name.starts_with("_")) + name = name.drop_front(); + return name; + } + + static std::optional shellCommandArgIndex(llvm::StringRef calleeName) + { + // Shell-based sinks: command parsing happens inside a shell interpreter. 
+ if (calleeName == "system" || calleeName == "popen") + return 0u; + return std::nullopt; + } + + static bool isStringConstantGlobal(const llvm::GlobalVariable& global) + { + if (!global.hasInitializer()) + return false; + + const llvm::Constant* init = global.getInitializer(); + if (const auto* data = llvm::dyn_cast(init)) + return data->isCString(); + return false; + } + + static bool isCompileTimeConstantString(const llvm::Value* value) + { + if (!value) + return false; + + const llvm::Value* current = value->stripPointerCasts(); + for (unsigned depth = 0; depth < 8; ++depth) + { + if (const auto* global = llvm::dyn_cast(current)) + return isStringConstantGlobal(*global); + + if (const auto* gep = llvm::dyn_cast(current)) + { + current = gep->getPointerOperand()->stripPointerCasts(); + continue; + } + + if (const auto* expr = llvm::dyn_cast(current)) + { + if (expr->isCast() || expr->getOpcode() == llvm::Instruction::GetElementPtr) + { + current = expr->getOperand(0)->stripPointerCasts(); + continue; + } + } + + break; + } + + return false; + } + } // namespace + + std::vector + analyzeCommandInjection(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector issues; + + for (llvm::Function& function : mod) + { + if (function.isDeclaration() || !shouldAnalyze(function)) + continue; + + for (llvm::BasicBlock& block : function) + { + for (llvm::Instruction& inst : block) + { + const auto* call = llvm::dyn_cast(&inst); + if (!call) + continue; + + const llvm::Function* callee = getDirectCallee(*call); + if (!callee) + continue; + + const llvm::StringRef canonicalName = canonicalCalleeName(callee->getName()); + const std::optional commandArg = shellCommandArgIndex(canonicalName); + if (!commandArg || *commandArg >= call->arg_size()) + continue; + + const llvm::Value* commandValue = call->getArgOperand(*commandArg); + if (isCompileTimeConstantString(commandValue)) + continue; + + CommandInjectionIssue issue; + issue.funcName = 
function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.sinkName = canonicalName.str(); + issue.inst = &inst; + issues.push_back(std::move(issue)); + } + } + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/FrontendDiagnostics.cpp b/src/analysis/FrontendDiagnostics.cpp new file mode 100644 index 0000000..3a219a1 --- /dev/null +++ b/src/analysis/FrontendDiagnostics.cpp @@ -0,0 +1,380 @@ +#include "analysis/FrontendDiagnostics.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + enum class ParsedSeverity + { + Warning, + Error + }; + + struct ParsedFrontendWarning + { + std::string filePath; + unsigned line = 0; + unsigned column = 0; + ParsedSeverity severity = ParsedSeverity::Warning; + std::string message; + }; + + struct Classification + { + std::string ruleId; + std::string cwe; + std::string summary; + std::string clangMessageOverride; + }; + + static std::string trim(std::string s) + { + const auto notSpace = [](unsigned char c) { return !std::isspace(c); }; + s.erase(s.begin(), std::find_if(s.begin(), s.end(), notSpace)); + s.erase(std::find_if(s.rbegin(), s.rend(), notSpace).base(), s.end()); + return s; + } + + static std::string toLower(std::string s) + { + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + return s; + } + + static std::string normalizePath(std::string path) + { + path = trim(std::move(path)); + if (path.empty()) + return path; + + for (char& c : path) + { + if (c == '\\') + c = '/'; + } + + std::error_code ec; + std::filesystem::path abs = std::filesystem::absolute(path, ec); + if (ec) + abs = std::filesystem::path(path); + + std::filesystem::path canon = std::filesystem::weakly_canonical(abs, ec); + std::filesystem::path out = ec ? 
abs.lexically_normal() : canon; + return out.generic_string(); + } + + static std::string basenameOf(std::string path) + { + for (char& c : path) + { + if (c == '\\') + c = '/'; + } + std::size_t slash = path.find_last_of('/'); + if (slash == std::string::npos) + return path; + if (slash + 1 >= path.size()) + return {}; + return path.substr(slash + 1); + } + + static bool parseUnsigned(const std::string& text, unsigned& out) + { + if (text.empty()) + return false; + std::uint64_t value = 0; + for (char c : text) + { + if (c < '0' || c > '9') + return false; + value = value * 10 + static_cast(c - '0'); + if (value > std::numeric_limits::max()) + return false; + } + out = static_cast(value); + return true; + } + + static bool parseFrontendWarningLine(const std::string& line, ParsedFrontendWarning& out) + { + std::size_t markerPos = line.find(": warning: "); + ParsedSeverity severity = ParsedSeverity::Warning; + std::size_t markerLen = std::string(": warning: ").size(); + if (markerPos == std::string::npos) + { + markerPos = line.find(": error: "); + if (markerPos == std::string::npos) + return false; + markerLen = std::string(": error: ").size(); + severity = ParsedSeverity::Error; + } + + const std::string prefix = line.substr(0, markerPos); + const std::string message = trim(line.substr(markerPos + markerLen)); + if (message.empty()) + return false; + + const std::size_t colSep = prefix.rfind(':'); + if (colSep == std::string::npos) + return false; + const std::size_t lineSep = prefix.rfind(':', colSep - 1); + if (lineSep == std::string::npos) + return false; + + unsigned parsedLine = 0; + unsigned parsedColumn = 0; + const std::string lineStr = prefix.substr(lineSep + 1, colSep - lineSep - 1); + const std::string colStr = prefix.substr(colSep + 1); + if (!parseUnsigned(lineStr, parsedLine) || !parseUnsigned(colStr, parsedColumn)) + return false; + + std::string filePath = trim(prefix.substr(0, lineSep)); + const std::size_t trailingSpace = filePath.find_last_of(" 
\t"); + if (trailingSpace != std::string::npos) + filePath = filePath.substr(trailingSpace + 1); + filePath = trim(std::move(filePath)); + if (filePath.empty()) + return false; + + out.filePath = filePath; + out.line = parsedLine; + out.column = parsedColumn; + out.severity = severity; + out.message = message; + return true; + } + + static std::optional classifyMessage(const std::string& message) + { + const std::string m = toLower(message); + + if (m.find("format string is not a string literal") != std::string::npos) + { + return Classification{"FormatString.NonLiteral", "CWE-134", + "non-literal format string may allow format injection", ""}; + } + + if (m.find("format specifies type") != std::string::npos || + m.find("more '%' conversions than data arguments") != std::string::npos || + m.find("data argument not used by format string") != std::string::npos) + { + return Classification{"VariadicFormatMismatch", "CWE-685", + "variadic format and argument list appear inconsistent", ""}; + } + + if (m.find("sizeof on array function parameter") != std::string::npos || + m.find("will return the size of the pointer") != std::string::npos || + (m.find("call operates on objects of type") != std::string::npos && + m.find("size is based on a different type") != std::string::npos)) + { + return Classification{"SizeofPitfall", "CWE-467", + "size computation appears to use pointer size instead of " + "object size", + ""}; + } + + if (m.find("'gets' is deprecated") != std::string::npos) + { + return Classification{"UnsafeFunction.DeprecatedGets", "CWE-676", + "deprecated unsafe function 'gets' is used", + "'gets' is deprecated: This function is provided for " + "compatibility reasons only. 
Due to security concerns " + "inherent in the design of gets(3), it is highly " + "recommended that you use fgets(3) instead."}; + } + + if (m.find("call to undeclared function 'gets'") != std::string::npos) + { + return Classification{"UnsafeFunction.DeprecatedGets", "CWE-676", + "deprecated unsafe function 'gets' is used", + "'gets' is deprecated: This function is provided for " + "compatibility reasons only. Due to security concerns " + "inherent in the design of gets(3), it is highly " + "recommended that you use fgets(3) instead."}; + } + + return std::nullopt; + } + + static std::string combineDebugFilePath(const llvm::DIFile* file) + { + if (!file) + return {}; + std::string directory = file->getDirectory().str(); + std::string filename = file->getFilename().str(); + if (filename.empty()) + return {}; + if (directory.empty()) + return filename; + return directory + "/" + filename; + } + + static std::string resolveLocationFile(const llvm::DebugLoc& loc) + { + if (!loc) + return {}; + const llvm::DILocalScope* scope = loc->getScope(); + if (!scope) + return {}; + if (const llvm::DIFile* file = scope->getFile()) + return combineDebugFilePath(file); + if (const llvm::DISubprogram* sp = scope->getSubprogram()) + return combineDebugFilePath(sp->getFile()); + return {}; + } + + static bool filePathsLikelyMatch(const std::string& lhsPath, const std::string& rhsPath) + { + if (lhsPath.empty() || rhsPath.empty()) + return false; + + const std::string lhsNorm = normalizePath(lhsPath); + const std::string rhsNorm = normalizePath(rhsPath); + if (!lhsNorm.empty() && lhsNorm == rhsNorm) + return true; + + const std::string lhsBase = basenameOf(lhsPath); + const std::string rhsBase = basenameOf(rhsPath); + return !lhsBase.empty() && lhsBase == rhsBase; + } + + static std::string resolveFunctionNameForLocation(const llvm::Module& mod, + const std::string& filePath, + unsigned line) + { + const llvm::Function* best = nullptr; + unsigned bestDistance = 
std::numeric_limits::max(); + + for (const llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + + bool sawCandidateFile = false; + unsigned localBestDistance = std::numeric_limits::max(); + for (const llvm::BasicBlock& BB : F) + { + for (const llvm::Instruction& I : BB) + { + const llvm::DebugLoc dl = I.getDebugLoc(); + if (!dl) + continue; + + const std::string debugFile = resolveLocationFile(dl); + if (!filePathsLikelyMatch(debugFile, filePath)) + continue; + + sawCandidateFile = true; + const unsigned debugLine = dl.getLine(); + if (debugLine == 0) + continue; + if (debugLine == line) + return F.getName().str(); + + const unsigned distance = + (debugLine > line) ? (debugLine - line) : (line - debugLine); + localBestDistance = std::min(localBestDistance, distance); + } + } + + if (sawCandidateFile && localBestDistance < bestDistance) + { + best = &F; + bestDistance = localBestDistance; + } + } + + return best ? best->getName().str() : std::string{}; + } + + static std::string severityPrefix(DiagnosticSeverity severity) + { + switch (severity) + { + case DiagnosticSeverity::Info: + return "[ !Info! 
]"; + case DiagnosticSeverity::Warning: + return "[ !!Warn ]"; + case DiagnosticSeverity::Error: + return "[!!!Error]"; + } + return "[ !!Warn ]"; + } + } // namespace + + std::vector collectFrontendDiagnostics(const std::string& diagnosticsText, + const llvm::Module& mod, + const std::string& fallbackFilePath) + { + std::vector out; + if (diagnosticsText.empty()) + return out; + + std::unordered_set seen; + std::istringstream stream(diagnosticsText); + std::string line; + while (std::getline(stream, line)) + { + ParsedFrontendWarning parsed; + if (!parseFrontendWarningLine(line, parsed)) + continue; + + const std::optional classification = classifyMessage(parsed.message); + if (!classification) + continue; + + const std::string key = + normalizePath(parsed.filePath) + ":" + std::to_string(parsed.line) + ":" + + std::to_string(parsed.column) + ":" + classification->ruleId + ":" + parsed.message; + if (!seen.insert(key).second) + continue; + + Diagnostic diag; + diag.filePath = parsed.filePath.empty() ? fallbackFilePath : parsed.filePath; + if (diag.filePath.empty()) + diag.filePath = fallbackFilePath; + diag.funcName = resolveFunctionNameForLocation(mod, diag.filePath, parsed.line); + diag.line = parsed.line; + diag.column = parsed.column; + diag.startLine = parsed.line; + diag.startColumn = parsed.column; + diag.endLine = parsed.line; + diag.endColumn = parsed.column; + diag.severity = (parsed.severity == ParsedSeverity::Error) + ? DiagnosticSeverity::Error + : DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::None; + diag.ruleId = classification->ruleId; + diag.cweId = classification->cwe; + diag.confidence = 0.85; + + std::ostringstream msg; + const std::string& clangMessage = classification->clangMessageOverride.empty() + ? 
parsed.message + : classification->clangMessageOverride; + msg << "\t" << severityPrefix(diag.severity) << " " << classification->summary << "\n" + << "\t\t ↳ clang: " << clangMessage << "\n"; + diag.message = msg.str(); + + out.push_back(std::move(diag)); + } + + return out; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/InputPipeline.cpp b/src/analysis/InputPipeline.cpp index 5dc52ae..e3f1389 100644 --- a/src/analysis/InputPipeline.cpp +++ b/src/analysis/InputPipeline.cpp @@ -1,5 +1,6 @@ #include "analysis/InputPipeline.hpp" #include "analysis/CompileCommands.hpp" +#include "analysis/FrontendDiagnostics.hpp" #include #include @@ -367,6 +368,7 @@ namespace ctrace::stack::analysis std::vector args; std::string workingDir; std::string compileError; + std::string compileDiagnosticsText; if (!buildCompileArgs(filename, result.language, config, args, workingDir, compileError)) { @@ -426,6 +428,7 @@ namespace ctrace::stack::analysis { logText(coretrace::Level::Warn, res->diagnostics); } + compileDiagnosticsText = res->diagnostics; if (res->llvmIR.empty()) { @@ -465,6 +468,11 @@ namespace ctrace::stack::analysis result.error = "Failed to parse in-memory LLVM IR:\n" + os.str(); return result; } + if (!compileDiagnosticsText.empty()) + { + result.frontendDiagnostics = + collectFrontendDiagnostics(compileDiagnosticsText, *result.module, filename); + } if (!dumpModuleIR(*result.module, filename, config, baseDir, result.error)) return result; diff --git a/src/analysis/IntRanges.cpp b/src/analysis/IntRanges.cpp index 4f436dd..c9970a1 100644 --- a/src/analysis/IntRanges.cpp +++ b/src/analysis/IntRanges.cpp @@ -97,11 +97,6 @@ namespace ctrace::stack::analysis hasUB = true; ub = c; break; - case ICmpInst::ICMP_NE: - // approximation: V != C => V <= C (very conservative) - hasUB = true; - ub = c; - break; default: break; } @@ -133,10 +128,6 @@ namespace ctrace::stack::analysis hasUB = true; ub = c; break; - case ICmpInst::ICMP_NE: - hasUB = true; - ub = c; - 
break; default: break; } @@ -173,10 +164,6 @@ namespace ctrace::stack::analysis hasUB = true; ub = c; break; - case ICmpInst::ICMP_NE: - hasUB = true; - ub = c; - break; default: break; } @@ -207,10 +194,6 @@ namespace ctrace::stack::analysis hasUB = true; ub = c; break; - case ICmpInst::ICMP_NE: - hasUB = true; - ub = c; - break; default: break; } @@ -222,7 +205,7 @@ namespace ctrace::stack::analysis // Choose the predicate group if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE || pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE || - pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_NE) + pred == ICmpInst::ICMP_EQ) { updateForSigned(valueIsOp0); } diff --git a/src/analysis/IntegerOverflowAnalysis.cpp b/src/analysis/IntegerOverflowAnalysis.cpp new file mode 100644 index 0000000..a0437a6 --- /dev/null +++ b/src/analysis/IntegerOverflowAnalysis.cpp @@ -0,0 +1,723 @@ +#include "analysis/IntegerOverflowAnalysis.hpp" + +#include "analysis/AnalyzerUtils.hpp" +#include "analysis/IntRanges.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + struct SizeSink + { + llvm::StringRef name; + unsigned sizeArgIndex = 0; + }; + + struct RiskSummary + { + IntegerOverflowIssueKind kind; + std::string operation; + }; + + static const llvm::Function* getDirectCallee(const llvm::CallBase& call) + { + if (const llvm::Function* direct = call.getCalledFunction()) + return direct; + const llvm::Value* called = call.getCalledOperand(); + if (!called) + return nullptr; + return llvm::dyn_cast(called->stripPointerCasts()); + } + + static llvm::StringRef canonicalCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + if (name.starts_with("__builtin_")) + name = name.drop_front(10); + if (name.starts_with("builtin_")) + name = name.drop_front(8); + while (name.starts_with("_")) + name 
= name.drop_front(); + if (name.starts_with("__builtin_")) + name = name.drop_front(10); + if (name.starts_with("builtin_")) + name = name.drop_front(8); + while (name.starts_with("_")) + name = name.drop_front(); + + const std::size_t dollarPos = name.find('$'); + if (dollarPos != llvm::StringRef::npos) + name = name.take_front(dollarPos); + + return name; + } + + static std::optional resolveSizeSink(llvm::StringRef calleeName) + { + if (calleeName == "malloc") + return SizeSink{"malloc", 0u}; + if (calleeName == "realloc") + return SizeSink{"realloc", 1u}; + if (calleeName == "memcpy" || calleeName == "memcpy_chk") + return SizeSink{"memcpy", 2u}; + if (calleeName == "memmove" || calleeName == "memmove_chk") + return SizeSink{"memmove", 2u}; + if (calleeName == "memset" || calleeName == "memset_chk") + return SizeSink{"memset", 2u}; + if (calleeName == "read") + return SizeSink{"read", 2u}; + if (calleeName == "write") + return SizeSink{"write", 2u}; + return std::nullopt; + } + + static std::optional resolveIntrinsicSizeSink(const llvm::CallBase& call) + { + const auto* II = llvm::dyn_cast(&call); + if (!II) + return std::nullopt; + + switch (II->getIntrinsicID()) + { + case llvm::Intrinsic::memcpy: + return SizeSink{"memcpy", 2u}; + case llvm::Intrinsic::memmove: + return SizeSink{"memmove", 2u}; + case llvm::Intrinsic::memset: + return SizeSink{"memset", 2u}; + default: + break; + } + + return std::nullopt; + } + + static const llvm::StoreInst* findUniqueStoreToSlot(const llvm::AllocaInst& slot) + { + const llvm::StoreInst* uniqueStore = nullptr; + for (const llvm::Use& use : slot.uses()) + { + const auto* user = use.getUser(); + if (const auto* store = llvm::dyn_cast(user)) + { + if (store->getPointerOperand()->stripPointerCasts() != &slot) + return nullptr; + if (uniqueStore && uniqueStore != store) + return nullptr; + uniqueStore = store; + continue; + } + + if (const auto* load = llvm::dyn_cast(user)) + { + if 
(load->getPointerOperand()->stripPointerCasts() != &slot) + return nullptr; + continue; + } + + if (const auto* intrinsic = llvm::dyn_cast(user)) + { + if (llvm::isa(intrinsic) || + llvm::isa(intrinsic)) + { + continue; + } + } + + return nullptr; + } + + return uniqueStore; + } + + static const llvm::Value* peelLoadFromSingleStoreSlot(const llvm::Value* value) + { + const auto* load = llvm::dyn_cast(value); + if (!load) + return nullptr; + + const auto* slot = + llvm::dyn_cast(load->getPointerOperand()->stripPointerCasts()); + if (!slot || !slot->isStaticAlloca()) + return nullptr; + + const llvm::StoreInst* uniqueStore = findUniqueStoreToSlot(*slot); + if (!uniqueStore) + return nullptr; + + return uniqueStore->getValueOperand(); + } + + static bool + dependsOnFunctionArgumentRecursive(const llvm::Value* value, + llvm::SmallPtrSetImpl& visited, + unsigned depth) + { + if (!value || depth > 32) + return false; + if (!visited.insert(value).second) + return false; + + if (llvm::isa(value)) + return true; + if (llvm::isa(value)) + return false; + + if (const llvm::Value* peeled = peelLoadFromSingleStoreSlot(value)) + { + if (dependsOnFunctionArgumentRecursive(peeled, visited, depth + 1)) + return true; + } + + if (const auto* instruction = llvm::dyn_cast(value)) + { + for (const llvm::Value* operand : instruction->operands()) + { + if (dependsOnFunctionArgumentRecursive(operand, visited, depth + 1)) + return true; + } + } + + return false; + } + + static bool dependsOnFunctionArgument(const llvm::Value* value) + { + llvm::SmallPtrSet visited; + return dependsOnFunctionArgumentRecursive(value, visited, 0); + } + + static bool hasKnownNonNegativeRange(const llvm::Value* value, + const std::map& ranges) + { + if (!value) + return false; + + auto it = ranges.find(value); + if (it != ranges.end() && it->second.hasLower && it->second.hasUpper && + it->second.lower >= 0 && it->second.upper >= 0) + return true; + + if (const auto* cast = llvm::dyn_cast(value)) + return 
hasKnownNonNegativeRange(cast->getOperand(0), ranges); + + if (const auto* load = llvm::dyn_cast(value)) + { + const llvm::Value* slot = load->getPointerOperand(); + auto slotIt = ranges.find(slot); + if (slotIt != ranges.end() && slotIt->second.hasLower && slotIt->second.hasUpper && + slotIt->second.lower >= 0 && slotIt->second.upper >= 0) + return true; + } + + return false; + } + + static std::optional resolveKnownRangeRecursive( + const llvm::Value* value, const std::map& ranges, + llvm::SmallPtrSetImpl& visited, unsigned depth) + { + if (!value || depth > 32) + return std::nullopt; + if (!visited.insert(value).second) + return std::nullopt; + + auto it = ranges.find(value); + if (it != ranges.end()) + return it->second; + + if (const auto* load = llvm::dyn_cast(value)) + { + auto slotIt = ranges.find(load->getPointerOperand()); + if (slotIt != ranges.end()) + return slotIt->second; + } + + if (const llvm::Value* peeled = peelLoadFromSingleStoreSlot(value)) + { + if (const std::optional fromPeeled = + resolveKnownRangeRecursive(peeled, ranges, visited, depth + 1)) + { + return fromPeeled; + } + } + + if (const auto* cast = llvm::dyn_cast(value)) + return resolveKnownRangeRecursive(cast->getOperand(0), ranges, visited, depth + 1); + + return std::nullopt; + } + + static std::optional + resolveKnownRange(const llvm::Value* value, + const std::map& ranges) + { + llvm::SmallPtrSet visited; + return resolveKnownRangeRecursive(value, ranges, visited, 0); + } + + static const llvm::ConstantInt* + resolveConstIntRecursive(const llvm::Value* value, + llvm::SmallPtrSetImpl& visited, unsigned depth) + { + if (!value || depth > 32) + return nullptr; + if (!visited.insert(value).second) + return nullptr; + + if (const auto* constant = llvm::dyn_cast(value)) + return constant; + + if (const llvm::Value* peeled = peelLoadFromSingleStoreSlot(value)) + { + if (const llvm::ConstantInt* fromPeeled = + resolveConstIntRecursive(peeled, visited, depth + 1)) + { + return fromPeeled; 
+ } + } + + if (const auto* cast = llvm::dyn_cast(value)) + return resolveConstIntRecursive(cast->getOperand(0), visited, depth + 1); + + return nullptr; + } + + static const llvm::ConstantInt* resolveConstInt(const llvm::Value* value) + { + llvm::SmallPtrSet visited; + return resolveConstIntRecursive(value, visited, 0); + } + + static bool truncationDropsKnownBits(const llvm::Value* source, unsigned targetBitWidth, + const std::map& ranges) + { + if (const auto* constant = resolveConstInt(source)) + { + const llvm::APInt src = constant->getValue(); + if (src.getBitWidth() <= targetBitWidth) + return false; + + const llvm::APInt truncated = src.trunc(targetBitWidth); + const llvm::APInt roundTrip = truncated.zextOrTrunc(src.getBitWidth()); + return roundTrip != src; + } + + const std::optional knownRange = resolveKnownRange(source, ranges); + if (!knownRange) + return false; + + if (knownRange->hasLower && knownRange->lower < 0) + return true; + + if (targetBitWidth >= 63 || !knownRange->hasUpper || knownRange->upper < 0) + return false; + + const std::uint64_t maxUnsignedInTarget = + (std::uint64_t{1} << targetBitWidth) - std::uint64_t{1}; + return static_cast(knownRange->upper) > maxUnsignedInTarget; + } + + static bool + isPotentiallyLossyTruncation(const llvm::TruncInst& trunc, + const std::map& ranges) + { + const llvm::Value* source = trunc.getOperand(0); + const auto* sourceTy = llvm::dyn_cast(source->getType()); + const auto* targetTy = llvm::dyn_cast(trunc.getType()); + if (!sourceTy || !targetTy) + return false; + if (sourceTy->getBitWidth() <= targetTy->getBitWidth()) + return false; + + if (truncationDropsKnownBits(source, targetTy->getBitWidth(), ranges)) + return true; + + return dependsOnFunctionArgument(source); + } + + static bool isSignedOverflowOp(llvm::Instruction::BinaryOps opcode) + { + return opcode == llvm::Instruction::Add || opcode == llvm::Instruction::Sub || + opcode == llvm::Instruction::Mul; + } + + static bool 
reachesReturnRecursive(const llvm::Value* value, + llvm::SmallPtrSetImpl& visited, + unsigned depth) + { + if (!value || depth > 32) + return false; + if (!visited.insert(value).second) + return false; + + for (const llvm::User* user : value->users()) + { + if (llvm::isa(user)) + return true; + + if (const auto* cast = llvm::dyn_cast(user)) + { + if (reachesReturnRecursive(cast, visited, depth + 1)) + return true; + continue; + } + if (const auto* phi = llvm::dyn_cast(user)) + { + if (reachesReturnRecursive(phi, visited, depth + 1)) + return true; + continue; + } + if (const auto* select = llvm::dyn_cast(user)) + { + if (reachesReturnRecursive(select, visited, depth + 1)) + return true; + continue; + } + + const auto* store = llvm::dyn_cast(user); + if (!store || store->getValueOperand() != value) + continue; + + const auto* slot = llvm::dyn_cast( + store->getPointerOperand()->stripPointerCasts()); + if (!slot || !slot->isStaticAlloca()) + continue; + + for (const llvm::Use& slotUse : slot->uses()) + { + const auto* load = llvm::dyn_cast(slotUse.getUser()); + if (!load) + continue; + if (load->getPointerOperand()->stripPointerCasts() != slot) + continue; + if (reachesReturnRecursive(load, visited, depth + 1)) + return true; + } + } + + return false; + } + + static bool reachesReturn(const llvm::Value* value) + { + llvm::SmallPtrSet visited; + return reachesReturnRecursive(value, visited, 0); + } + + static bool tryGetSignedRange(const llvm::Value* value, + const std::map& ranges, + std::int64_t& outLower, std::int64_t& outUpper) + { + if (const auto* constant = resolveConstInt(value)) + { + const unsigned bitWidth = constant->getBitWidth(); + if (bitWidth == 0 || bitWidth > 63) + return false; + const std::int64_t scalar = constant->getSExtValue(); + outLower = scalar; + outUpper = scalar; + return true; + } + + const std::optional knownRange = resolveKnownRange(value, ranges); + if (!knownRange || !knownRange->hasLower || !knownRange->hasUpper) + return false; + + 
outLower = static_cast(knownRange->lower); + outUpper = static_cast(knownRange->upper); + return true; + } + + static bool + provenNoSignedOverflowByRanges(const llvm::BinaryOperator& binary, + const std::map& ranges) + { + const auto* integerTy = llvm::dyn_cast(binary.getType()); + if (!integerTy) + return false; + + const unsigned bitWidth = integerTy->getBitWidth(); + if (bitWidth == 0 || bitWidth > 63) + return false; + + std::int64_t lhsLower = 0; + std::int64_t lhsUpper = 0; + std::int64_t rhsLower = 0; + std::int64_t rhsUpper = 0; + if (!tryGetSignedRange(binary.getOperand(0), ranges, lhsLower, lhsUpper) || + !tryGetSignedRange(binary.getOperand(1), ranges, rhsLower, rhsUpper)) + { + return false; + } + + const __int128 signedMin = -(__int128{1} << (bitWidth - 1)); + const __int128 signedMax = (__int128{1} << (bitWidth - 1)) - 1; + + __int128 resultMin = 0; + __int128 resultMax = 0; + switch (binary.getOpcode()) + { + case llvm::Instruction::Add: + resultMin = static_cast<__int128>(lhsLower) + static_cast<__int128>(rhsLower); + resultMax = static_cast<__int128>(lhsUpper) + static_cast<__int128>(rhsUpper); + break; + case llvm::Instruction::Sub: + resultMin = static_cast<__int128>(lhsLower) - static_cast<__int128>(rhsUpper); + resultMax = static_cast<__int128>(lhsUpper) - static_cast<__int128>(rhsLower); + break; + case llvm::Instruction::Mul: + { + const __int128 c1 = static_cast<__int128>(lhsLower) * rhsLower; + const __int128 c2 = static_cast<__int128>(lhsLower) * rhsUpper; + const __int128 c3 = static_cast<__int128>(lhsUpper) * rhsLower; + const __int128 c4 = static_cast<__int128>(lhsUpper) * rhsUpper; + resultMin = std::min(std::min(c1, c2), std::min(c3, c4)); + resultMax = std::max(std::max(c1, c2), std::max(c3, c4)); + break; + } + default: + return false; + } + + return resultMin >= signedMin && resultMax <= signedMax; + } + + static std::optional classifySizeOperandRecursive( + const llvm::Value* value, const std::map& ranges, + 
llvm::SmallPtrSetImpl& visited, unsigned depth) + { + if (!value || depth > 32) + return std::nullopt; + if (!visited.insert(value).second) + return std::nullopt; + + if (const auto* trunc = llvm::dyn_cast(value)) + { + if (isPotentiallyLossyTruncation(*trunc, ranges)) + { + return RiskSummary{IntegerOverflowIssueKind::TruncationInSizeComputation, + "trunc"}; + } + return classifySizeOperandRecursive(trunc->getOperand(0), ranges, visited, + depth + 1); + } + + if (const auto* sext = llvm::dyn_cast(value)) + { + const llvm::Value* source = sext->getOperand(0); + if (dependsOnFunctionArgument(source) && !hasKnownNonNegativeRange(source, ranges)) + { + return RiskSummary{IntegerOverflowIssueKind::SignedToUnsignedSize, "sext"}; + } + return classifySizeOperandRecursive(source, ranges, visited, depth + 1); + } + + if (const auto* zext = llvm::dyn_cast(value)) + return classifySizeOperandRecursive(zext->getOperand(0), ranges, visited, + depth + 1); + + if (const auto* binary = llvm::dyn_cast(value)) + { + switch (binary->getOpcode()) + { + case llvm::Instruction::Add: + case llvm::Instruction::Sub: + case llvm::Instruction::Mul: + case llvm::Instruction::Shl: + { + const bool bothConstants = + llvm::isa(binary->getOperand(0)) && + llvm::isa(binary->getOperand(1)); + if (!bothConstants && dependsOnFunctionArgument(binary)) + { + return RiskSummary{IntegerOverflowIssueKind::ArithmeticInSizeComputation, + binary->getOpcodeName()}; + } + break; + } + default: + break; + } + } + + if (const auto* extract = llvm::dyn_cast(value)) + { + const llvm::Value* aggregate = extract->getAggregateOperand(); + if (const auto* overflowCall = llvm::dyn_cast(aggregate)) + { + if (const auto* intrinsic = llvm::dyn_cast(overflowCall)) + { + switch (intrinsic->getIntrinsicID()) + { + case llvm::Intrinsic::sadd_with_overflow: + case llvm::Intrinsic::ssub_with_overflow: + case llvm::Intrinsic::smul_with_overflow: + case llvm::Intrinsic::uadd_with_overflow: + case 
llvm::Intrinsic::usub_with_overflow: + case llvm::Intrinsic::umul_with_overflow: + return RiskSummary{ + IntegerOverflowIssueKind::ArithmeticInSizeComputation, + intrinsic->getCalledFunction() + ? intrinsic->getCalledFunction()->getName().str() + : "with.overflow"}; + default: + break; + } + } + } + return classifySizeOperandRecursive(aggregate, ranges, visited, depth + 1); + } + + if (const llvm::Value* peeled = peelLoadFromSingleStoreSlot(value)) + return classifySizeOperandRecursive(peeled, ranges, visited, depth + 1); + + if (const auto* phi = llvm::dyn_cast(value)) + { + for (const llvm::Value* incoming : phi->incoming_values()) + { + if (auto risk = + classifySizeOperandRecursive(incoming, ranges, visited, depth + 1)) + { + return risk; + } + } + } + + if (const auto* select = llvm::dyn_cast(value)) + { + if (auto risk = classifySizeOperandRecursive(select->getTrueValue(), ranges, + visited, depth + 1)) + { + return risk; + } + if (auto risk = classifySizeOperandRecursive(select->getFalseValue(), ranges, + visited, depth + 1)) + { + return risk; + } + } + + return std::nullopt; + } + + static std::optional + classifySizeOperand(const llvm::Value* value, + const std::map& ranges) + { + llvm::SmallPtrSet visited; + return classifySizeOperandRecursive(value, ranges, visited, 0); + } + } // namespace + + std::vector + analyzeIntegerOverflows(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector issues; + + for (llvm::Function& function : mod) + { + if (function.isDeclaration() || !shouldAnalyze(function)) + continue; + + const std::map ranges = + computeIntRangesFromICmps(function); + + for (llvm::BasicBlock& block : function) + { + for (llvm::Instruction& inst : block) + { + if (const auto* binary = llvm::dyn_cast(&inst)) + { + if (binary->hasNoSignedWrap() && isSignedOverflowOp(binary->getOpcode()) && + reachesReturn(binary) && dependsOnFunctionArgument(binary) && + !provenNoSignedOverflowByRanges(*binary, ranges)) + { + 
IntegerOverflowIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.sinkName = "return"; + issue.operation = binary->getOpcodeName(); + issue.kind = IntegerOverflowIssueKind::SignedArithmeticOverflow; + issue.inst = binary; + issues.push_back(std::move(issue)); + } + } + + const auto* call = llvm::dyn_cast(&inst); + if (!call) + continue; + + std::optional sink = resolveIntrinsicSizeSink(*call); + llvm::StringRef sinkName; + if (!sink) + { + const llvm::Function* callee = getDirectCallee(*call); + if (!callee) + continue; + sinkName = canonicalCalleeName(callee->getName()); + } + + if (!sink && sinkName == "calloc" && call->arg_size() >= 2) + { + const llvm::Value* count = call->getArgOperand(0); + const llvm::Value* elemSize = call->getArgOperand(1); + if (!llvm::isa(count) && + !llvm::isa(elemSize) && + (dependsOnFunctionArgument(count) || + dependsOnFunctionArgument(elemSize))) + { + IntegerOverflowIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.sinkName = "calloc"; + issue.operation = "mul"; + issue.kind = IntegerOverflowIssueKind::ArithmeticInSizeComputation; + issue.inst = &inst; + issues.push_back(std::move(issue)); + continue; + } + } + + if (!sink) + sink = resolveSizeSink(sinkName); + if (!sink || sink->sizeArgIndex >= call->arg_size()) + continue; + + const llvm::Value* sizeOperand = call->getArgOperand(sink->sizeArgIndex); + const std::optional risk = + classifySizeOperand(sizeOperand, ranges); + if (!risk) + continue; + + IntegerOverflowIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.sinkName = sink->name.str(); + issue.operation = risk->operation; + issue.kind = risk->kind; + issue.inst = &inst; + issues.push_back(std::move(issue)); + } + } + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git 
a/src/analysis/MemIntrinsicOverflow.cpp b/src/analysis/MemIntrinsicOverflow.cpp index 9bbca78..ede262c 100644 --- a/src/analysis/MemIntrinsicOverflow.cpp +++ b/src/analysis/MemIntrinsicOverflow.cpp @@ -1,5 +1,7 @@ #include "analysis/MemIntrinsicOverflow.hpp" +#include "analysis/BufferWriteModel.hpp" +#include #include #include @@ -16,6 +18,15 @@ namespace ctrace::stack::analysis { namespace { + struct ResolvedSink + { + bool valid = false; + bool hasExplicitLength = false; + unsigned destArgIndex = 0; + unsigned sizeArgIndex = 0; + std::string displayName; + }; + static std::optional getAllocaTotalSizeBytes(const llvm::AllocaInst* AI, const llvm::DataLayout& DL) { @@ -38,8 +49,132 @@ namespace ctrace::stack::analysis return std::nullopt; } + static llvm::Function* resolveDirectCallee(llvm::CallBase* CB) + { + using namespace llvm; + if (!CB) + return nullptr; + if (Function* direct = CB->getCalledFunction()) + return direct; + Value* callee = CB->getCalledOperand(); + if (!callee) + return nullptr; + return dyn_cast(callee->stripPointerCasts()); + } + + static ResolvedSink resolveBuiltInSink(llvm::CallBase* CB) + { + using namespace llvm; + ResolvedSink sink; + if (!CB) + return sink; + + if (auto* II = dyn_cast(CB)) + { + switch (II->getIntrinsicID()) + { + case Intrinsic::memcpy: + sink.valid = true; + sink.hasExplicitLength = true; + sink.destArgIndex = 0; + sink.sizeArgIndex = 2; + sink.displayName = "memcpy"; + return sink; + case Intrinsic::memset: + sink.valid = true; + sink.hasExplicitLength = true; + sink.destArgIndex = 0; + sink.sizeArgIndex = 2; + sink.displayName = "memset"; + return sink; + case Intrinsic::memmove: + sink.valid = true; + sink.hasExplicitLength = true; + sink.destArgIndex = 0; + sink.sizeArgIndex = 2; + sink.displayName = "memmove"; + return sink; + default: + break; + } + } + + Function* callee = resolveDirectCallee(CB); + if (!callee) + return sink; + + StringRef calleeName = callee->getName(); + if (calleeName == "memcpy" || 
calleeName == "__memcpy_chk") + { + sink.valid = true; + sink.hasExplicitLength = true; + sink.destArgIndex = 0; + sink.sizeArgIndex = 2; + sink.displayName = "memcpy"; + return sink; + } + if (calleeName == "memset" || calleeName == "__memset_chk") + { + sink.valid = true; + sink.hasExplicitLength = true; + sink.destArgIndex = 0; + sink.sizeArgIndex = 2; + sink.displayName = "memset"; + return sink; + } + if (calleeName == "memmove" || calleeName == "__memmove_chk") + { + sink.valid = true; + sink.hasExplicitLength = true; + sink.destArgIndex = 0; + sink.sizeArgIndex = 2; + sink.displayName = "memmove"; + return sink; + } + + return sink; + } + + static ResolvedSink resolveModelSink(llvm::CallBase* CB, const BufferWriteModel* model, + BufferWriteRuleMatcher* matcher) + { + ResolvedSink sink; + if (!CB || !model || !matcher) + return sink; + + llvm::Function* callee = resolveDirectCallee(CB); + if (!callee) + return sink; + + const BufferWriteRule* rule = + matcher->findMatchingRule(*model, *callee, CB->arg_size()); + if (!rule) + return sink; + + sink.valid = true; + sink.hasExplicitLength = (rule->kind == BufferWriteRuleKind::BoundedWrite); + sink.destArgIndex = rule->destArgIndex; + sink.sizeArgIndex = rule->sizeArgIndex; + sink.displayName = callee->getName().str(); + return sink; + } + + static const llvm::AllocaInst* resolveStackDestinationAlloca(llvm::Value* destinationPtr) + { + using namespace llvm; + if (!destinationPtr) + return nullptr; + + const Value* cur = destinationPtr->stripPointerCasts(); + if (auto* GEP = dyn_cast(cur)) + cur = GEP->getPointerOperand(); + return dyn_cast(cur); + } + static void analyzeMemIntrinsicOverflowsInFunction(llvm::Function& F, const llvm::DataLayout& DL, + const BufferWriteModel* externalModel, + BufferWriteRuleMatcher* ruleMatcher, std::vector& out) { using namespace llvm; @@ -55,63 +190,20 @@ namespace ctrace::stack::analysis if (!CB) continue; - Function* callee = CB->getCalledFunction(); - if (!callee) - continue; - 
- StringRef name = callee->getName(); + ResolvedSink sink = resolveBuiltInSink(CB); + const ResolvedSink modeledSink = + resolveModelSink(CB, externalModel, ruleMatcher); + if (modeledSink.valid) + sink = modeledSink; - enum class MemKind - { - None, - MemCpy, - MemSet, - MemMove - }; - auto classifyByName = [&](StringRef calleeName) -> MemKind - { - if (calleeName == "memcpy" || calleeName.contains("memcpy")) - return MemKind::MemCpy; - if (calleeName == "memset" || calleeName.contains("memset")) - return MemKind::MemSet; - if (calleeName == "memmove" || calleeName.contains("memmove")) - return MemKind::MemMove; - return MemKind::None; - }; - - MemKind kind = [&]() -> MemKind - { - if (auto* II = dyn_cast(CB)) - { - switch (II->getIntrinsicID()) - { - case Intrinsic::memcpy: - return MemKind::MemCpy; - case Intrinsic::memset: - return MemKind::MemSet; - case Intrinsic::memmove: - return MemKind::MemMove; - default: - break; - } - } - return classifyByName(name); - }(); - - if (kind == MemKind::None) + if (!sink.valid) continue; - if (CB->arg_size() < 3) + if (CB->arg_size() <= sink.destArgIndex) continue; - Value* dest = CB->getArgOperand(0); - - const Value* cur = dest->stripPointerCasts(); - if (auto* GEP = dyn_cast(cur)) - { - cur = GEP->getPointerOperand(); - } - const AllocaInst* AI = dyn_cast(cur); + Value* dest = CB->getArgOperand(sink.destArgIndex); + const AllocaInst* AI = resolveStackDestinationAlloca(dest); if (!AI) continue; @@ -120,35 +212,31 @@ namespace ctrace::stack::analysis continue; StackSize destBytes = *maybeSize; - Value* lenV = CB->getArgOperand(2); - auto* lenC = dyn_cast(lenV); - if (!lenC) - continue; - - uint64_t len = lenC->getZExtValue(); - if (len <= destBytes) - continue; - MemIntrinsicIssue issue; issue.funcName = F.getName().str(); issue.varName = AI->hasName() ? 
AI->getName().str() : std::string(""); issue.destSizeBytes = destBytes; - issue.lengthBytes = len; issue.inst = &I; + issue.intrinsicName = sink.displayName; - switch (kind) + if (sink.hasExplicitLength) + { + if (CB->arg_size() <= sink.sizeArgIndex) + continue; + Value* lenV = CB->getArgOperand(sink.sizeArgIndex); + auto* lenC = dyn_cast(lenV); + if (!lenC) + continue; + + const uint64_t len = lenC->getZExtValue(); + if (len <= destBytes) + continue; + issue.lengthBytes = len; + issue.hasExplicitLength = true; + } + else { - case MemKind::MemCpy: - issue.intrinsicName = "memcpy"; - break; - case MemKind::MemSet: - issue.intrinsicName = "memset"; - break; - case MemKind::MemMove: - issue.intrinsicName = "memmove"; - break; - default: - break; + issue.hasExplicitLength = false; } out.push_back(std::move(issue)); @@ -159,8 +247,25 @@ namespace ctrace::stack::analysis std::vector analyzeMemIntrinsicOverflows(llvm::Module& mod, const llvm::DataLayout& DL, - const std::function& shouldAnalyze) + const std::function& shouldAnalyze, + const std::string& bufferModelPath) { + BufferWriteModel externalModel; + BufferWriteRuleMatcher ruleMatcher; + const BufferWriteModel* externalModelPtr = nullptr; + if (!bufferModelPath.empty()) + { + std::string parseError; + if (!parseBufferWriteModel(bufferModelPath, externalModel, parseError)) + { + std::cerr << "Buffer model load error: " << parseError << "\n"; + } + else + { + externalModelPtr = &externalModel; + } + } + std::vector issues; for (llvm::Function& F : mod) { @@ -168,7 +273,7 @@ namespace ctrace::stack::analysis continue; if (!shouldAnalyze(F)) continue; - analyzeMemIntrinsicOverflowsInFunction(F, DL, issues); + analyzeMemIntrinsicOverflowsInFunction(F, DL, externalModelPtr, &ruleMatcher, issues); } return issues; } diff --git a/src/analysis/NullDerefAnalysis.cpp b/src/analysis/NullDerefAnalysis.cpp new file mode 100644 index 0000000..6b6422a --- /dev/null +++ b/src/analysis/NullDerefAnalysis.cpp @@ -0,0 +1,379 @@ 
+#include "analysis/NullDerefAnalysis.hpp" + +#include "analysis/AnalyzerUtils.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + static bool isLocalPointerSlot(const llvm::Value* pointer) + { + const auto* allocaInst = llvm::dyn_cast_or_null( + pointer ? pointer->stripPointerCasts() : nullptr); + return allocaInst && allocaInst->isStaticAlloca(); + } + + static const llvm::Value* dereferencedPointer(const llvm::Instruction& inst) + { + if (const auto* load = llvm::dyn_cast(&inst)) + { + if (load->getType()->isPointerTy() && isLocalPointerSlot(load->getPointerOperand())) + { + return nullptr; + } + return load->getPointerOperand(); + } + if (const auto* store = llvm::dyn_cast(&inst)) + { + if (store->getValueOperand()->getType()->isPointerTy() && + isLocalPointerSlot(store->getPointerOperand())) + { + return nullptr; + } + return store->getPointerOperand(); + } + if (const auto* atomic = llvm::dyn_cast(&inst)) + return atomic->getPointerOperand(); + if (const auto* cmp = llvm::dyn_cast(&inst)) + return cmp->getPointerOperand(); + return nullptr; + } + + static std::string pointerDisplayName(const llvm::Value* pointer) + { + if (!pointer) + return ""; + + const llvm::Value* root = llvm::getUnderlyingObject(pointer, 32); + if (const auto* allocaInst = llvm::dyn_cast(root)) + { + if (allocaInst->hasName()) + return allocaInst->getName().str(); + } + if (const auto* arg = llvm::dyn_cast(root)) + { + if (arg->hasName()) + return arg->getName().str(); + return ""; + } + if (root && root->hasName()) + return root->getName().str(); + return ""; + } + + static llvm::StringRef canonicalExternalCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + if (name.starts_with("_")) + name = name.drop_front(); + + const std::size_t dollarPos = name.find('$'); + if (dollarPos != llvm::StringRef::npos) + 
name = name.take_front(dollarPos); + + return name; + } + + static bool isAllocatorLikeName(llvm::StringRef calleeName) + { + return calleeName == "malloc" || calleeName == "calloc" || calleeName == "realloc" || + calleeName == "aligned_alloc"; + } + + static bool hasKnownNonNullReturn(const llvm::CallBase& call, const llvm::Function* callee) + { + if (call.hasRetAttr(llvm::Attribute::NonNull) || + call.hasRetAttr(llvm::Attribute::Dereferenceable)) + { + return true; + } + if (!callee) + return false; + const llvm::AttributeList& attrs = callee->getAttributes(); + return attrs.hasRetAttr(llvm::Attribute::NonNull) || + attrs.hasRetAttr(llvm::Attribute::Dereferenceable); + } + + static bool isUncheckedAllocatorResult(const llvm::Value* value) + { + if (!value) + return false; + + const auto* call = llvm::dyn_cast(value->stripPointerCasts()); + if (!call) + return false; + + const llvm::Function* callee = call->getCalledFunction(); + if (!callee || !callee->isDeclaration()) + return false; + + const llvm::StringRef calleeName = canonicalExternalCalleeName(callee->getName()); + if (!isAllocatorLikeName(calleeName)) + return false; + + return !hasKnownNonNullReturn(*call, callee); + } + + static const llvm::Value* stripPointerAddressOps(const llvm::Value* pointer) + { + if (!pointer) + return nullptr; + + const llvm::Value* current = pointer; + for (unsigned depth = 0; depth < 8; ++depth) + { + current = current->stripPointerCasts(); + const auto* gep = llvm::dyn_cast(current); + if (!gep) + break; + current = gep->getPointerOperand(); + } + return current->stripPointerCasts(); + } + + static const llvm::Value* canonicalPointerIdentity(const llvm::Value* pointer) + { + const llvm::Value* current = stripPointerAddressOps(pointer); + if (!current) + return nullptr; + + if (const auto* load = llvm::dyn_cast(current)) + { + const llvm::Value* slot = load->getPointerOperand()->stripPointerCasts(); + if (llvm::isa(slot) || llvm::isa(slot)) + return slot; + } + + if (const 
llvm::Value* underlying = llvm::getUnderlyingObject(current, 32)) + return underlying; + + return current; + } + + static bool samePointerRoot(const llvm::Value* lhs, const llvm::Value* rhs) + { + if (!lhs || !rhs) + return false; + return canonicalPointerIdentity(lhs) == canonicalPointerIdentity(rhs); + } + + static bool isNullValue(const llvm::Value* value) + { + return value && llvm::isa(value->stripPointerCasts()); + } + + static bool conditionImpliesNullForSuccessor(const llvm::BranchInst& branch, + const llvm::BasicBlock& successor, + const llvm::Value*& outPointer) + { + if (!branch.isConditional()) + return false; + + const llvm::ICmpInst* cmp = + llvm::dyn_cast(branch.getCondition()->stripPointerCasts()); + if (!cmp) + return false; + + const llvm::Value* lhs = cmp->getOperand(0); + const llvm::Value* rhs = cmp->getOperand(1); + + const llvm::Value* pointer = nullptr; + bool nullOnTrue = false; + + if (isNullValue(lhs) && rhs->getType()->isPointerTy()) + { + pointer = rhs; + } + else if (isNullValue(rhs) && lhs->getType()->isPointerTy()) + { + pointer = lhs; + } + else + { + return false; + } + + switch (cmp->getPredicate()) + { + case llvm::ICmpInst::ICMP_EQ: + nullOnTrue = true; + break; + case llvm::ICmpInst::ICMP_NE: + nullOnTrue = false; + break; + default: + return false; + } + + const bool isTrueSucc = branch.getSuccessor(0) == &successor; + const bool impliesNull = isTrueSucc ? 
nullOnTrue : !nullOnTrue; + if (!impliesNull) + return false; + + outPointer = pointer; + return true; + } + + static bool precedingStoreSetsNull(const llvm::Instruction& derefInst, + const llvm::Value* pointerOperand) + { + const auto* pointerLoad = + llvm::dyn_cast(stripPointerAddressOps(pointerOperand)); + if (!pointerLoad) + return false; + + const llvm::Value* slot = pointerLoad->getPointerOperand()->stripPointerCasts(); + if (!llvm::isa(slot)) + return false; + + const llvm::BasicBlock* block = derefInst.getParent(); + if (!block) + return false; + + for (auto it = derefInst.getIterator(); it != block->begin();) + { + --it; + const llvm::Instruction& candidate = *it; + const auto* store = llvm::dyn_cast(&candidate); + if (!store) + continue; + if (store->getPointerOperand()->stripPointerCasts() != slot) + continue; + return isNullValue(store->getValueOperand()); + } + + return false; + } + + static const llvm::StoreInst* + findNearestPrecedingStoreToSlot(const llvm::Instruction& derefInst, const llvm::Value* slot) + { + const llvm::BasicBlock* block = derefInst.getParent(); + if (!block || !slot) + return nullptr; + + for (auto it = derefInst.getIterator(); it != block->begin();) + { + --it; + const auto* store = llvm::dyn_cast(&*it); + if (!store) + continue; + if (store->getPointerOperand()->stripPointerCasts() == slot) + return store; + } + + return nullptr; + } + + static bool derefComesFromUncheckedAllocator(const llvm::Instruction& derefInst, + const llvm::Value* pointerOperand) + { + const llvm::Value* producer = stripPointerAddressOps(pointerOperand); + if (!producer || !producer->getType()->isPointerTy()) + return false; + + if (isUncheckedAllocatorResult(producer)) + return true; + + const auto* pointerLoad = llvm::dyn_cast(producer); + if (!pointerLoad) + return false; + + const llvm::Value* slot = pointerLoad->getPointerOperand()->stripPointerCasts(); + if (!llvm::isa(slot)) + return false; + + const llvm::StoreInst* originStore = 
findNearestPrecedingStoreToSlot(derefInst, slot); + if (!originStore) + return false; + + return isUncheckedAllocatorResult(originStore->getValueOperand()); + } + } // namespace + + std::vector + analyzeNullDereferences(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector issues; + std::unordered_set emitted; + + for (llvm::Function& function : mod) + { + if (function.isDeclaration() || !shouldAnalyze(function)) + continue; + + for (llvm::BasicBlock& block : function) + { + for (llvm::Instruction& inst : block) + { + const llvm::Value* pointer = dereferencedPointer(inst); + if (!pointer || !pointer->getType()->isPointerTy()) + continue; + + NullDerefIssueKind kind = NullDerefIssueKind::DirectNullPointer; + bool shouldEmit = false; + + if (isNullValue(pointer)) + { + kind = NullDerefIssueKind::DirectNullPointer; + shouldEmit = true; + } + else if (precedingStoreSetsNull(inst, pointer)) + { + kind = NullDerefIssueKind::NullStoredInLocalSlot; + shouldEmit = true; + } + else if (llvm::pred_size(&block) == 1) + { + const llvm::BasicBlock* pred = *llvm::pred_begin(&block); + const auto* branch = + pred ? 
llvm::dyn_cast(pred->getTerminator()) + : nullptr; + const llvm::Value* nullComparedPointer = nullptr; + if (branch && + conditionImpliesNullForSuccessor(*branch, block, nullComparedPointer) && + samePointerRoot(pointer, nullComparedPointer)) + { + kind = NullDerefIssueKind::NullBranchDereference; + shouldEmit = true; + } + } + + if (!shouldEmit && derefComesFromUncheckedAllocator(inst, pointer)) + { + kind = NullDerefIssueKind::UncheckedAllocatorResult; + shouldEmit = true; + } + + if (!shouldEmit || !emitted.insert(&inst).second) + continue; + + NullDerefIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.pointerName = pointerDisplayName(pointer); + issue.kind = kind; + issue.inst = &inst; + issues.push_back(std::move(issue)); + } + } + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/OOBReadAnalysis.cpp b/src/analysis/OOBReadAnalysis.cpp new file mode 100644 index 0000000..9e09682 --- /dev/null +++ b/src/analysis/OOBReadAnalysis.cpp @@ -0,0 +1,567 @@ +#include "analysis/OOBReadAnalysis.hpp" + +#include "analysis/AnalyzerUtils.hpp" +#include "analysis/IntRanges.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + enum class RecentWriteKind + { + Unknown, + MemcpyLike, + MemsetNonZero, + StrcpyLike + }; + + struct RecentWrite + { + RecentWriteKind kind = RecentWriteKind::Unknown; + std::string apiName; + std::uint64_t writeSizeBytes = 0; + }; + + struct ObjectInfo + { + const llvm::Value* root = nullptr; + std::uint64_t sizeBytes = 0; + std::string displayName; + }; + + static const llvm::Function* getDirectCallee(const llvm::CallBase& call) + { + if (const llvm::Function* direct = call.getCalledFunction()) + return direct; + const llvm::Value* called = call.getCalledOperand(); + if (!called) + 
return nullptr; + return llvm::dyn_cast(called->stripPointerCasts()); + } + + static llvm::StringRef canonicalCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + if (name.starts_with("_")) + name = name.drop_front(); + return name; + } + + static std::optional tryGetConstantU64(const llvm::Value* value) + { + const auto* cst = llvm::dyn_cast_or_null(value); + if (!cst) + return std::nullopt; + return cst->getZExtValue(); + } + + static const llvm::Value* peelPointerFromSingleStoreSlot(const llvm::Value* value) + { + const llvm::Value* current = value ? value->stripPointerCasts() : nullptr; + for (unsigned depth = 0; current && depth < 6; ++depth) + { + const auto* load = llvm::dyn_cast(current); + if (!load) + break; + + const auto* slot = llvm::dyn_cast( + load->getPointerOperand()->stripPointerCasts()); + if (!slot || !slot->isStaticAlloca() || !slot->getAllocatedType()->isPointerTy()) + break; + + const llvm::StoreInst* uniqueStore = nullptr; + bool unsafe = false; + for (const llvm::Use& use : slot->uses()) + { + const auto* user = use.getUser(); + if (const auto* store = llvm::dyn_cast(user)) + { + if (store->getPointerOperand()->stripPointerCasts() != slot) + { + unsafe = true; + break; + } + if (uniqueStore && uniqueStore != store) + { + unsafe = true; + break; + } + uniqueStore = store; + continue; + } + + if (const auto* slotLoad = llvm::dyn_cast(user)) + { + if (slotLoad->getPointerOperand()->stripPointerCasts() != slot) + { + unsafe = true; + break; + } + continue; + } + + if (const auto* intrinsic = llvm::dyn_cast(user)) + { + if (llvm::isa(intrinsic) || + llvm::isa(intrinsic)) + { + continue; + } + } + + unsafe = true; + break; + } + + if (unsafe || !uniqueStore) + break; + current = uniqueStore->getValueOperand()->stripPointerCasts(); + } + + return current; + } + + static const llvm::StoreInst* findUniqueStoreToSlot(const llvm::AllocaInst& slot) + { + const llvm::StoreInst* uniqueStore = 
nullptr; + for (const llvm::Use& use : slot.uses()) + { + const auto* user = use.getUser(); + if (const auto* store = llvm::dyn_cast(user)) + { + if (store->getPointerOperand()->stripPointerCasts() != &slot) + return nullptr; + if (uniqueStore && uniqueStore != store) + return nullptr; + uniqueStore = store; + continue; + } + + if (const auto* load = llvm::dyn_cast(user)) + { + if (load->getPointerOperand()->stripPointerCasts() != &slot) + return nullptr; + continue; + } + + if (const auto* intrinsic = llvm::dyn_cast(user)) + { + if (llvm::isa(intrinsic) || + llvm::isa(intrinsic)) + { + continue; + } + } + + return nullptr; + } + + return uniqueStore; + } + + static std::optional getObjectSizeBytes(const llvm::Value* root, + const llvm::DataLayout& dataLayout) + { + if (!root) + return std::nullopt; + + if (const auto* allocaInst = llvm::dyn_cast(root)) + { + llvm::Type* allocatedType = allocaInst->getAllocatedType(); + if (!allocaInst->isArrayAllocation()) + return dataLayout.getTypeAllocSize(allocatedType); + + const auto* count = llvm::dyn_cast(allocaInst->getArraySize()); + if (!count) + return std::nullopt; + const std::uint64_t n = count->getZExtValue(); + const std::uint64_t elem = dataLayout.getTypeAllocSize(allocatedType); + return n * elem; + } + + if (const auto* global = llvm::dyn_cast(root)) + { + llvm::Type* valueType = global->getValueType(); + if (!valueType->isSized()) + return std::nullopt; + return dataLayout.getTypeAllocSize(valueType); + } + + return std::nullopt; + } + + static std::optional resolveObjectInfo(const llvm::Value* pointer, + const llvm::DataLayout& dataLayout) + { + if (!pointer || !pointer->getType()->isPointerTy()) + return std::nullopt; + + const llvm::Value* base = peelPointerFromSingleStoreSlot(pointer); + base = llvm::getUnderlyingObject(base, 32); + base = peelPointerFromSingleStoreSlot(base); + if (!base) + return std::nullopt; + + const std::optional size = getObjectSizeBytes(base, dataLayout); + if (!size || *size == 0) 
+ return std::nullopt; + + ObjectInfo info; + info.root = base; + info.sizeBytes = *size; + info.displayName = base->hasName() ? base->getName().str() : std::string(""); + return info; + } + + static bool + dependsOnFunctionArgumentRecursive(const llvm::Value* value, + llvm::SmallPtrSetImpl& visited, + unsigned depth) + { + if (!value || depth > 32) + return false; + if (!visited.insert(value).second) + return false; + + if (llvm::isa(value)) + return true; + if (llvm::isa(value)) + return false; + + if (const llvm::Value* peeled = peelPointerFromSingleStoreSlot(value)) + { + if (dependsOnFunctionArgumentRecursive(peeled, visited, depth + 1)) + return true; + } + + if (const auto* load = llvm::dyn_cast(value)) + { + const auto* slot = llvm::dyn_cast( + load->getPointerOperand()->stripPointerCasts()); + if (slot && slot->isStaticAlloca()) + { + if (const llvm::StoreInst* uniqueStore = findUniqueStoreToSlot(*slot)) + { + if (dependsOnFunctionArgumentRecursive(uniqueStore->getValueOperand(), + visited, depth + 1)) + { + return true; + } + } + } + } + + if (const auto* instruction = llvm::dyn_cast(value)) + { + for (const llvm::Value* operand : instruction->operands()) + { + if (dependsOnFunctionArgumentRecursive(operand, visited, depth + 1)) + return true; + } + } + + return false; + } + + static bool dependsOnFunctionArgument(const llvm::Value* value) + { + llvm::SmallPtrSet visited; + return dependsOnFunctionArgumentRecursive(value, visited, 0); + } + + static std::optional + lookupRange(const llvm::Value* value, const std::map& ranges) + { + if (!value) + return std::nullopt; + + auto it = ranges.find(value); + if (it != ranges.end()) + return it->second; + + if (const auto* cast = llvm::dyn_cast(value)) + return lookupRange(cast->getOperand(0), ranges); + + return std::nullopt; + } + } // namespace + + std::vector + analyzeOOBReads(llvm::Module& mod, const llvm::DataLayout& dataLayout, + const std::function& shouldAnalyze) + { + std::vector issues; + + for 
(llvm::Function& function : mod) + { + if (function.isDeclaration() || !shouldAnalyze(function)) + continue; + + const std::map ranges = + computeIntRangesFromICmps(function); + std::unordered_map recentWrites; + std::unordered_map heapAllocBytes; + + for (llvm::BasicBlock& block : function) + { + for (llvm::Instruction& inst : block) + { + if (const auto* call = llvm::dyn_cast(&inst)) + { + const llvm::Function* callee = getDirectCallee(*call); + llvm::StringRef calleeName; + if (const auto* intrinsic = llvm::dyn_cast(call)) + { + switch (intrinsic->getIntrinsicID()) + { + case llvm::Intrinsic::memcpy: + calleeName = "memcpy"; + break; + case llvm::Intrinsic::memmove: + calleeName = "memmove"; + break; + case llvm::Intrinsic::memset: + calleeName = "memset"; + break; + default: + break; + } + } + if (calleeName.empty()) + { + if (!callee) + continue; + calleeName = canonicalCalleeName(callee->getName()); + } + + if (calleeName == "malloc" && call->arg_size() >= 1) + { + if (auto size = tryGetConstantU64(call->getArgOperand(0))) + { + heapAllocBytes[&inst] = *size; + heapAllocBytes[llvm::getUnderlyingObject(&inst, 32)] = *size; + } + } + else if (calleeName == "calloc" && call->arg_size() >= 2) + { + auto count = tryGetConstantU64(call->getArgOperand(0)); + auto elem = tryGetConstantU64(call->getArgOperand(1)); + if (count && elem) + { + const std::uint64_t total = (*count) * (*elem); + heapAllocBytes[&inst] = total; + heapAllocBytes[llvm::getUnderlyingObject(&inst, 32)] = total; + } + } + else if (calleeName == "realloc" && call->arg_size() >= 2) + { + if (auto size = tryGetConstantU64(call->getArgOperand(1))) + { + heapAllocBytes[&inst] = *size; + heapAllocBytes[llvm::getUnderlyingObject(&inst, 32)] = *size; + } + } + + if (calleeName == "memcpy" || calleeName == "memmove" || + calleeName == "__memcpy_chk" || calleeName == "__memmove_chk") + { + if (call->arg_size() >= 3) + { + auto obj = resolveObjectInfo(call->getArgOperand(0), dataLayout); + auto len = 
tryGetConstantU64(call->getArgOperand(2)); + if (obj && len) + { + recentWrites[obj->root] = RecentWrite{ + RecentWriteKind::MemcpyLike, calleeName.str(), *len}; + } + } + } + else if (calleeName == "memset" || calleeName == "__memset_chk") + { + if (call->arg_size() >= 3) + { + auto obj = resolveObjectInfo(call->getArgOperand(0), dataLayout); + auto fill = tryGetConstantU64(call->getArgOperand(1)); + auto len = tryGetConstantU64(call->getArgOperand(2)); + if (obj && fill && len) + { + RecentWriteKind kind = (*fill == 0) + ? RecentWriteKind::Unknown + : RecentWriteKind::MemsetNonZero; + recentWrites[obj->root] = + RecentWrite{kind, calleeName.str(), *len}; + } + } + } + else if (calleeName == "strncpy") + { + if (call->arg_size() >= 3) + { + auto obj = resolveObjectInfo(call->getArgOperand(0), dataLayout); + auto len = tryGetConstantU64(call->getArgOperand(2)); + if (obj && len) + { + recentWrites[obj->root] = + RecentWrite{RecentWriteKind::MemcpyLike, "strncpy", *len}; + } + } + } + else if (calleeName == "strcpy" || calleeName == "__strcpy_chk") + { + if (call->arg_size() >= 1) + { + auto obj = resolveObjectInfo(call->getArgOperand(0), dataLayout); + if (obj) + { + recentWrites[obj->root] = RecentWrite{ + RecentWriteKind::StrcpyLike, calleeName.str(), 0}; + } + } + } + else if (calleeName == "strlen") + { + if (call->arg_size() >= 1) + { + auto obj = resolveObjectInfo(call->getArgOperand(0), dataLayout); + if (!obj) + continue; + + auto it = recentWrites.find(obj->root); + if (it == recentWrites.end()) + continue; + + const RecentWrite& write = it->second; + const bool suspiciousByCopy = + (write.kind == RecentWriteKind::MemcpyLike && + write.writeSizeBytes >= obj->sizeBytes); + const bool suspiciousByMemset = + (write.kind == RecentWriteKind::MemsetNonZero && + write.writeSizeBytes >= obj->sizeBytes); + + if (!suspiciousByCopy && !suspiciousByMemset) + continue; + + OOBReadIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = 
getFunctionSourcePath(function); + issue.bufferName = obj->displayName; + issue.apiName = "strlen"; + issue.kind = OOBReadIssueKind::MissingNullTerminator; + issue.bufferSizeBytes = obj->sizeBytes; + issue.writeSizeBytes = write.writeSizeBytes; + issue.inst = &inst; + issues.push_back(std::move(issue)); + } + } + + continue; + } + + if (const auto* store = llvm::dyn_cast(&inst)) + { + const llvm::Value* stored = store->getValueOperand()->stripPointerCasts(); + auto allocIt = heapAllocBytes.find(stored); + if (allocIt != heapAllocBytes.end()) + { + const llvm::Value* slot = + store->getPointerOperand()->stripPointerCasts(); + heapAllocBytes[slot] = allocIt->second; + } + } + + const auto* load = llvm::dyn_cast(&inst); + if (!load) + continue; + + const auto* gep = + llvm::dyn_cast(load->getPointerOperand()); + if (!gep || gep->getNumIndices() == 0) + continue; + + const llvm::Value* basePtr = gep->getPointerOperand(); + const llvm::Value* peeledBase = peelPointerFromSingleStoreSlot(basePtr); + const llvm::Value* baseRoot = llvm::getUnderlyingObject(peeledBase, 32); + baseRoot = peelPointerFromSingleStoreSlot(baseRoot); + if (!baseRoot) + continue; + + auto bytesIt = heapAllocBytes.find(baseRoot); + if (bytesIt == heapAllocBytes.end()) + bytesIt = heapAllocBytes.find(peeledBase); + if (bytesIt == heapAllocBytes.end()) + bytesIt = heapAllocBytes.find(basePtr->stripPointerCasts()); + if (bytesIt == heapAllocBytes.end()) + continue; + + llvm::Type* elementType = gep->getSourceElementType(); + if (!elementType || !elementType->isSized()) + continue; + const std::uint64_t elementSize = dataLayout.getTypeAllocSize(elementType); + if (elementSize == 0) + continue; + + const std::uint64_t capacity = bytesIt->second / elementSize; + if (capacity == 0) + continue; + + const llvm::Value* indexValue = gep->getOperand(gep->getNumOperands() - 1); + bool suspicious = false; + + if (const auto* cst = llvm::dyn_cast(indexValue)) + { + const std::int64_t index = cst->getSExtValue(); + 
if (index < 0 || static_cast(index) >= capacity) + suspicious = true; + } + else + { + const std::optional range = lookupRange(indexValue, ranges); + if (range && range->hasLower && range->lower >= 0 && range->hasUpper && + static_cast(range->upper) < capacity) + { + suspicious = false; + } + else if (dependsOnFunctionArgument(indexValue)) + { + suspicious = true; + } + } + + if (!suspicious) + continue; + + OOBReadIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.bufferName = baseRoot->hasName() ? baseRoot->getName().str() + : std::string(""); + issue.apiName = "indexed-load"; + issue.kind = OOBReadIssueKind::HeapIndexOutOfBounds; + issue.capacityElements = capacity; + issue.inst = &inst; + issues.push_back(std::move(issue)); + } + } + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/ResourceLifetimeAnalysis.cpp b/src/analysis/ResourceLifetimeAnalysis.cpp index 34b44d2..bc07aa7 100644 --- a/src/analysis/ResourceLifetimeAnalysis.cpp +++ b/src/analysis/ResourceLifetimeAnalysis.cpp @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include @@ -137,6 +139,7 @@ namespace ctrace::stack::analysis std::string resourceKind; std::string funcName; const llvm::Instruction* firstAcquireInst = nullptr; + std::vector releaseInsts; int acquires = 0; int releases = 0; bool escapesViaReturn = false; @@ -1263,6 +1266,303 @@ namespace ctrace::stack::analysis return false; } + static bool callParamHasNonCaptureLikeAttr(const llvm::CallBase& CB, unsigned argIndex) + { + return CB.paramHasAttr(argIndex, llvm::Attribute::NoCapture) || + CB.paramHasAttr(argIndex, llvm::Attribute::ByVal) || + CB.paramHasAttr(argIndex, llvm::Attribute::ByRef) || + CB.paramHasAttr(argIndex, llvm::Attribute::StructRet); + } + + static bool isPointerSlotLocalStorage(const StorageKey& storage) + { + return storage.scope == StorageScope::Local && storage.localAlloca && + 
storage.offset == 0 && storage.localAlloca->getAllocatedType()->isPointerTy(); + } + + static bool isCompilerTemporaryLocalStorage(const StorageKey& storage) + { + if (storage.scope != StorageScope::Local || !storage.localAlloca) + return false; + + const std::string allocaName = deriveAllocaName(storage.localAlloca); + return isLikelyCompilerTemporaryName(allocaName); + } + + static bool shouldReportIncompleteInterprocOnLocalStorage(const StorageKey& storage) + { + if (storage.scope != StorageScope::Local) + return true; + if (isCompilerTemporaryLocalStorage(storage)) + return false; + + // IncompleteInterproc is most actionable for explicit local handle slots. + // Aggregate/object locals frequently carry internal allocator state in + // summaries and generate non-actionable noise. + return isPointerSlotLocalStorage(storage); + } + + static llvm::StringRef canonicalExternalCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + while (name.starts_with("_")) + name = name.drop_front(); + + const std::size_t dollarPos = name.find('$'); + if (dollarPos != llvm::StringRef::npos) + name = name.take_front(dollarPos); + + return name; + } + + static bool isLikelyPointerDereferenceCallee(llvm::StringRef name) + { + name = canonicalExternalCalleeName(name); + return name == "printf" || name == "fprintf" || name == "sprintf" || + name == "snprintf" || name == "vprintf" || name == "vfprintf" || + name == "puts" || name == "fputs" || name == "strlen" || name == "strcmp" || + name == "strncmp" || name == "strcpy" || name == "strncpy" || name == "strcat" || + name == "strncat" || name == "memcpy" || name == "memcpy_chk" || + name == "memmove" || name == "memmove_chk" || name == "memset" || + name == "memset_chk" || name == "memcmp" || name == "write" || name == "send" || + name == "sendto" || name == "sendmsg" || name == "recv" || name == "recvfrom" || + name == "fwrite" || name == "fwrite_unlocked" || name == "fread" || 
+ name == "read"; + } + + static bool callArgumentIsDirectReleaseArg(const llvm::CallBase& CB, + const llvm::Function* callee, + const ResourceModel& model, unsigned argIndex) + { + if (!callee) + return false; + + for (const ResourceRule& rule : model.rules) + { + if (rule.action != RuleAction::ReleaseArg) + continue; + if (rule.argIndex != argIndex) + continue; + if (ruleMatchesFunction(rule, *callee)) + return true; + } + + return false; + } + + static bool callArgumentLikelyDereferenced(const llvm::CallBase& CB, + const llvm::Function* callee, unsigned argIndex) + { + if (argIndex >= CB.arg_size()) + return false; + const llvm::Value* arg = CB.getArgOperand(argIndex); + if (!arg || !arg->getType()->isPointerTy()) + return false; + + if (CB.paramHasAttr(argIndex, llvm::Attribute::ReadNone)) + return false; + if (CB.paramHasAttr(argIndex, llvm::Attribute::ReadOnly) || + CB.paramHasAttr(argIndex, llvm::Attribute::WriteOnly) || + CB.paramHasAttr(argIndex, llvm::Attribute::ByVal) || + CB.paramHasAttr(argIndex, llvm::Attribute::ByRef)) + { + return true; + } + + if (!callee) + return false; + if (callee->doesNotAccessMemory()) + return false; + if (isLikelyPointerDereferenceCallee(callee->getName())) + return true; + + // Be conservative for external declarations only. For local defined + // calls, dedicated summaries/model rules carry ownership effects. 
+ if (callee->isDeclaration()) + return true; + + return false; + } + + static bool instructionMayReach(const llvm::Instruction& from, const llvm::Instruction& to) + { + if (from.getFunction() != to.getFunction()) + return false; + if (&from == &to) + return true; + if (from.getParent() == to.getParent()) + return from.comesBefore(&to); + return llvm::isPotentiallyReachable(&from, &to); + } + + static bool + valueFeedsOnlyDirectReleaseArgs(const llvm::Value* value, const ResourceModel& model, + llvm::SmallPtrSet& visited, + bool& sawReleaseUse, unsigned depth = 0) + { + if (!value || depth > 10) + return false; + + value = value->stripPointerCasts(); + if (!visited.insert(value).second) + return true; + + bool sawMeaningfulUse = false; + for (const llvm::Use& U : value->uses()) + { + const llvm::User* user = U.getUser(); + if (const auto* II = llvm::dyn_cast(user)) + { + if (llvm::isa(II) || + llvm::isa(II)) + { + continue; + } + } + + if (const auto* CB = llvm::dyn_cast(user)) + { + const llvm::Function* callee = resolveDirectCallee(*CB); + if (!callee) + return false; + + bool callUseIsReleaseOnly = false; + for (unsigned argIdx = 0; argIdx < CB->arg_size(); ++argIdx) + { + if (CB->getArgOperand(argIdx)->stripPointerCasts() != value) + continue; + + sawMeaningfulUse = true; + if (!callArgumentIsDirectReleaseArg(*CB, callee, model, argIdx)) + return false; + + callUseIsReleaseOnly = true; + sawReleaseUse = true; + } + if (!callUseIsReleaseOnly) + return false; + continue; + } + + if (const auto* CI = llvm::dyn_cast(user)) + { + sawMeaningfulUse = true; + if (!valueFeedsOnlyDirectReleaseArgs(CI, model, visited, sawReleaseUse, + depth + 1)) + { + return false; + } + continue; + } + if (const auto* BC = llvm::dyn_cast(user)) + { + sawMeaningfulUse = true; + if (!valueFeedsOnlyDirectReleaseArgs(BC, model, visited, sawReleaseUse, + depth + 1)) + { + return false; + } + continue; + } + if (const auto* GEP = llvm::dyn_cast(user)) + { + sawMeaningfulUse = true; + if 
(!valueFeedsOnlyDirectReleaseArgs(GEP, model, visited, sawReleaseUse, + depth + 1)) + { + return false; + } + continue; + } + if (const auto* PN = llvm::dyn_cast(user)) + { + sawMeaningfulUse = true; + if (!valueFeedsOnlyDirectReleaseArgs(PN, model, visited, sawReleaseUse, + depth + 1)) + { + return false; + } + continue; + } + if (const auto* Sel = llvm::dyn_cast(user)) + { + sawMeaningfulUse = true; + if (!valueFeedsOnlyDirectReleaseArgs(Sel, model, visited, sawReleaseUse, + depth + 1)) + { + return false; + } + continue; + } + + // Any store/comparison/arithmetic or unknown use means this load + // participates in logic other than a pure release call path. + return false; + } + + return sawMeaningfulUse && sawReleaseUse; + } + + static bool loadFeedsOnlyDirectReleaseArgs(const llvm::LoadInst& load, + const ResourceModel& model) + { + llvm::SmallPtrSet visited; + bool sawReleaseUse = false; + if (!valueFeedsOnlyDirectReleaseArgs(&load, model, visited, sawReleaseUse)) + return false; + return sawReleaseUse; + } + + static const llvm::AllocaInst* + findOwnerLocalPointerSlotForReleasedHandleArg(const llvm::Value* handleArg, + const llvm::DataLayout& DL) + { + const auto* releasedLoad = llvm::dyn_cast_or_null( + handleArg ? 
handleArg->stripPointerCasts() : nullptr); + if (!releasedLoad) + return nullptr; + + const llvm::Value* fieldPtr = releasedLoad->getPointerOperand(); + if (!fieldPtr || !fieldPtr->getType()->isPointerTy()) + return nullptr; + + int64_t signedOffset = 0; + const llvm::Value* fieldBase = llvm::GetPointerBaseWithConstantOffset( + fieldPtr->stripPointerCasts(), signedOffset, DL, true); + if (!fieldBase || signedOffset < 0) + return nullptr; + + const auto* ownerLoad = llvm::dyn_cast(fieldBase->stripPointerCasts()); + if (!ownerLoad) + return nullptr; + + const auto* ownerSlot = llvm::dyn_cast( + ownerLoad->getPointerOperand()->stripPointerCasts()); + if (!ownerSlot || !ownerSlot->isStaticAlloca() || + !ownerSlot->getAllocatedType()->isPointerTy()) + { + return nullptr; + } + return ownerSlot; + } + + static bool localPointerSlotContentMayReachReturn(const llvm::Function& F, + const llvm::AllocaInst& slot) + { + for (const llvm::Use& use : slot.uses()) + { + const auto* LI = llvm::dyn_cast(use.getUser()); + if (!LI) + continue; + if (LI->getPointerOperand()->stripPointerCasts() != &slot) + continue; + if (valueMayReachReturn(LI)) + return true; + } + return false; + } + static bool argumentMayCarryAddressOfLocal(const llvm::Function& F, const llvm::DataLayout& DL, const llvm::Value* argValue, @@ -1316,6 +1616,11 @@ namespace ctrace::stack::analysis const llvm::DataLayout& DL, const std::function& shouldAnalyze) { + // LLVM capture tracking can prove that a local object never escapes; + // in that case, no unmodeled call can acquire through its address. 
+ if (llvm::isNonEscapingLocalObject(&sourceSlot)) + return false; + for (const llvm::BasicBlock& BB : F) { for (const llvm::Instruction& I : BB) @@ -1377,6 +1682,8 @@ namespace ctrace::stack::analysis for (unsigned i = 0; i < CB->arg_size(); ++i) { + if (callParamHasNonCaptureLikeAttr(*CB, i)) + continue; if (argumentMayCarryAddressOfLocal(F, DL, CB->getArgOperand(i), sourceSlot)) return true; } @@ -2104,6 +2411,8 @@ namespace ctrace::stack::analysis std::unordered_map localStates; std::unordered_map unknownAcquireEscapeCache; std::unordered_set interprocUncertaintyReported; + std::unordered_set useAfterReleaseReported; + std::unordered_set releasedEscapeReported; auto trackAcquire = [&](const StorageKey& storage, const std::string& resourceKind, const llvm::Instruction* anchorInst) { @@ -2136,6 +2445,115 @@ namespace ctrace::stack::analysis } }; + auto sameLocalStorage = [&](const StorageKey& lhs, const StorageKey& rhs) + { + if (lhs.scope != StorageScope::Local || rhs.scope != StorageScope::Local) + return false; + if (lhs.key == rhs.key) + return true; + return lhs.localAlloca && rhs.localAlloca && lhs.localAlloca == rhs.localAlloca && + lhs.offset == rhs.offset; + }; + + auto reportUseAfterReleaseIfNeeded = + [&](const StorageKey& storage, const llvm::Instruction* anchorInst) + { + if (!storage.valid() || storage.scope != StorageScope::Local || !anchorInst) + return; + + for (const auto& entry : localStates) + { + const LocalHandleState& state = entry.second; + if (!sameLocalStorage(state.storage, storage)) + continue; + if (state.acquires <= 0) + continue; + if (state.releases <= 0) + continue; + + bool reachableRelease = false; + for (const llvm::Instruction* releaseInst : state.releaseInsts) + { + if (!releaseInst) + continue; + if (instructionMayReach(*releaseInst, *anchorInst)) + { + reachableRelease = true; + break; + } + } + if (!reachableRelease) + continue; + + std::ostringstream dedupKey; + if (state.storage.localAlloca) + { + dedupKey << 
static_cast(state.storage.localAlloca) << ":" + << state.storage.offset; + } + else + { + dedupKey << state.storage.key; + } + dedupKey << "|"; + if (const llvm::DebugLoc loc = anchorInst->getDebugLoc()) + dedupKey << loc.getLine(); + else + dedupKey << static_cast(anchorInst); + if (!useAfterReleaseReported.insert(dedupKey.str()).second) + continue; + + ResourceLifetimeIssue issue; + issue.funcName = F.getName().str(); + issue.resourceKind = state.resourceKind; + issue.handleName = state.storage.displayName.empty() + ? std::string("") + : state.storage.displayName; + issue.inst = anchorInst; + issue.kind = ResourceLifetimeIssueKind::UseAfterRelease; + issues.push_back(std::move(issue)); + } + }; + + auto reportReleasedHandleEscapesIfNeeded = [&](const llvm::Value* releasedHandleValue, + const std::string& resourceKind, + const llvm::Instruction* anchorInst) + { + if (!releasedHandleValue || !anchorInst) + return; + + // "released handle escapes through returned owner object" only makes sense when a + // function can actually return an owner-like value. 
+ if (methodInfo.isDtor || F.getReturnType()->isVoidTy()) + return; + if (!F.getReturnType()->isPointerTy() && !F.getReturnType()->isAggregateType()) + return; + + const llvm::AllocaInst* ownerSlot = + findOwnerLocalPointerSlotForReleasedHandleArg(releasedHandleValue, DL); + if (!ownerSlot) + return; + if (!localPointerSlotContentMayReachReturn(F, *ownerSlot)) + return; + + std::string ownerName = deriveAllocaName(ownerSlot); + if (ownerName.empty() || ownerName == "") + ownerName = "local"; + + std::ostringstream dedupKey; + dedupKey << static_cast(ownerSlot) << "|" << resourceKind; + if (!releasedEscapeReported.insert(dedupKey.str()).second) + return; + + ResourceLifetimeIssue issue; + issue.funcName = F.getName().str(); + issue.resourceKind = resourceKind; + issue.handleName = ownerName; + issue.inst = anchorInst; + issue.kind = ResourceLifetimeIssueKind::ReleasedHandleEscapes; + issues.push_back(std::move(issue)); + }; + auto trackRelease = [&](const StorageKey& storage, const std::string& resourceKind, const llvm::Instruction* anchorInst, bool fromSummary) { @@ -2176,27 +2594,23 @@ namespace ctrace::stack::analysis } state.releases += 1; + if (anchorInst) + state.releaseInsts.push_back(anchorInst); if (state.releases > state.acquires) { if (state.acquires == 0 && state.storage.localAlloca) { - // Parameter shadow slots often appear as local allocas under optnone. - // If the slot is initialized from a function argument and has no local - // acquires tracked, treat release as forwarding ownership, not double release. - if (resolveAllocaArgumentShadow(*state.storage.localAlloca, false) != - nullptr) - return; - - // Summary-originated releases are conservative by nature: callee-local - // acquisitions may be hidden behind unknown/external calls. 
- if (fromSummary) + const bool shouldReportInterproc = + shouldReportIncompleteInterprocOnLocalStorage(state.storage); + auto emitIncompleteInterproc = [&](const char* debugPath) { + if (!shouldReportInterproc) + return; if (interprocUncertaintyReported.insert(stateKey).second) { - coretrace::log( - coretrace::Level::Info, - "[DEBUG-INTERPROC] PATH=fromSummary func={} handle={}\n", - F.getName().str(), storage.displayName); + coretrace::log(coretrace::Level::Info, + "[DEBUG-INTERPROC] PATH={} func={} handle={}\n", + debugPath, F.getName().str(), storage.displayName); ResourceLifetimeIssue issue; issue.funcName = F.getName().str(); issue.resourceKind = resourceKind; @@ -2207,29 +2621,19 @@ namespace ctrace::stack::analysis issue.kind = ResourceLifetimeIssueKind::IncompleteInterproc; issues.push_back(std::move(issue)); } - state.ownership = OwnershipState::Unknown; + }; + + // Parameter shadow slots often appear as local allocas under optnone. + // If the slot is initialized from a function argument and has no local + // acquires tracked, treat release as forwarding ownership, not double release. + if (resolveAllocaArgumentShadow(*state.storage.localAlloca, false) != + nullptr) return; - } if (localStorageHasExplicitExternalStore(F, *state.storage.localAlloca, state.storage.offset, DL)) { - if (interprocUncertaintyReported.insert(stateKey).second) - { - coretrace::log( - coretrace::Level::Info, - "[DEBUG-INTERPROC] PATH=externalStore func={} handle={}\n", - F.getName().str(), storage.displayName); - ResourceLifetimeIssue issue; - issue.funcName = F.getName().str(); - issue.resourceKind = resourceKind; - issue.handleName = storage.displayName.empty() - ? 
std::string("") - : storage.displayName; - issue.inst = anchorInst; - issue.kind = ResourceLifetimeIssueKind::IncompleteInterproc; - issues.push_back(std::move(issue)); - } + emitIncompleteInterproc("externalStore"); state.ownership = OwnershipState::Unknown; return; } @@ -2253,22 +2657,17 @@ namespace ctrace::stack::analysis // hard double-release error in this case. if (cacheIt->second) { - if (interprocUncertaintyReported.insert(stateKey).second) - { - coretrace::log( - coretrace::Level::Info, - "[DEBUG-INTERPROC] PATH=escapeUnmodeled func={} handle={}\n", - F.getName().str(), storage.displayName); - ResourceLifetimeIssue issue; - issue.funcName = F.getName().str(); - issue.resourceKind = resourceKind; - issue.handleName = storage.displayName.empty() - ? std::string("") - : storage.displayName; - issue.inst = anchorInst; - issue.kind = ResourceLifetimeIssueKind::IncompleteInterproc; - issues.push_back(std::move(issue)); - } + emitIncompleteInterproc("escapeUnmodeled"); + state.ownership = OwnershipState::Unknown; + return; + } + + // Summary-originated releases are conservative by nature. + // If no concrete unknown-acquire evidence is found and this local + // is not a handle-like slot, suppress non-actionable noise. 
+ if (fromSummary) + { + emitIncompleteInterproc("fromSummary"); state.ownership = OwnershipState::Unknown; return; } @@ -2297,6 +2696,34 @@ namespace ctrace::stack::analysis { for (llvm::Instruction& I : BB) { + if (const auto* LI = llvm::dyn_cast(&I)) + { + if (loadFeedsOnlyDirectReleaseArgs(*LI, model)) + continue; + StorageKey storage = + resolveHandleStorage(LI->getPointerOperand(), F, DL, methodInfo); + reportUseAfterReleaseIfNeeded(storage, &I); + } + else if (const auto* SI = llvm::dyn_cast(&I)) + { + StorageKey storage = + resolveHandleStorage(SI->getPointerOperand(), F, DL, methodInfo); + reportUseAfterReleaseIfNeeded(storage, &I); + } + else if (const auto* MI = llvm::dyn_cast(&I)) + { + StorageKey dstStorage = + resolveHandleStorage(MI->getDest(), F, DL, methodInfo); + reportUseAfterReleaseIfNeeded(dstStorage, &I); + + if (const auto* MTI = llvm::dyn_cast(MI)) + { + StorageKey srcStorage = + resolveHandleStorage(MTI->getSource(), F, DL, methodInfo); + reportUseAfterReleaseIfNeeded(srcStorage, &I); + } + } + auto* CB = llvm::dyn_cast(&I); if (!CB) continue; @@ -2305,6 +2732,20 @@ namespace ctrace::stack::analysis if (!callee) continue; + for (unsigned argIdx = 0; argIdx < CB->arg_size(); ++argIdx) + { + const llvm::Value* arg = CB->getArgOperand(argIdx); + if (!arg || !arg->getType()->isPointerTy()) + continue; + if (callArgumentIsDirectReleaseArg(*CB, callee, model, argIdx)) + continue; + if (!callArgumentLikelyDereferenced(*CB, callee, argIdx)) + continue; + + StorageKey storage = resolveHandleStorage(arg, F, DL, methodInfo); + reportUseAfterReleaseIfNeeded(storage, &I); + } + bool matchedDirectRule = false; for (const ResourceRule& rule : model.rules) { @@ -2344,6 +2785,7 @@ namespace ctrace::stack::analysis const llvm::Value* handleArg = CB->getArgOperand(rule.argIndex); StorageKey storage = resolveHandleStorage(handleArg, F, DL, methodInfo); trackRelease(storage, rule.resourceKind, &I, false); + reportReleasedHandleEscapesIfNeeded(handleArg, 
rule.resourceKind, &I); break; } } @@ -2434,6 +2876,7 @@ namespace ctrace::stack::analysis continue; StorageKey storage = resolveHandleStorage(retVal, F, DL, methodInfo); + reportUseAfterReleaseIfNeeded(storage, RI); for (auto& entry : localStates) { LocalHandleState& state = entry.second; diff --git a/src/analysis/StackBufferAnalysis.cpp b/src/analysis/StackBufferAnalysis.cpp index f59e89c..126f01b 100644 --- a/src/analysis/StackBufferAnalysis.cpp +++ b/src/analysis/StackBufferAnalysis.cpp @@ -10,8 +10,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -65,6 +67,72 @@ namespace ctrace::stack::analysis return AnalysisComplexityBudgets{}; } + static bool isArrayBackedType(const llvm::Type* type) + { + using namespace llvm; + if (!type) + return false; + if (type->isArrayTy()) + return true; + + if (auto* structTy = dyn_cast(type)) + { + for (unsigned i = 0; i < structTy->getNumElements(); ++i) + { + if (isArrayBackedType(structTy->getElementType(i))) + return true; + } + } + + return false; + } + + static std::string buildAliasPathString(const std::vector& aliasPath) + { + if (aliasPath.empty()) + return {}; + + std::vector normalized(aliasPath.rbegin(), aliasPath.rend()); + std::string chain; + for (std::size_t i = 0; i < normalized.size(); ++i) + { + chain += normalized[i]; + if (i + 1 < normalized.size()) + chain += " -> "; + } + return chain; + } + + static std::optional getGlobalElementCount(const llvm::GlobalVariable* GV) + { + using namespace llvm; + if (!GV) + return std::nullopt; + + if (auto* arrayTy = dyn_cast(GV->getValueType())) + { + return arrayTy->getNumElements(); + } + + return std::nullopt; + } + + static const llvm::GlobalVariable* resolveArrayGlobalFromPointer(const llvm::Value* V) + { + using namespace llvm; + if (!V) + return nullptr; + + const Value* base = getUnderlyingObject(V); + auto* GV = dyn_cast_or_null(base); + if (!GV) + return nullptr; + if (!isArrayBackedType(GV->getValueType())) + 
return nullptr; + + return GV; + } + // Size (in elements) for a stack array alloca static std::optional getAllocaElementCount(llvm::AllocaInst* AI) { @@ -122,23 +190,13 @@ namespace ctrace::stack::analysis auto isArrayAlloca = [](const AllocaInst* AI) -> bool { - Type* T = AI->getAllocatedType(); // Consider a "stack buffer" as: // - real arrays, // - array-typed allocas (VLA in IR), - // - structs that contain at least one array field. - if (T->isArrayTy() || AI->isArrayAllocation()) + // - structs containing array fields. + if (AI->isArrayAllocation()) return true; - - if (auto* ST = llvm::dyn_cast(T)) - { - for (unsigned i = 0; i < ST->getNumElements(); ++i) - { - if (ST->getElementType(i)->isArrayTy()) - return true; - } - } - return false; + return isArrayBackedType(AI->getAllocatedType()); }; // Avoid weird aliasing loops @@ -347,6 +405,456 @@ namespace ctrace::stack::analysis allowPointerStoreScan); } + static void intersectRange(IntRange& target, const IntRange& incoming) + { + if (incoming.hasLower) + { + if (!target.hasLower || incoming.lower > target.lower) + { + target.hasLower = true; + target.lower = incoming.lower; + } + } + + if (incoming.hasUpper) + { + if (!target.hasUpper || incoming.upper < target.upper) + { + target.hasUpper = true; + target.upper = incoming.upper; + } + } + } + + static const llvm::StoreInst* findUniqueStoreToKeyInBlock(const llvm::BasicBlock& block, + const llvm::Value* key) + { + using namespace llvm; + const StoreInst* uniqueStore = nullptr; + for (const Instruction& I : block) + { + const auto* SI = dyn_cast(&I); + if (!SI || SI->getPointerOperand() != key) + continue; + if (uniqueStore) + return nullptr; + uniqueStore = SI; + } + return uniqueStore; + } + + static std::size_t countStoresToKeyInFunction(const llvm::Function& F, + const llvm::Value* key) + { + using namespace llvm; + std::size_t count = 0; + for (const BasicBlock& BB : F) + { + for (const Instruction& I : BB) + { + const auto* SI = dyn_cast(&I); + if (SI 
&& SI->getPointerOperand() == key) + ++count; + } + } + return count; + } + + static std::optional extractConstantInitValue(const llvm::StoreInst& store) + { + using namespace llvm; + const auto* C = dyn_cast(store.getValueOperand()); + if (!C) + return std::nullopt; + return C->getSExtValue(); + } + + static bool isDirectLoadFromKey(const llvm::Value* value, const llvm::Value* key) + { + using namespace llvm; + const auto* LI = dyn_cast(value); + if (!LI) + return false; + return LI->getPointerOperand() == key; + } + + static std::optional extractConstantStepValue(const llvm::StoreInst& store, + const llvm::Value* key) + { + using namespace llvm; + const auto* BO = dyn_cast(store.getValueOperand()); + if (!BO) + return std::nullopt; + + const Value* lhs = BO->getOperand(0); + const Value* rhs = BO->getOperand(1); + const auto* lhsC = dyn_cast(lhs); + const auto* rhsC = dyn_cast(rhs); + + long long step = 0; + switch (BO->getOpcode()) + { + case Instruction::Add: + if (isDirectLoadFromKey(lhs, key) && rhsC) + step = rhsC->getSExtValue(); + else if (lhsC && isDirectLoadFromKey(rhs, key)) + step = lhsC->getSExtValue(); + else + return std::nullopt; + break; + case Instruction::Sub: + if (isDirectLoadFromKey(lhs, key) && rhsC) + step = -rhsC->getSExtValue(); + else + return std::nullopt; + break; + default: + return std::nullopt; + } + + if (step == 0) + return std::nullopt; + return step; + } + + static std::optional deriveBoundedRangeFromNeLoopGuard( + const llvm::BasicBlock& target, const llvm::BasicBlock& condBlock, + const llvm::Value* key, const llvm::ConstantInt& boundConstant, bool takesTrueEdge) + { + using namespace llvm; + if (!takesTrueEdge) + return std::nullopt; + if (!isa(key)) + return std::nullopt; + + const Function* parent = condBlock.getParent(); + if (!parent) + return std::nullopt; + + // Keep the heuristic strict: one init store + one update store. 
+ if (countStoresToKeyInFunction(*parent, key) != 2) + return std::nullopt; + + const BasicBlock* initBlock = nullptr; + const BasicBlock* updateBlock = nullptr; + long long initValue = 0; + long long stepValue = 0; + std::size_t predCount = 0; + + for (const BasicBlock* incoming : predecessors(&condBlock)) + { + ++predCount; + const StoreInst* SI = findUniqueStoreToKeyInBlock(*incoming, key); + if (!SI) + continue; + + if (const auto maybeInit = extractConstantInitValue(*SI)) + { + if (initBlock) + return std::nullopt; + initBlock = incoming; + initValue = *maybeInit; + continue; + } + + if (const auto maybeStep = extractConstantStepValue(*SI, key)) + { + if (updateBlock) + return std::nullopt; + updateBlock = incoming; + stepValue = *maybeStep; + } + } + + if (predCount != 2 || !initBlock || !updateBlock) + return std::nullopt; + + bool updateReachableFromAccess = (updateBlock == &target); + if (!updateReachableFromAccess) + { + for (const BasicBlock* succ : successors(&target)) + { + if (succ == updateBlock) + { + updateReachableFromAccess = true; + break; + } + } + } + if (!updateReachableFromAccess) + return std::nullopt; + + const long long boundValue = boundConstant.getSExtValue(); + + IntRange out; + if (stepValue > 0) + { + if (initValue >= boundValue) + return std::nullopt; + const long long delta = boundValue - initValue; + if (delta % stepValue != 0) + return std::nullopt; + out.hasLower = true; + out.lower = initValue; + out.hasUpper = true; + out.upper = boundValue; + return out; + } + + if (initValue <= boundValue) + return std::nullopt; + const long long stepMagnitude = -stepValue; + const long long delta = initValue - boundValue; + if (delta % stepMagnitude != 0) + return std::nullopt; + out.hasLower = true; + out.lower = boundValue; + out.hasUpper = true; + out.upper = initValue; + return out; + } + + static bool deriveConstraintFromPredicate(llvm::ICmpInst::Predicate pred, bool valueIsOp0, + const llvm::ConstantInt& constant, IntRange& out) + { + 
using namespace llvm; + bool hasLB = false; + bool hasUB = false; + long long lb = 0; + long long ub = 0; + + auto updateForSigned = [&](long long c) + { + if (valueIsOp0) + { + switch (pred) + { + case ICmpInst::ICMP_SLT: // V < C => V <= C-1 + hasUB = true; + ub = c - 1; + break; + case ICmpInst::ICMP_SLE: // V <= C => V <= C + hasUB = true; + ub = c; + break; + case ICmpInst::ICMP_SGT: // V > C => V >= C+1 + hasLB = true; + lb = c + 1; + break; + case ICmpInst::ICMP_SGE: // V >= C => V >= C + hasLB = true; + lb = c; + break; + case ICmpInst::ICMP_EQ: // V == C => [C, C] + hasLB = true; + lb = c; + hasUB = true; + ub = c; + break; + default: + break; + } + } + else + { + // C ? V <=> V ? C (reversed) + switch (pred) + { + case ICmpInst::ICMP_SGT: // C > V => V < C => V <= C-1 + hasUB = true; + ub = c - 1; + break; + case ICmpInst::ICMP_SGE: // C >= V => V <= C + hasUB = true; + ub = c; + break; + case ICmpInst::ICMP_SLT: // C < V => V > C => V >= C+1 + hasLB = true; + lb = c + 1; + break; + case ICmpInst::ICMP_SLE: // C <= V => V >= C + hasLB = true; + lb = c; + break; + case ICmpInst::ICMP_EQ: // C == V => [C, C] + hasLB = true; + lb = c; + hasUB = true; + ub = c; + break; + default: + break; + } + } + }; + + auto updateForUnsigned = [&](unsigned long long cUnsigned) + { + long long c = static_cast(cUnsigned); + if (valueIsOp0) + { + switch (pred) + { + case ICmpInst::ICMP_ULT: // V < C => V <= C-1 + hasUB = true; + ub = c - 1; + break; + case ICmpInst::ICMP_ULE: // V <= C + hasUB = true; + ub = c; + break; + case ICmpInst::ICMP_UGT: // V > C => V >= C+1 + hasLB = true; + lb = c + 1; + break; + case ICmpInst::ICMP_UGE: // V >= C + hasLB = true; + lb = c; + break; + case ICmpInst::ICMP_EQ: + hasLB = true; + lb = c; + hasUB = true; + ub = c; + break; + default: + break; + } + } + else + { + switch (pred) + { + case ICmpInst::ICMP_UGT: // C > V => V < C + hasUB = true; + ub = c - 1; + break; + case ICmpInst::ICMP_UGE: // C >= V => V <= C + hasUB = true; + ub = c; + 
break; + case ICmpInst::ICMP_ULT: // C < V => V > C + hasLB = true; + lb = c + 1; + break; + case ICmpInst::ICMP_ULE: // C <= V => V >= C + hasLB = true; + lb = c; + break; + case ICmpInst::ICMP_EQ: + hasLB = true; + lb = c; + hasUB = true; + ub = c; + break; + default: + break; + } + } + }; + + if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE || + pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE || + pred == ICmpInst::ICMP_EQ) + { + updateForSigned(constant.getSExtValue()); + } + else if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE || + pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) + { + updateForUnsigned(constant.getZExtValue()); + } + + if (!(hasLB || hasUB)) + return false; + + out.hasLower = hasLB; + out.lower = lb; + out.hasUpper = hasUB; + out.upper = ub; + return true; + } + + static std::optional deriveIncomingEdgeRange(const llvm::BasicBlock& target, + const llvm::Value* key) + { + using namespace llvm; + + const BasicBlock* pred = target.getSinglePredecessor(); + if (!pred) + return std::nullopt; + + const auto* br = dyn_cast(pred->getTerminator()); + if (!br || !br->isConditional()) + return std::nullopt; + + const bool takesTrueEdge = br->getSuccessor(0) == &target; + const auto* icmp = dyn_cast(br->getCondition()); + if (!icmp) + return std::nullopt; + + const Value* op0 = icmp->getOperand(0); + const Value* op1 = icmp->getOperand(1); + + auto matchesKey = [key](const Value* V) -> bool + { + if (V == key) + return true; + if (const auto* LI = dyn_cast(V)) + return LI->getPointerOperand() == key; + return false; + }; + + const ConstantInt* C = nullptr; + bool valueIsOp0 = false; + if (matchesKey(op0) && (C = dyn_cast(op1))) + { + valueIsOp0 = true; + } + else if (matchesKey(op1) && (C = dyn_cast(op0))) + { + valueIsOp0 = false; + } + else + { + return std::nullopt; + } + + IntRange out; + const auto predToApply = + takesTrueEdge ? 
icmp->getPredicate() : icmp->getInversePredicate(); + if (!deriveConstraintFromPredicate(predToApply, valueIsOp0, *C, out)) + { + if (predToApply == ICmpInst::ICMP_NE) + { + return deriveBoundedRangeFromNeLoopGuard(target, *pred, key, *C, takesTrueEdge); + } + return std::nullopt; + } + + return out; + } + + static IntRange refineRangeForAccessSite(const IntRange& coarseRange, + const llvm::BasicBlock& accessBlock, + const llvm::Value* key) + { + IntRange refined = coarseRange; + const auto incomingRange = deriveIncomingEdgeRange(accessBlock, key); + if (!incomingRange) + return refined; + + intersectRange(refined, *incomingRange); + if (refined.hasLower && refined.hasUpper && refined.lower > refined.upper) + { + // Path-insensitive coarse bounds can conflict with edge-specific bounds. + // In that case prefer the edge-local constraint for this access site. + return *incomingRange; + } + + return refined; + } + static void analyzeStackBufferOverflowsInFunction(llvm::Function& F, std::vector& out, @@ -366,18 +874,21 @@ namespace ctrace::stack::analysis struct CachedResolution { const AllocaInst* alloca = nullptr; + const GlobalVariable* global = nullptr; std::vector aliasPath; bool computed = false; }; std::unordered_map resolutionCache; - auto resolveArrayAllocaCached = - [&](const Value* basePtr, std::vector& aliasPath) -> const AllocaInst* + auto resolveArrayBufferBaseCached = + [&](const Value* basePtr, std::vector& aliasPath, + const GlobalVariable*& globalOut) -> const AllocaInst* { auto& cached = resolutionCache[basePtr]; if (cached.computed) { aliasPath = cached.aliasPath; + globalOut = cached.global; return cached.alloca; } @@ -385,9 +896,20 @@ namespace ctrace::stack::analysis std::vector resolvedPath; cached.alloca = resolveArrayAllocaFromPointer(basePtr, F, resolvedPath, allowPointerStoreScan); + if (!cached.alloca) + cached.global = resolveArrayGlobalFromPointer(basePtr); + if (cached.alloca) + { cached.aliasPath = std::move(resolvedPath); + } + else if 
(cached.global && cached.global->hasName()) + { + cached.aliasPath.push_back(cached.global->getName().str()); + } + aliasPath = cached.aliasPath; + globalOut = cached.global; return cached.alloca; }; @@ -405,8 +927,9 @@ namespace ctrace::stack::analysis // 1) Find the pointer base (test, &test[0], ptr, etc.) const Value* basePtr = GEP->getPointerOperand(); std::vector aliasPath; - const AllocaInst* AI = resolveArrayAllocaCached(basePtr, aliasPath); - if (!AI) + const GlobalVariable* GV = nullptr; + const AllocaInst* AI = resolveArrayBufferBaseCached(basePtr, aliasPath, GV); + if (!AI && !GV) continue; // 2) Determine the logical target array size and retrieve the index. @@ -465,13 +988,22 @@ namespace ctrace::stack::analysis } // If we could not infer a size via the GEP, - // fall back to the size derived from the alloca - // (case char buf[10]; ptr = buf; ptr[i]). + // fall back to the size derived from the resolved base + // (stack alloca or global array, case char buf[10]; ptr = buf; ptr[i]). if (arraySize == 0 || !idxVal) { - if (!shouldUseAllocaFallback(AI, F)) - continue; - auto maybeCount = getAllocaElementCount(const_cast(AI)); + std::optional maybeCount; + if (AI) + { + if (!shouldUseAllocaFallback(AI, F)) + continue; + maybeCount = getAllocaElementCount(const_cast(AI)); + } + else if (GV) + { + maybeCount = getGlobalElementCount(GV); + } + if (!maybeCount) continue; arraySize = *maybeCount; @@ -485,8 +1017,18 @@ namespace ctrace::stack::analysis idxVal = idxIt->get(); } - std::string varName = - AI->hasName() ? AI->getName().str() : std::string(""); + const BufferStorageClass storageClass = + GV ? BufferStorageClass::Global : BufferStorageClass::Stack; + std::string varName = ""; + if (AI) + { + varName = AI->hasName() ? AI->getName().str() : std::string(""); + } + else if (GV) + { + varName = + GV->hasName() ? GV->getName().str() : std::string(""); + } // "baseIdxVal" = loop variable "i" without casts (sext/zext...) 
Value* baseIdxVal = idxVal; @@ -512,20 +1054,10 @@ namespace ctrace::stack::analysis report.indexOrUpperBound = static_cast(idxValue); report.isWrite = true; report.indexIsConstant = true; + report.storageClass = storageClass; report.inst = S; report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } + report.aliasPath = buildAliasPathString(aliasPath); out.push_back(std::move(report)); } else if (auto* L = dyn_cast(GU)) @@ -537,20 +1069,10 @@ namespace ctrace::stack::analysis report.indexOrUpperBound = static_cast(idxValue); report.isWrite = false; report.indexIsConstant = true; + report.storageClass = storageClass; report.inst = L; report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } + report.aliasPath = buildAliasPathString(aliasPath); out.push_back(std::move(report)); } } @@ -569,19 +1091,35 @@ namespace ctrace::stack::analysis key = LI->getPointerOperand(); } - auto itRange = ranges.find(key); - if (itRange == ranges.end()) + IntRange R; + bool hasRange = false; + + if (auto itRange = ranges.find(key); itRange != ranges.end()) { - // no known bound => say nothing here - continue; + R = refineRangeForAccessSite(itRange->second, BB, key); + hasRange = R.hasLower || R.hasUpper; + } + else if (const auto localRange = deriveIncomingEdgeRange(BB, key)) + { + R = *localRange; + hasRange = R.hasLower || R.hasUpper; } - const IntRange& R = itRange->second; + if (!hasRange) + continue; - // 5.a) Upper bound out of range: UB >= arraySize - if (R.hasUpper && R.upper >= 0 && 
static_cast(R.upper) >= arraySize) + // 5.a) Index range exceeds array end (upper or lower bound already >= size). + const bool upperOutOfRange = + R.hasUpper && R.upper >= 0 && static_cast(R.upper) >= arraySize; + const bool lowerOutOfRange = + R.hasLower && R.lower >= 0 && static_cast(R.lower) >= arraySize; + if (upperOutOfRange || lowerOutOfRange) { - StackSize ub = static_cast(R.upper); + StackSize ub = 0; + if (upperOutOfRange) + ub = static_cast(R.upper); + else + ub = static_cast(R.lower); for (User* GU : GEP->users()) { @@ -594,20 +1132,10 @@ namespace ctrace::stack::analysis report.indexOrUpperBound = ub; report.isWrite = true; report.indexIsConstant = false; + report.storageClass = storageClass; report.inst = S; report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } + report.aliasPath = buildAliasPathString(aliasPath); out.push_back(std::move(report)); } else if (auto* L = dyn_cast(GU)) @@ -619,20 +1147,10 @@ namespace ctrace::stack::analysis report.indexOrUpperBound = ub; report.isWrite = false; report.indexIsConstant = false; + report.storageClass = storageClass; report.inst = L; report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } + report.aliasPath = buildAliasPathString(aliasPath); out.push_back(std::move(report)); } } @@ -651,22 +1169,12 @@ namespace ctrace::stack::analysis report.arraySize = arraySize; report.isWrite = true; report.indexIsConstant = false; + report.storageClass = storageClass; report.inst = S; report.isLowerBoundViolation = true; report.lowerBound = 
R.lower; report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } + report.aliasPath = buildAliasPathString(aliasPath); out.push_back(std::move(report)); } else if (auto* L = dyn_cast(GU)) @@ -677,22 +1185,12 @@ namespace ctrace::stack::analysis report.arraySize = arraySize; report.isWrite = false; report.indexIsConstant = false; + report.storageClass = storageClass; report.inst = L; report.isLowerBoundViolation = true; report.lowerBound = R.lower; report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } + report.aliasPath = buildAliasPathString(aliasPath); out.push_back(std::move(report)); } } diff --git a/src/analysis/TOCTOUAnalysis.cpp b/src/analysis/TOCTOUAnalysis.cpp new file mode 100644 index 0000000..4262795 --- /dev/null +++ b/src/analysis/TOCTOUAnalysis.cpp @@ -0,0 +1,265 @@ +#include "analysis/TOCTOUAnalysis.hpp" + +#include "analysis/AnalyzerUtils.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + struct PathEvent + { + const llvm::Instruction* inst = nullptr; + const llvm::Value* root = nullptr; + std::string literal; + std::string api; + unsigned order = 0; + }; + + static const llvm::Function* getDirectCallee(const llvm::CallBase& call) + { + if (const llvm::Function* direct = call.getCalledFunction()) + return direct; + const llvm::Value* called = call.getCalledOperand(); + if (!called) + return nullptr; + return 
llvm::dyn_cast(called->stripPointerCasts()); + } + + static llvm::StringRef canonicalCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + if (name.starts_with("_")) + name = name.drop_front(); + return name; + } + + static std::optional checkPathArgIndex(llvm::StringRef calleeName) + { + if (calleeName == "access" || calleeName == "stat" || calleeName == "lstat") + return 0u; + if (calleeName == "faccessat" || calleeName == "fstatat") + return 1u; + return std::nullopt; + } + + static const llvm::Value* peelPointerFromSingleStoreSlot(const llvm::Value* value) + { + const llvm::Value* current = value->stripPointerCasts(); + for (unsigned depth = 0; depth < 4; ++depth) + { + const auto* load = llvm::dyn_cast(current); + if (!load) + break; + + const auto* slot = llvm::dyn_cast( + load->getPointerOperand()->stripPointerCasts()); + if (!slot || !slot->isStaticAlloca() || !slot->getAllocatedType()->isPointerTy()) + break; + + const llvm::StoreInst* uniqueStore = nullptr; + bool unsafe = false; + for (const llvm::Use& use : slot->uses()) + { + const auto* user = use.getUser(); + if (const auto* store = llvm::dyn_cast(user)) + { + if (store->getPointerOperand()->stripPointerCasts() != slot) + { + unsafe = true; + break; + } + if (uniqueStore && uniqueStore != store) + { + unsafe = true; + break; + } + uniqueStore = store; + continue; + } + + if (const auto* slotLoad = llvm::dyn_cast(user)) + { + if (slotLoad->getPointerOperand()->stripPointerCasts() != slot) + { + unsafe = true; + break; + } + continue; + } + + if (const auto* intrinsic = llvm::dyn_cast(user)) + { + if (llvm::isa(intrinsic) || + llvm::isa(intrinsic)) + { + continue; + } + } + + unsafe = true; + break; + } + + if (unsafe || !uniqueStore) + break; + current = uniqueStore->getValueOperand()->stripPointerCasts(); + } + + return current; + } + + static std::optional usePathArgIndex(llvm::StringRef calleeName) + { + if (calleeName == "open" || 
calleeName == "fopen") + return 0u; + if (calleeName == "openat") + return 1u; + return std::nullopt; + } + + static std::optional tryExtractStringLiteral(const llvm::Value* value) + { + if (!value) + return std::nullopt; + + const llvm::Value* current = value->stripPointerCasts(); + for (unsigned depth = 0; depth < 8; ++depth) + { + if (const auto* global = llvm::dyn_cast(current)) + { + if (!global->hasInitializer()) + return std::nullopt; + + const llvm::Constant* init = global->getInitializer(); + if (const auto* data = llvm::dyn_cast(init)) + { + if (data->isCString()) + return data->getAsCString().str(); + } + return std::nullopt; + } + + if (const auto* gep = llvm::dyn_cast(current)) + { + current = gep->getPointerOperand()->stripPointerCasts(); + continue; + } + + if (const auto* expr = llvm::dyn_cast(current)) + { + if (expr->isCast() || expr->getOpcode() == llvm::Instruction::GetElementPtr) + { + current = expr->getOperand(0)->stripPointerCasts(); + continue; + } + } + + break; + } + + return std::nullopt; + } + + static bool likelySamePath(const PathEvent& lhs, const PathEvent& rhs) + { + if (lhs.root && rhs.root && lhs.root == rhs.root) + return true; + if (!lhs.literal.empty() && !rhs.literal.empty() && lhs.literal == rhs.literal) + return true; + return false; + } + } // namespace + + std::vector + analyzeTOCTOU(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector issues; + + for (llvm::Function& function : mod) + { + if (function.isDeclaration() || !shouldAnalyze(function)) + continue; + + std::vector checks; + std::vector uses; + unsigned order = 0; + + for (llvm::BasicBlock& block : function) + { + for (llvm::Instruction& inst : block) + { + ++order; + + const auto* call = llvm::dyn_cast(&inst); + if (!call) + continue; + + const llvm::Function* callee = getDirectCallee(*call); + if (!callee) + continue; + + const llvm::StringRef canonicalName = canonicalCalleeName(callee->getName()); + const std::optional checkArg = 
checkPathArgIndex(canonicalName); + const std::optional useArg = usePathArgIndex(canonicalName); + if (!checkArg && !useArg) + continue; + + const unsigned argIndex = checkArg ? *checkArg : *useArg; + if (argIndex >= call->arg_size()) + continue; + + const llvm::Value* pathValue = + peelPointerFromSingleStoreSlot(call->getArgOperand(argIndex)); + PathEvent event; + event.inst = &inst; + event.root = llvm::getUnderlyingObject(pathValue, 32); + event.literal = tryExtractStringLiteral(pathValue).value_or(""); + event.api = canonicalName.str(); + event.order = order; + + if (checkArg) + checks.push_back(std::move(event)); + else + uses.push_back(std::move(event)); + } + } + + for (const PathEvent& useEvent : uses) + { + for (const PathEvent& checkEvent : checks) + { + if (checkEvent.order >= useEvent.order) + continue; + if (!likelySamePath(checkEvent, useEvent)) + continue; + + TOCTOUIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.checkApi = checkEvent.api; + issue.useApi = useEvent.api; + issue.inst = useEvent.inst; + issues.push_back(std::move(issue)); + break; + } + } + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/TypeConfusionAnalysis.cpp b/src/analysis/TypeConfusionAnalysis.cpp new file mode 100644 index 0000000..be1862d --- /dev/null +++ b/src/analysis/TypeConfusionAnalysis.cpp @@ -0,0 +1,436 @@ +#include "analysis/TypeConfusionAnalysis.hpp" + +#include "analysis/AnalyzerUtils.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + struct ViewObservation + { + const llvm::StructType* viewType = nullptr; + std::uint64_t viewSizeBytes = 0; + std::uint64_t accessOffsetBytes = 0; + const llvm::Instruction* inst = nullptr; + llvm::SmallVector structChain; + }; + + static std::string 
structDisplayName(const llvm::StructType* type) + { + if (!type) + return ""; + if (type->hasName()) + return type->getName().str(); + return ""; + } + + static void collectStructTypeChain(const llvm::Value* pointer, + llvm::SmallVector& out) + { + if (!pointer) + return; + + const llvm::Value* current = pointer; + std::unordered_set seen(out.begin(), out.end()); + for (unsigned depth = 0; depth < 20; ++depth) + { + if (const auto* gep = llvm::dyn_cast(current)) + { + if (const auto* structType = + llvm::dyn_cast(gep->getSourceElementType())) + { + if (!structType->isOpaque() && seen.insert(structType).second) + out.push_back(structType); + } + current = gep->getPointerOperand(); + continue; + } + + if (const auto* cast = llvm::dyn_cast(current)) + { + current = cast->getOperand(0); + continue; + } + + if (const auto* expr = llvm::dyn_cast(current)) + { + if (expr->isCast() || expr->getOpcode() == llvm::Instruction::GetElementPtr) + { + current = expr->getOperand(0); + continue; + } + } + + break; + } + } + + static const llvm::Value* peelPointerFromSingleStoreSlot(const llvm::Value* value) + { + const llvm::Value* current = value ? 
value->stripPointerCasts() : nullptr; + for (unsigned depth = 0; current && depth < 6; ++depth) + { + const auto* load = llvm::dyn_cast(current); + if (!load) + break; + + const auto* slot = llvm::dyn_cast( + load->getPointerOperand()->stripPointerCasts()); + if (!slot || !slot->isStaticAlloca() || !slot->getAllocatedType()->isPointerTy()) + break; + + const llvm::StoreInst* uniqueStore = nullptr; + bool unsafe = false; + for (const llvm::Use& use : slot->uses()) + { + const auto* user = use.getUser(); + if (const auto* store = llvm::dyn_cast(user)) + { + if (store->getPointerOperand()->stripPointerCasts() != slot) + { + unsafe = true; + break; + } + if (uniqueStore && uniqueStore != store) + { + unsafe = true; + break; + } + uniqueStore = store; + continue; + } + + if (const auto* slotLoad = llvm::dyn_cast(user)) + { + if (slotLoad->getPointerOperand()->stripPointerCasts() != slot) + { + unsafe = true; + break; + } + continue; + } + + if (const auto* intrinsic = llvm::dyn_cast(user)) + { + if (llvm::isa(intrinsic) || + llvm::isa(intrinsic)) + { + continue; + } + } + + unsafe = true; + break; + } + + if (unsafe || !uniqueStore) + break; + current = uniqueStore->getValueOperand()->stripPointerCasts(); + } + + return current; + } + + static bool containsStructTypeRecursive(const llvm::Type* haystack, + const llvm::StructType* needle, + std::unordered_set& visiting, + unsigned depth = 0) + { + if (!haystack || !needle || depth > 24) + return false; + if (haystack == needle) + return true; + if (!visiting.insert(haystack).second) + return false; + + if (const auto* structType = llvm::dyn_cast(haystack)) + { + if (structType->isOpaque()) + return false; + for (llvm::Type* elemType : structType->elements()) + { + if (containsStructTypeRecursive(elemType, needle, visiting, depth + 1)) + return true; + } + return false; + } + + if (const auto* arrayType = llvm::dyn_cast(haystack)) + return containsStructTypeRecursive(arrayType->getElementType(), needle, visiting, + depth + 
1); + + if (const auto* vectorType = llvm::dyn_cast(haystack)) + return containsStructTypeRecursive(vectorType->getElementType(), needle, visiting, + depth + 1); + + return false; + } + + static bool structContainsType(const llvm::StructType* container, + const llvm::StructType* nested) + { + if (!container || !nested || container->isOpaque()) + return false; + std::unordered_set visiting; + return containsStructTypeRecursive(container, nested, visiting, 0); + } + + static const llvm::StructType* getConcreteRootStructType(const llvm::Value* root) + { + const llvm::Value* stripped = root ? root->stripPointerCasts() : nullptr; + if (!stripped) + return nullptr; + + const auto* alloca = llvm::dyn_cast(stripped); + if (!alloca) + return nullptr; + + const auto* structType = llvm::dyn_cast(alloca->getAllocatedType()); + if (!structType || structType->isOpaque()) + return nullptr; + return structType; + } + + static bool hasCommonTypeContext(const std::unordered_set& lhs, + const std::unordered_set& rhs) + { + if (lhs.empty() || rhs.empty()) + return false; + const auto& smaller = lhs.size() <= rhs.size() ? lhs : rhs; + const auto& larger = lhs.size() <= rhs.size() ? 
rhs : lhs; + for (const llvm::StructType* type : smaller) + { + if (larger.find(type) != larger.end()) + return true; + } + return false; + } + + static bool + contextContainsSubobjectView(const std::unordered_set& context, + const llvm::StructType* view) + { + if (!view) + return false; + for (const llvm::StructType* contextType : context) + { + if (!contextType) + continue; + if (contextType == view || structContainsType(contextType, view)) + return true; + } + return false; + } + + static bool viewsLikelyCompatible( + const llvm::StructType* lhs, const llvm::StructType* rhs, + const std::unordered_map>& contextsByView, + const llvm::StructType* concreteRootType) + { + if (!lhs || !rhs) + return false; + if (lhs == rhs) + return true; + + // Typical "field-of-struct" patterns: one view is a nested subobject of the other. + if (structContainsType(lhs, rhs) || structContainsType(rhs, lhs)) + return true; + + // If both views are observed under a shared structural ancestor in IR GEP chains, + // they are likely sibling subobjects of the same aggregate rather than type confusion. + const auto lhsIt = contextsByView.find(lhs); + const auto rhsIt = contextsByView.find(rhs); + if (lhsIt != contextsByView.end() && rhsIt != contextsByView.end() && + hasCommonTypeContext(lhsIt->second, rhsIt->second)) + { + return true; + } + + // If one view's structural context contains the other view type as a subobject, + // this is typically a legitimate "same object, different subobject path" pattern + // (derived/base plus member aggregates, etc.). + if (lhsIt != contextsByView.end() && contextContainsSubobjectView(lhsIt->second, rhs)) + { + return true; + } + if (rhsIt != contextsByView.end() && contextContainsSubobjectView(rhsIt->second, lhs)) + { + return true; + } + + // When the root object type is known (local alloca), keep only mismatches that cannot + // be explained as legal subobjects inside that root aggregate. 
+ if (concreteRootType && structContainsType(concreteRootType, lhs) && + structContainsType(concreteRootType, rhs)) + { + return true; + } + + return false; + } + + static void + collectObservation(const llvm::Value* pointer, const llvm::Instruction& atInst, + const llvm::DataLayout& dataLayout, + std::map>& outByRoot) + { + if (!pointer || !pointer->getType()->isPointerTy()) + return; + + llvm::SmallVector structChain; + collectStructTypeChain(pointer, structChain); + const llvm::Value* peeledPointer = peelPointerFromSingleStoreSlot(pointer); + if (peeledPointer && peeledPointer != pointer) + collectStructTypeChain(peeledPointer, structChain); + if (structChain.empty()) + return; + const llvm::StructType* viewType = structChain.front(); + + int64_t offset = 0; + const llvm::Value* base = + llvm::GetPointerBaseWithConstantOffset(pointer, offset, dataLayout, true); + if (!base || offset < 0) + return; + + const llvm::Value* root = peelPointerFromSingleStoreSlot(base); + root = llvm::getUnderlyingObject(root, 32); + root = peelPointerFromSingleStoreSlot(root); + if (!root) + return; + + const std::uint64_t viewSize = + dataLayout.getTypeAllocSize(const_cast(viewType)); + + ViewObservation obs; + obs.viewType = viewType; + obs.viewSizeBytes = viewSize; + obs.accessOffsetBytes = static_cast(offset); + obs.inst = &atInst; + obs.structChain = structChain; + + outByRoot[root].push_back(std::move(obs)); + } + } // namespace + + std::vector + analyzeTypeConfusions(llvm::Module& mod, const llvm::DataLayout& dataLayout, + const std::function& shouldAnalyze) + { + std::vector issues; + std::unordered_set emitted; + + for (llvm::Function& function : mod) + { + if (function.isDeclaration() || !shouldAnalyze(function)) + continue; + + std::map> observationsByRoot; + + for (llvm::BasicBlock& block : function) + { + for (llvm::Instruction& inst : block) + { + if (const auto* load = llvm::dyn_cast(&inst)) + { + collectObservation(load->getPointerOperand(), inst, dataLayout, + 
observationsByRoot); + continue; + } + + if (const auto* store = llvm::dyn_cast(&inst)) + { + collectObservation(store->getPointerOperand(), inst, dataLayout, + observationsByRoot); + continue; + } + + if (const auto* call = llvm::dyn_cast(&inst)) + { + for (const llvm::Value* argument : call->args()) + collectObservation(argument, inst, dataLayout, observationsByRoot); + } + } + } + + for (const auto& [root, observations] : observationsByRoot) + { + if (observations.size() < 2) + continue; + + std::uint64_t smallestViewSize = std::numeric_limits::max(); + const llvm::StructType* smallestViewType = nullptr; + std::unordered_set distinctViews; + std::unordered_map> + contextsByView; + distinctViews.reserve(observations.size()); + + for (const ViewObservation& obs : observations) + { + if (!obs.viewType) + continue; + distinctViews.insert(obs.viewType); + auto& context = contextsByView[obs.viewType]; + context.insert(obs.viewType); + for (const llvm::StructType* type : obs.structChain) + context.insert(type); + if (obs.viewSizeBytes < smallestViewSize) + { + smallestViewSize = obs.viewSizeBytes; + smallestViewType = obs.viewType; + } + } + + if (!smallestViewType || distinctViews.size() < 2) + continue; + + const llvm::StructType* concreteRootType = getConcreteRootStructType(root); + for (const ViewObservation& obs : observations) + { + if (!obs.inst || obs.viewType == smallestViewType) + continue; + if (obs.accessOffsetBytes < smallestViewSize) + continue; + if (viewsLikelyCompatible(smallestViewType, obs.viewType, contextsByView, + concreteRootType)) + { + continue; + } + if (!emitted.insert(obs.inst).second) + continue; + + TypeConfusionIssue issue; + issue.funcName = function.getName().str(); + issue.filePath = getFunctionSourcePath(function); + issue.smallerViewType = structDisplayName(smallestViewType); + issue.accessedViewType = structDisplayName(obs.viewType); + issue.smallerViewSizeBytes = smallestViewSize; + issue.accessOffsetBytes = obs.accessOffsetBytes; 
+ issue.inst = obs.inst; + issues.push_back(std::move(issue)); + } + } + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/UninitializedVarAnalysis.cpp b/src/analysis/UninitializedVarAnalysis.cpp index 8bf645e..33221c2 100644 --- a/src/analysis/UninitializedVarAnalysis.cpp +++ b/src/analysis/UninitializedVarAnalysis.cpp @@ -1749,6 +1749,18 @@ namespace ctrace::stack::analysis return 0; } + static void markGuaranteedCtorOrSRetWriteOnPointerOperand( + const llvm::Value* ptrOperand, const TrackedObjectContext& tracked, + const llvm::DataLayout& DL, InitRangeState& initialized, llvm::BitVector* writeSeen, + FunctionSummary* currentSummary, bool requireKnownSize = false) + { + const std::uint64_t inferredSize = inferWriteSizeFromPointerOperand(ptrOperand, DL); + if (requireKnownSize && inferredSize == 0) + return; + markKnownWriteOnPointerOperand(ptrOperand, tracked, DL, initialized, writeSeen, + currentSummary, inferredSize); + } + static bool declarationCallReturnIsControlChecked(const llvm::CallBase& CB) { if (CB.getType()->isVoidTy()) @@ -1820,6 +1832,98 @@ namespace ctrace::stack::analysis return false; } + static llvm::StringRef canonicalExternalCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + if (name.starts_with("_")) + name = name.drop_front(); + + const std::size_t dollarPos = name.find('$'); + if (dollarPos != llvm::StringRef::npos) + name = name.take_front(dollarPos); + + return name; + } + + struct ExternalReadSinkSignature + { + llvm::StringRef sinkName; + unsigned pointerArgIndex = 0; + }; + + static std::optional + resolveExternalReadSinkSignature(const llvm::Function* callee) + { + if (!callee || !callee->isDeclaration() || callee->isIntrinsic()) + return std::nullopt; + + const llvm::StringRef calleeName = canonicalExternalCalleeName(callee->getName()); + if (calleeName == "write" || calleeName == "send" || calleeName == "sendto") + return 
ExternalReadSinkSignature{calleeName, 1u}; + + if (calleeName == "fwrite" || calleeName == "fwrite_unlocked") + return ExternalReadSinkSignature{calleeName, 0u}; + + return std::nullopt; + } + + struct ExternalReadSinkSpec + { + llvm::StringRef sinkName; + unsigned pointerArgIndex = 0; + std::uint64_t readSizeBytes = 0; + }; + + static std::optional + resolveExternalReadSinkSpec(const llvm::CallBase& CB, const llvm::Function* callee) + { + const std::optional signature = + resolveExternalReadSinkSignature(callee); + if (!signature) + return std::nullopt; + + if (signature->sinkName == "write" || signature->sinkName == "send" || + signature->sinkName == "sendto") + { + if (CB.arg_size() <= 2) + return std::nullopt; + + const auto* sizeConst = llvm::dyn_cast(CB.getArgOperand(2)); + if (!sizeConst) + return std::nullopt; + + const std::uint64_t size = sizeConst->getZExtValue(); + if (size == 0) + return std::nullopt; + + return ExternalReadSinkSpec{signature->sinkName, signature->pointerArgIndex, size}; + } + + if (signature->sinkName == "fwrite" || signature->sinkName == "fwrite_unlocked") + { + if (CB.arg_size() <= 2) + return std::nullopt; + + const auto* sizeConst = llvm::dyn_cast(CB.getArgOperand(1)); + const auto* countConst = llvm::dyn_cast(CB.getArgOperand(2)); + if (!sizeConst || !countConst) + return std::nullopt; + + const std::uint64_t elemSize = sizeConst->getZExtValue(); + const std::uint64_t count = countConst->getZExtValue(); + if (elemSize == 0 || count == 0) + return std::nullopt; + if (count > std::numeric_limits::max() / elemSize) + return std::nullopt; + + return ExternalReadSinkSpec{signature->sinkName, signature->pointerArgIndex, + elemSize * count}; + } + + return std::nullopt; + } + static bool isKnownMemsetLikeDeclarationArg(const llvm::Function& callee, unsigned argIdx) { if (argIdx != 0) @@ -1892,6 +1996,59 @@ namespace ctrace::stack::analysis isLikelyStatusOutParamDeclarationArg(CB, callee, argIdx); } + static llvm::StringRef 
normalizeDeclarationCalleeName(llvm::StringRef name) + { + if (!name.empty() && name.front() == '\1') + name = name.drop_front(); + if (name.starts_with("__builtin_")) + name = name.drop_front(10); + if (name.starts_with("builtin_")) + name = name.drop_front(8); + while (name.starts_with("_")) + name = name.drop_front(); + + const std::size_t dollarPos = name.find('$'); + if (dollarPos != llvm::StringRef::npos) + name = name.take_front(dollarPos); + return name; + } + + static bool isKnownUnboundedCStringWriteDeclArg(const llvm::Function& callee, + unsigned argIdx) + { + if (argIdx != 0) + return false; + + const llvm::StringRef name = normalizeDeclarationCalleeName(callee.getName()); + return name == "strcpy" || name == "strcpy_chk" || name == "strcat" || + name == "strcat_chk" || name == "stpcpy" || name == "stpcpy_chk" || + name == "gets" || name == "sprintf" || name == "vsprintf"; + } + + static bool callOriginLooksLikeCSource(const llvm::CallBase& CB) + { + const llvm::DebugLoc dl = CB.getDebugLoc(); + if (!dl) + return false; + const llvm::DILocalScope* scope = + llvm::dyn_cast_or_null(dl.getScope()); + if (!scope) + return false; + const llvm::DIFile* file = scope->getFile(); + if (!file) + { + const llvm::DISubprogram* sp = scope->getSubprogram(); + file = sp ? sp->getFile() : nullptr; + } + if (!file) + return false; + + std::string filename = file->getFilename().str(); + std::transform(filename.begin(), filename.end(), filename.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + return filename.size() >= 2 && filename.ends_with(".c"); + } + static bool declarationCallArgMayWriteThrough(const llvm::CallBase& CB, const llvm::Function* callee, unsigned argIdx) { @@ -1906,13 +2063,29 @@ namespace ctrace::stack::analysis if (!actual || !actual->getType()->isPointerTy()) return false; + // Known output sinks (write/send/fwrite families) consume this pointer as read-only. 
+ if (const std::optional sink = + resolveExternalReadSinkSignature(callee)) + { + if (argIdx == sink->pointerArgIndex) + return false; + } + + // Keep unbounded C-string writes conservative: these calls do not provide a + // trustworthy destination length and should not silently suppress + // "never initialized" diagnostics. + if (isKnownUnboundedCStringWriteDeclArg(*callee, argIdx) && + callOriginLooksLikeCSource(CB)) + { + return false; + } + if (CB.paramHasAttr(argIdx, llvm::Attribute::ReadOnly) || CB.paramHasAttr(argIdx, llvm::Attribute::ReadNone)) { return false; } - if (CB.paramHasAttr(argIdx, llvm::Attribute::WriteOnly)) - return true; + const bool hasDirectWriteOnlyAttr = CB.paramHasAttr(argIdx, llvm::Attribute::WriteOnly); if (isKnownAlwaysWritingDeclarationArg(CB, *callee, argIdx)) return true; @@ -1921,7 +2094,8 @@ namespace ctrace::stack::analysis return false; if (!callee->getReturnType()->isVoidTy() && !declarationCallReturnIsControlChecked(CB)) { - return false; + if (!hasDirectWriteOnlyAttr) + return false; } if (argIdx >= callee->arg_size()) @@ -1933,12 +2107,71 @@ namespace ctrace::stack::analysis { return false; } - if (attrs.hasParamAttr(argIdx, llvm::Attribute::WriteOnly)) - return true; + const bool hasDeclWriteOnlyAttr = + attrs.hasParamAttr(argIdx, llvm::Attribute::WriteOnly); + if (hasDirectWriteOnlyAttr || hasDeclWriteOnlyAttr) + { + // C library headers differ across platforms (and fortify modes) in how + // aggressively they annotate pointer params as writeonly. For non-void + // unchecked-return APIs (e.g. strcpy/memcpy), avoid relying solely on + // these attrs to keep diagnostics stable across toolchains. 
+ if (callee->getReturnType()->isVoidTy() || + declarationCallReturnIsControlChecked(CB)) + return true; + return false; + } return true; } + static bool tryApplyBoundedMemTransferDeclarationWriteEffects( + const llvm::CallBase& CB, const llvm::Function* callee, + const TrackedObjectContext& tracked, const llvm::DataLayout& DL, + InitRangeState& initialized, llvm::BitVector* writeSeen, + FunctionSummary* currentSummary) + { + if (!callee || !callee->isDeclaration()) + return false; + + const llvm::StringRef calleeName = normalizeDeclarationCalleeName(callee->getName()); + const bool isMemcpyLike = calleeName == "memcpy" || calleeName == "memcpy_chk" || + calleeName == "memmove" || calleeName == "memmove_chk"; + if (!isMemcpyLike) + return false; + if (CB.arg_size() <= 2) + return false; + + const auto* sizeConst = llvm::dyn_cast(CB.getArgOperand(2)); + if (!sizeConst) + return false; + const std::uint64_t writeSize = sizeConst->getZExtValue(); + if (writeSize == 0) + return false; + + const llvm::Value* dest = CB.getArgOperand(0); + unsigned objectIdx = 0; + std::uint64_t baseOffset = 0; + bool hasConstOffset = false; + if (!resolveTrackedObjectBase(dest, tracked, DL, objectIdx, baseOffset, + hasConstOffset) || + !hasConstOffset) + { + return false; + } + + const TrackedMemoryObject& obj = tracked.objects[objectIdx]; + if (obj.sizeBytes == 0) + return false; + if (baseOffset >= obj.sizeBytes) + return false; + if (writeSize > obj.sizeBytes - baseOffset) + return false; + + markKnownWriteOnPointerOperand(dest, tracked, DL, initialized, writeSeen, + currentSummary, writeSize); + return true; + } + static void applyExternalDeclarationCallWriteEffects(const llvm::CallBase& CB, const llvm::Function* callee, const TrackedObjectContext& tracked, @@ -1950,6 +2183,12 @@ namespace ctrace::stack::analysis if (!callee || !callee->isDeclaration()) return; + if (tryApplyBoundedMemTransferDeclarationWriteEffects( + CB, callee, tracked, DL, initialized, writeSeen, currentSummary)) 
+ { + return; + } + for (unsigned argIdx = 0; argIdx < CB.arg_size(); ++argIdx) { if (!declarationCallArgMayWriteThrough(CB, callee, argIdx)) @@ -1962,6 +2201,55 @@ namespace ctrace::stack::analysis } } + static void applyExternalDeclarationCallReadEffects( + const llvm::CallBase& CB, const llvm::Function* callee, + const TrackedObjectContext& tracked, const llvm::DataLayout& DL, + const InitRangeState& initialized, llvm::BitVector* readBeforeInitSeen, + FunctionSummary* currentSummary, + std::vector* emittedIssues) + { + const std::optional sink = + resolveExternalReadSinkSpec(CB, callee); + if (!sink) + return; + if (sink->pointerArgIndex >= CB.arg_size()) + return; + + MemoryAccess access; + if (!resolveAccessFromPointer(CB.getArgOperand(sink->pointerArgIndex), + sink->readSizeBytes, tracked, DL, access)) + { + return; + } + + const TrackedMemoryObject& obj = tracked.objects[access.objectIdx]; + const bool isDefInit = isRangeCoveredRespectingNonPaddingLayout( + obj, initialized[access.objectIdx], access.begin, access.end); + if (isDefInit) + return; + + if (isAllocaObject(obj)) + { + if (readBeforeInitSeen && access.objectIdx < readBeforeInitSeen->size()) + readBeforeInitSeen->set(access.objectIdx); + + if (emittedIssues && shouldEmitAllocaIssue(obj)) + { + emittedIssues->push_back( + {CB.getFunction()->getName().str(), getTrackedObjectName(obj), &CB, 0, 0, + sink->sinkName.str(), + UninitializedLocalIssueKind::ExposedUninitializedBytesViaSink}); + } + return; + } + + if (currentSummary && obj.param) + { + addRange(getParamEffect(*currentSummary, *obj.param).readBeforeWriteRanges, + access.begin, access.end); + } + } + static bool unsummarizedDefinedCallArgMayWriteThrough(const llvm::CallBase& CB, const llvm::Function* callee, unsigned argIdx, @@ -2068,13 +2356,12 @@ namespace ctrace::stack::analysis const bool isSRet = CB.paramHasAttr(argIdx, llvm::Attribute::StructRet); const bool isCtorThis = isCtor && argIdx == 0; - const std::uint64_t inferredSize = 
inferWriteSizeFromPointerOperand(ptrOperand, DL); if (isSRet) { markConstructedOnPointerOperand(ptrOperand, tracked, DL, constructedSeen); - markKnownWriteOnPointerOperand(ptrOperand, tracked, DL, initialized, writeSeen, - currentSummary, inferredSize); + markGuaranteedCtorOrSRetWriteOnPointerOperand( + ptrOperand, tracked, DL, initialized, writeSeen, currentSummary); continue; } @@ -2085,8 +2372,8 @@ namespace ctrace::stack::analysis markConstructedOnPointerOperand(ptrOperand, tracked, DL, constructedSeen); if (isLikelyDefaultConstructorThisArg(CB, callee, argIdx)) markDefaultCtorOnPointerOperand(ptrOperand, tracked, DL, defaultCtorSeen); - markKnownWriteOnPointerOperand(ptrOperand, tracked, DL, initialized, writeSeen, - currentSummary, inferredSize); + markGuaranteedCtorOrSRetWriteOnPointerOperand( + ptrOperand, tracked, DL, initialized, writeSeen, currentSummary); } } } @@ -2124,17 +2411,16 @@ namespace ctrace::stack::analysis if (isSRet) { markConstructedOnPointerOperand(actual, tracked, DL, constructedSeen); - markKnownWriteOnPointerOperand(actual, tracked, DL, initialized, writeSeen, - currentSummary); + markGuaranteedCtorOrSRetWriteOnPointerOperand(actual, tracked, DL, initialized, + writeSeen, currentSummary); } else if (isCtorThis) { markConstructedOnPointerOperand(actual, tracked, DL, constructedSeen); if (isLikelyDefaultConstructorThisArg(CB, callee, argIdx)) markDefaultCtorOnPointerOperand(actual, tracked, DL, defaultCtorSeen); - const std::uint64_t inferredSize = inferWriteSizeFromPointerOperand(actual, DL); - markKnownWriteOnPointerOperand(actual, tracked, DL, initialized, writeSeen, - currentSummary, inferredSize); + markGuaranteedCtorOrSRetWriteOnPointerOperand(actual, tracked, DL, initialized, + writeSeen, currentSummary); } else if (unsummarizedDefinedCallArgMayWriteThrough( CB, callee, argIdx, hasMethodReceiverIdx, methodReceiverIdx)) @@ -2246,6 +2532,7 @@ namespace ctrace::stack::analysis const bool isCtorThis = isCtor && argIdx == 0; const bool 
isSRet = CB.paramHasAttr(argIdx, llvm::Attribute::StructRet); const bool isMethodReceiver = hasMethodReceiverIdx && argIdx == methodReceiverIdx; + const bool hasGuaranteedCtorOrSRetWrite = isCtorThis || isSRet; if (isCtorThis || isSRet || isMethodReceiver) { markConstructedOnPointerOperand(actual, tracked, DL, constructedSeen); @@ -2256,6 +2543,11 @@ namespace ctrace::stack::analysis const PointerParamEffectSummary& effect = calleeSummary.paramEffects[argIdx]; if (!effect.hasAnyEffect()) { + if (hasGuaranteedCtorOrSRetWrite) + { + markGuaranteedCtorOrSRetWriteOnPointerOperand( + actual, tracked, DL, initialized, writeSeen, currentSummary, true); + } continue; } @@ -2360,6 +2652,16 @@ namespace ctrace::stack::analysis } } + if (hasGuaranteedCtorOrSRetWrite) + { + // Constructors and sret out-params are modeled as producing a fully + // initialized destination object at call boundary. Keep this + // bounded to known-size operands to avoid over-marking unknown + // tail regions in the caller aggregate. + markGuaranteedCtorOrSRetWriteOnPointerOperand(actual, tracked, DL, initialized, + writeSeen, currentSummary, true); + } + if (!effect.pointerSlotWrites.empty()) { if (hasConstOffset) @@ -2588,8 +2890,8 @@ namespace ctrace::stack::analysis if (len && len->isZero()) return; - bool isInitWrite = - llvm::isa(MI) || llvm::isa(MI); + const bool isMemTransfer = llvm::isa(MI); + const bool isInitWrite = llvm::isa(MI) || isMemTransfer; if (!isInitWrite) return; @@ -2716,6 +3018,15 @@ namespace ctrace::stack::analysis if (resolveAccessFromPointer(MI->getDest(), writeSize, tracked, DL, access)) { const TrackedMemoryObject& obj = tracked.objects[access.objectIdx]; + if (isMemTransfer && obj.sizeBytes > 0 && + (access.end - access.begin) != writeSize) + { + // For memcpy/memmove, clipped ranges indicate a potential + // out-of-bounds length; do not mark full initialization from + // an unsafe transfer. 
+ return; + } + addRange(initialized[access.objectIdx], access.begin, access.end); if (isAllocaObject(obj)) { @@ -2731,6 +3042,13 @@ namespace ctrace::stack::analysis } } + if (isMemTransfer) + { + // Unknown-size or unresolved memcpy/memmove destinations are not modeled + // as definitive initialization. + return; + } + unsigned objectIdx = 0; std::uint64_t offset = 0; bool hasConstOffset = false; @@ -2779,6 +3097,9 @@ namespace ctrace::stack::analysis { applyKnownCallWriteEffects(*CB, callee, tracked, DL, initialized, writeSeen, constructedSeen, defaultCtorSeen, currentSummary); + applyExternalDeclarationCallReadEffects(*CB, callee, tracked, DL, initialized, + readBeforeInitSeen, currentSummary, + emittedIssues); applyExternalDeclarationCallWriteEffects(*CB, callee, tracked, DL, initialized, writeSeen, currentSummary); applyUnsummarizedDefinedCallWriteEffects(*CB, callee, tracked, DL, initialized, diff --git a/src/analyzer/AnalysisPipeline.cpp b/src/analyzer/AnalysisPipeline.cpp index 8fcbce3..75cca2d 100644 --- a/src/analyzer/AnalysisPipeline.cpp +++ b/src/analyzer/AnalysisPipeline.cpp @@ -5,15 +5,21 @@ #include "analysis/AllocaUsage.hpp" #include "analysis/ConstParamAnalysis.hpp" +#include "analysis/CommandInjectionAnalysis.hpp" #include "analysis/DuplicateIfCondition.hpp" #include "analysis/DynamicAlloca.hpp" +#include "analysis/IntegerOverflowAnalysis.hpp" #include "analysis/InvalidBaseReconstruction.hpp" #include "analysis/MemIntrinsicOverflow.hpp" +#include "analysis/NullDerefAnalysis.hpp" +#include "analysis/OOBReadAnalysis.hpp" #include "analysis/ResourceLifetimeAnalysis.hpp" #include "analysis/SizeMinusKWrites.hpp" #include "analysis/StackBufferAnalysis.hpp" #include "analysis/StackComputation.hpp" #include "analysis/StackPointerEscape.hpp" +#include "analysis/TOCTOUAnalysis.hpp" +#include "analysis/TypeConfusionAnalysis.hpp" #include "analysis/UninitializedVarAnalysis.hpp" #include "passes/ModulePasses.hpp" @@ -130,11 +136,21 @@ namespace 
ctrace::stack::analyzer { return state.prepared->ctx.shouldAnalyze(F); }; const llvm::DataLayout& dataLayout = *state.prepared->ctx.dataLayout; const std::vector issues = - analysis::analyzeMemIntrinsicOverflows(state.mod, dataLayout, - shouldAnalyze); + analysis::analyzeMemIntrinsicOverflows( + state.mod, dataLayout, shouldAnalyze, + state.config.bufferModelPath); appendMemIntrinsicDiagnostics(state.result, issues); }}); + steps.push_back({"Integer overflows", [](PipelineData& state) + { + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return state.prepared->ctx.shouldAnalyze(F); }; + const std::vector issues = + analysis::analyzeIntegerOverflows(state.mod, shouldAnalyze); + appendIntegerOverflowDiagnostics(state.result, issues); + }}); + steps.push_back({"Size-minus-k writes", [](PipelineData& state) { auto shouldAnalyze = [&](const llvm::Function& F) -> bool @@ -206,6 +222,54 @@ namespace ctrace::stack::analyzer appendConstParamDiagnostics(state.result, issues); }}); + steps.push_back({"Null pointer dereferences", [](PipelineData& state) + { + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return state.prepared->ctx.shouldAnalyze(F); }; + const std::vector issues = + analysis::analyzeNullDereferences(state.mod, shouldAnalyze); + appendNullDerefDiagnostics(state.result, issues); + }}); + + steps.push_back({"Out-of-bounds reads", [](PipelineData& state) + { + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return state.prepared->ctx.shouldAnalyze(F); }; + const llvm::DataLayout& dataLayout = *state.prepared->ctx.dataLayout; + const std::vector issues = + analysis::analyzeOOBReads(state.mod, dataLayout, shouldAnalyze); + appendOOBReadDiagnostics(state.result, issues); + }}); + + steps.push_back({"Command injection", [](PipelineData& state) + { + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return state.prepared->ctx.shouldAnalyze(F); }; + const std::vector issues = + 
analysis::analyzeCommandInjection(state.mod, shouldAnalyze); + appendCommandInjectionDiagnostics(state.result, issues); + }}); + + steps.push_back({"TOCTOU", [](PipelineData& state) + { + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return state.prepared->ctx.shouldAnalyze(F); }; + const std::vector issues = + analysis::analyzeTOCTOU(state.mod, shouldAnalyze); + appendTOCTOUDiagnostics(state.result, issues); + }}); + + steps.push_back({"Type confusion", [](PipelineData& state) + { + auto shouldAnalyze = [&](const llvm::Function& F) -> bool + { return state.prepared->ctx.shouldAnalyze(F); }; + const llvm::DataLayout& dataLayout = *state.prepared->ctx.dataLayout; + const std::vector issues = + analysis::analyzeTypeConfusions(state.mod, dataLayout, + shouldAnalyze); + appendTypeConfusionDiagnostics(state.result, issues); + }}); + steps.push_back({"Resource lifetime", [](PipelineData& state) { auto shouldAnalyze = [&](const llvm::Function& F) -> bool diff --git a/src/analyzer/DiagnosticEmitter.cpp b/src/analyzer/DiagnosticEmitter.cpp index 83319e3..ffcb50e 100644 --- a/src/analyzer/DiagnosticEmitter.cpp +++ b/src/analyzer/DiagnosticEmitter.cpp @@ -373,7 +373,11 @@ namespace ctrace::stack::analyzer else { builder.errCode(DescriptiveErrorCode::StackBufferOverflow); - body << "\t[ !!Warn ] potential stack buffer overflow on variable '" + const bool isGlobalStorage = + issue.storageClass == analysis::BufferStorageClass::Global; + body << "\t[ !!Warn ] potential " + << (isGlobalStorage ? 
"buffer overflow on global variable '" + : "stack buffer overflow on variable '") << issue.varName << "' (size " << issue.arraySize << ")\n"; if (!issue.aliasPath.empty()) body << "\t\t ↳ alias path: " << issue.aliasPath << "\n"; @@ -542,7 +546,16 @@ namespace ctrace::stack::analyzer << " potential stack buffer overflow in " << issue.intrinsicName << " on variable '" << issue.varName << "'\n"; body << "\t\t ↳ destination stack buffer size: " << issue.destSizeBytes << " bytes\n"; - body << "\t\t ↳ requested " << issue.lengthBytes << " bytes to be copied/initialized\n"; + if (issue.hasExplicitLength) + { + body << "\t\t ↳ requested " << issue.lengthBytes + << " bytes to be copied/initialized\n"; + } + else + { + body << "\t\t ↳ this API has no explicit size argument; " + "destination fit cannot be proven statically\n"; + } DiagnosticBuilder builder; builder.function(issue.funcName) @@ -589,6 +602,67 @@ namespace ctrace::stack::analyzer } } + void appendIntegerOverflowDiagnostics(AnalysisResult& result, + const std::vector& issues) + { + for (const auto& issue : issues) + { + const ResolvedLocation loc = resolveFromInstruction(issue.inst, true); + + DiagnosticBuilder builder; + builder.function(issue.funcName) + .filePath(issue.filePath) + .severity(DiagnosticSeverity::Warning) + .errCode(DescriptiveErrorCode::IntegerOverflow) + .location(loc) + .confidence(0.70); + + std::ostringstream body; + switch (issue.kind) + { + case analysis::IntegerOverflowIssueKind::ArithmeticInSizeComputation: + builder.ruleId("IntegerOverflow.SizeComputation").cwe("CWE-190"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential integer overflow in size computation before '" << issue.sinkName + << "'\n"; + body << kDiagIndentArrow << "operation: " << issue.operation << "\n"; + body << kDiagIndentArrow + << "overflowed size may under-allocate memory or make bounds checks unsound\n"; + break; + case analysis::IntegerOverflowIssueKind::SignedToUnsignedSize: + 
builder.ruleId("IntegerConversion.SignedToSize").cwe("CWE-195"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential signed-to-size conversion before '" << issue.sinkName << "'\n"; + body << kDiagIndentArrow + << "a possibly negative signed value is converted to an unsigned length\n"; + body + << kDiagIndentArrow + << "this can become a very large size value and trigger out-of-bounds access\n"; + break; + case analysis::IntegerOverflowIssueKind::TruncationInSizeComputation: + builder.ruleId("IntegerTruncation.SizeComputation").cwe("CWE-197"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential integer truncation in size computation before '" + << issue.sinkName << "'\n"; + body << kDiagIndentArrow + << "narrowing conversion may drop high bits and produce a smaller buffer " + "size\n"; + break; + case analysis::IntegerOverflowIssueKind::SignedArithmeticOverflow: + builder.ruleId("IntegerOverflow.SignedArithmetic").cwe("CWE-190"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential signed integer overflow in arithmetic operation\n"; + body << kDiagIndentArrow << "operation: " << issue.operation << "\n"; + body << kDiagIndentArrow + << "result is returned without a provable non-overflow bound\n"; + break; + } + + builder.message(body.str()); + result.diagnostics.push_back(builder.build()); + } + } + void appendMultipleStoreDiagnostics(AnalysisResult& result, const std::vector& issues) { @@ -672,6 +746,9 @@ namespace ctrace::stack::analyzer } std::ostringstream body; + std::string ruleId = "UninitializedLocalRead"; + std::string cwe = "CWE-457"; + double confidence = 0.90; if (issue.kind == analysis::UninitializedLocalIssueKind::ReadBeforeDefiniteInit) { body << "\t[ !!Warn ] potential read of uninitialized local variable '" @@ -689,8 +766,24 @@ namespace ctrace::stack::analyzer body << " in '" << issue.calleeName << "'"; body << "\n"; } + else if (issue.kind == + 
analysis::UninitializedLocalIssueKind::ExposedUninitializedBytesViaSink) + { + ruleId = "InformationExposure.UninitializedStackBytes"; + cwe = "CWE-200"; + confidence = 0.80; + body << "\t[ !!Warn ] potential information leak: local variable '" << issue.varName + << "' may expose uninitialized bytes through external sink"; + if (!issue.calleeName.empty()) + body << " '" << issue.calleeName << "'"; + body << "\n"; + body << "\t\t ↳ transmitted range is not fully initialized on all control-flow " + "paths\n"; + } else { + ruleId = "UninitializedLocalVariable"; + confidence = 0.75; body << "\t[ !!Warn ] local variable '" << issue.varName << "' is never initialized\n"; body << "\t\t ↳ declared without initializer and no definite write was found " @@ -706,17 +799,7 @@ namespace ctrace::stack::analyzer if (haveLoc) builder.lineColumn(line, column); - builder - .ruleId( - (issue.kind == analysis::UninitializedLocalIssueKind::ReadBeforeDefiniteInit || - issue.kind == - analysis::UninitializedLocalIssueKind::ReadBeforeDefiniteInitViaCall) - ? "UninitializedLocalRead" - : "UninitializedLocalVariable") - .confidence((issue.kind == analysis::UninitializedLocalIssueKind::NeverInitialized) - ? 
0.75 - : 0.90) - .cwe("CWE-457"); + builder.ruleId(ruleId).confidence(confidence).cwe(cwe); result.diagnostics.push_back(builder.build()); } @@ -889,6 +972,197 @@ namespace ctrace::stack::analyzer } } + void + appendCommandInjectionDiagnostics(AnalysisResult& result, + const std::vector& issues) + { + for (const auto& issue : issues) + { + const ResolvedLocation loc = resolveFromInstruction(issue.inst, true); + + std::ostringstream body; + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential command injection: non-literal command reaches '" << issue.sinkName + << "'\n"; + body << kDiagIndentArrow + << "the command argument is not a compile-time string literal\n"; + body << kDiagIndentArrow + << "validate/sanitize external input or avoid shell command composition\n"; + + DiagnosticBuilder builder; + builder.function(issue.funcName) + .filePath(issue.filePath) + .severity(DiagnosticSeverity::Warning) + .errCode(DescriptiveErrorCode::CommandInjection) + .ruleId("CommandInjection.NonLiteralCommand") + .cwe("CWE-78") + .confidence(0.70) + .location(loc) + .message(body.str()); + result.diagnostics.push_back(builder.build()); + } + } + + void appendTOCTOUDiagnostics(AnalysisResult& result, + const std::vector& issues) + { + for (const auto& issue : issues) + { + const ResolvedLocation loc = resolveFromInstruction(issue.inst, true); + + std::ostringstream body; + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential TOCTOU race: path checked with '" << issue.checkApi + << "' then used with '" << issue.useApi << "'\n"; + body << kDiagIndentArrow + << "the file target may change between check and use operations\n"; + body << kDiagIndentArrow + << "prefer descriptor-based validation (open + fstat) on the same handle\n"; + + DiagnosticBuilder builder; + builder.function(issue.funcName) + .filePath(issue.filePath) + .severity(DiagnosticSeverity::Warning) + .errCode(DescriptiveErrorCode::TOCTOURace) + 
.ruleId("TOCTOU.PathCheckThenUse") + .cwe("CWE-367") + .confidence(0.70) + .location(loc) + .message(body.str()); + result.diagnostics.push_back(builder.build()); + } + } + + void appendNullDerefDiagnostics(AnalysisResult& result, + const std::vector& issues) + { + for (const auto& issue : issues) + { + const ResolvedLocation loc = resolveFromInstruction(issue.inst, true); + + DiagnosticSeverity severity = DiagnosticSeverity::Error; + double confidence = 0.75; + std::ostringstream body; + body << "\t" << prefixForSeverity(DiagnosticSeverity::Error) + << " potential null pointer dereference on '" << issue.pointerName << "'\n"; + + switch (issue.kind) + { + case analysis::NullDerefIssueKind::DirectNullPointer: + body << kDiagIndentArrow + << "the dereferenced pointer is directly null at this instruction\n"; + break; + case analysis::NullDerefIssueKind::NullBranchDereference: + body << kDiagIndentArrow + << "control flow proves pointer is null on this branch before dereference\n"; + break; + case analysis::NullDerefIssueKind::NullStoredInLocalSlot: + body << kDiagIndentArrow + << "a preceding local-slot store sets the pointer to null before use\n"; + break; + case analysis::NullDerefIssueKind::UncheckedAllocatorResult: + severity = DiagnosticSeverity::Warning; + confidence = 0.70; + body.str(""); + body.clear(); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential null pointer dereference on '" << issue.pointerName << "'\n"; + body << kDiagIndentArrow + << "pointer comes from allocator return value and is dereferenced without a " + "provable null-check\n"; + break; + } + + DiagnosticBuilder builder; + builder.function(issue.funcName) + .filePath(issue.filePath) + .severity(severity) + .errCode(DescriptiveErrorCode::NullPointerDereference) + .ruleId("NullPointerDereference") + .cwe("CWE-476") + .confidence(confidence) + .location(loc) + .message(body.str()); + result.diagnostics.push_back(builder.build()); + } + } + + void 
appendTypeConfusionDiagnostics(AnalysisResult& result, + const std::vector& issues) + { + for (const auto& issue : issues) + { + const ResolvedLocation loc = resolveFromInstruction(issue.inst, true); + + std::ostringstream body; + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential type confusion: incompatible struct views on the same pointer\n"; + body << kDiagIndentArrow << "smaller observed view: '" << issue.smallerViewType << "' (" + << issue.smallerViewSizeBytes << " bytes)\n"; + body << kDiagIndentArrow << "accessed view: '" << issue.accessedViewType + << "' at byte offset " << issue.accessOffsetBytes << "\n"; + body << kDiagIndentArrow + << "field access may read/write outside the actual object layout\n"; + + DiagnosticBuilder builder; + builder.function(issue.funcName) + .filePath(issue.filePath) + .severity(DiagnosticSeverity::Warning) + .errCode(DescriptiveErrorCode::TypeConfusion) + .ruleId("TypeConfusion.IncompatibleStructView") + .cwe("CWE-843") + .confidence(0.65) + .location(loc) + .message(body.str()); + result.diagnostics.push_back(builder.build()); + } + } + + void appendOOBReadDiagnostics(AnalysisResult& result, + const std::vector& issues) + { + for (const auto& issue : issues) + { + const ResolvedLocation loc = resolveFromInstruction(issue.inst, true); + + DiagnosticBuilder builder; + builder.function(issue.funcName) + .filePath(issue.filePath) + .severity(DiagnosticSeverity::Warning) + .errCode(DescriptiveErrorCode::OutOfBoundsRead) + .location(loc) + .confidence(0.70) + .cwe("CWE-125"); + + std::ostringstream body; + if (issue.kind == analysis::OOBReadIssueKind::MissingNullTerminator) + { + builder.ruleId("OutOfBoundsRead.MissingNullTerminator"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential out-of-bounds read: string buffer '" << issue.bufferName + << "' may be missing a null terminator before '" << issue.apiName << "'\n"; + body << kDiagIndentArrow << "buffer size: " << 
issue.bufferSizeBytes + << " bytes, last write size: " << issue.writeSizeBytes << " bytes\n"; + body << kDiagIndentArrow + << "unterminated strings can make read APIs scan past buffer bounds\n"; + } + else + { + builder.ruleId("OutOfBoundsRead.HeapIndex"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " potential out-of-bounds read on heap buffer '" << issue.bufferName + << "' via unchecked index\n"; + body << kDiagIndentArrow << "inferred heap capacity: " << issue.capacityElements + << " element(s)\n"; + body << kDiagIndentArrow + << "index value is not proven to be within [0, capacity-1]\n"; + } + + builder.message(body.str()); + result.diagnostics.push_back(builder.build()); + } + } + void appendResourceLifetimeDiagnostics(AnalysisResult& result, const std::vector& issues) @@ -954,6 +1228,26 @@ namespace ctrace::stack::analyzer << "include callee definitions in inputs or extend --resource-model to " "improve precision\n"; break; + case analysis::ResourceLifetimeIssueKind::UseAfterRelease: + builder.severity(DiagnosticSeverity::Error) + .ruleId("ResourceLifetime.UseAfterRelease") + .cwe("CWE-416"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Error) + << " potential use-after-release: '" << issue.resourceKind << "' handle '" + << issue.handleName << "' is used after a release in this function\n"; + body << kDiagIndentArrow + << "a later dereference/call argument use may access invalid memory\n"; + break; + case analysis::ResourceLifetimeIssueKind::ReleasedHandleEscapes: + builder.severity(DiagnosticSeverity::Warning) + .ruleId("ResourceLifetime.ReleasedHandleEscapes") + .cwe("CWE-416"); + body << "\t" << prefixForSeverity(DiagnosticSeverity::Warning) + << " released handle derived from '" << issue.handleName + << "' may escape through a returned owner object\n"; + body << kDiagIndentArrow + << "caller-visible object may contain dangling pointer state\n"; + break; } builder.message(body.str()); diff --git 
a/src/app/AnalyzerApp.cpp b/src/app/AnalyzerApp.cpp index 60dd1db..8d0f4ab 100644 --- a/src/app/AnalyzerApp.cpp +++ b/src/app/AnalyzerApp.cpp @@ -431,6 +431,7 @@ struct LoadedInputModule std::string filename; std::unique_ptr context; std::unique_ptr module; + std::vector frontendDiagnostics; }; using AnalysisEntry = std::pair; @@ -730,7 +731,8 @@ static AppStatus analyzeWithSharedModuleLoading(const std::vector& loadErrors[index] = std::move(err); return; } - loadedModules[index] = {inputFilename, std::move(moduleContext), std::move(load.module)}; + loadedModules[index] = {inputFilename, std::move(moduleContext), std::move(load.module), + std::move(load.frontendDiagnostics)}; loadSucceeded[index] = 1; }; @@ -793,6 +795,11 @@ static AppStatus analyzeWithSharedModuleLoading(const std::vector& for (auto& loaded : loadedModules) { AnalysisResult result = analyzeModule(*loaded.module, cfg); + if (!loaded.frontendDiagnostics.empty()) + { + result.diagnostics.insert(result.diagnostics.end(), loaded.frontendDiagnostics.begin(), + loaded.frontendDiagnostics.end()); + } stampResultFilePaths(result, loaded.filename); const std::string emptyMsg = noFunctionMessage(result, loaded.filename, hasFilter); if (!emptyMsg.empty()) @@ -836,6 +843,12 @@ static AppStatus analyzeWithoutSharedModuleLoading(const std::vector kCandidates = { + static constexpr std::array kCandidates = { {{"-h", "-h"}, {"--help", "--help"}, {"--demangle", "--demangle"}, @@ -66,6 +66,7 @@ namespace ctrace::stack::cli {"--timing", "--timing"}, {"--resource-model", "--resource-model"}, {"--escape-model", "--escape-model"}, + {"--buffer-model", "--buffer-model"}, {"--resource-cross-tu", "--resource-cross-tu"}, {"--no-resource-cross-tu", "--no-resource-cross-tu"}, {"--uninitialized-cross-tu", "--uninitialized-cross-tu"}, @@ -739,6 +740,17 @@ namespace ctrace::stack::cli continue; } } + { + std::string value; + std::string error; + if (consumeLongOptionValue(argStr, "--buffer-model", i, argc, argv, value, error)) + 
{ + if (!error.empty()) + return makeError(error); + cfg.bufferModelPath = std::move(value); + continue; + } + } if (argStr == "--resource-cross-tu") { cfg.resourceCrossTU = true; diff --git a/test/alloca/recursive-controlled-alloca.c b/test/alloca/recursive-controlled-alloca.c index f08a6c7..4c1e634 100644 --- a/test/alloca/recursive-controlled-alloca.c +++ b/test/alloca/recursive-controlled-alloca.c @@ -24,3 +24,6 @@ int main(void) { return rec(4); } + +// at line 20, column 14 +// [ !!Warn ] potential signed integer overflow in arithmetic operation diff --git a/test/alloca/recursive-infinite-alloca.c b/test/alloca/recursive-infinite-alloca.c index 3a1d8b7..7d14cf4 100644 --- a/test/alloca/recursive-infinite-alloca.c +++ b/test/alloca/recursive-infinite-alloca.c @@ -27,3 +27,6 @@ int main(void) boom(32); return 0; } + +// at line 21, column 22 +// [!!!Error] unconditional self recursion detected (no base case) diff --git a/test/bound-storage/bound-storage-for-statement.c b/test/bound-storage/bound-storage-for-statement.c index 58cd9f6..a4c2449 100644 --- a/test/bound-storage/bound-storage-for-statement.c +++ b/test/bound-storage/bound-storage-for-statement.c @@ -54,4 +54,4 @@ int main(void) // ↳ size does not appear user-controlled but remains runtime-dependent // [!Info!] multiple stores to stack buffer 'test' in this function (4 store instruction(s), 4 distinct index expression(s)) -// [!Info!] stores use different index expressions; verify indices are correct and non-overlapping \ No newline at end of file +// [!Info!] 
stores use different index expressions; verify indices are correct and non-overlapping diff --git a/test/bound-storage/bound-storage-if-statement.c b/test/bound-storage/bound-storage-if-statement.c index be6b045..3c18a5a 100644 --- a/test/bound-storage/bound-storage-if-statement.c +++ b/test/bound-storage/bound-storage-if-statement.c @@ -27,7 +27,7 @@ int main(void) // at line 34, column 18 // [ !!Warn ] potential stack buffer overflow on variable 'test1' (size 10) // ↳ alias path: test1 - // ↳ index variable may go up to 10 (array last valid index: 9) + // ↳ index variable may go up to 11 (array last valid index: 9) // ↳ (this is a write access) char test2[10]; if (i > 10) @@ -35,3 +35,6 @@ int main(void) return 0; } + +// at line 32, column 1 +// [ !!Warn ] local variable 'test2' is never initialized diff --git a/test/bound-storage/bound-storage.c b/test/bound-storage/bound-storage.c index c45d016..5f7614b 100644 --- a/test/bound-storage/bound-storage.c +++ b/test/bound-storage/bound-storage.c @@ -24,3 +24,7 @@ int main(void) return 0; } + +// at line 21, column 5 +// [ !!Warn ] potential UB: invalid base reconstruction via offsetof/container_of +// [!!!Error] derived pointer points OUTSIDE the valid object range diff --git a/test/bound-storage/global-array-overflow.c b/test/bound-storage/global-array-overflow.c new file mode 100644 index 0000000..6e8be3e --- /dev/null +++ b/test/bound-storage/global-array-overflow.c @@ -0,0 +1,20 @@ +int gbuf[10]; + +int main(void) +{ + int i = 0; + for (; i < 10; ++i) + { + gbuf[i] = i; + } + + // at line 17, column 13 + // [ !!Warn ] potential buffer overflow on global variable 'gbuf' (size 10) + // ↳ alias path: gbuf + // ↳ index variable may go up to 10 (array last valid index: 9) + // ↳ (this is a write access) + // ↳ [info] this access appears unreachable at runtime (condition is always false for this branch) + gbuf[i] = i; + + return gbuf[0]; +} diff --git a/test/bound-storage/indirection-profonde-aliasing.c 
b/test/bound-storage/indirection-profonde-aliasing.c index 0d4b6bb..5291866 100644 --- a/test/bound-storage/indirection-profonde-aliasing.c +++ b/test/bound-storage/indirection-profonde-aliasing.c @@ -56,3 +56,10 @@ int main(void) char buf[n]; // alloca variable return 0; } + +// at line 56, column 5 +// [ !!Warn ] dynamic alloca on the stack for variable 'vla' + +// at line 13, column 5 +// [ !!Warn ] potential UB: invalid base reconstruction via offsetof/container_of +// [!!!Error] derived pointer points OUTSIDE the valid object range diff --git a/test/bound-storage/ne-narrowing-unsound.c b/test/bound-storage/ne-narrowing-unsound.c new file mode 100644 index 0000000..1b3f890 --- /dev/null +++ b/test/bound-storage/ne-narrowing-unsound.c @@ -0,0 +1,21 @@ +void ne_must_not_narrow_range(int i) +{ + char buf[200]; + + // at line 13, column 20 + // [ !!Warn ] potential stack buffer overflow on variable 'buf' (size 200) + // ↳ alias path: buf + // ↳ index variable may go up to 301 (array last valid index: 199) + // ↳ (this is a write access) + if (i > 300) + { + if (i != 100) + buf[i] = 1; + } +} + +int main(void) +{ + ne_must_not_narrow_range(400); + return 0; +} diff --git a/test/bound-storage/ranges_test.c b/test/bound-storage/ranges_test.c index c15e499..47f3f95 100644 --- a/test/bound-storage/ranges_test.c +++ b/test/bound-storage/ranges_test.c @@ -187,7 +187,7 @@ void alias_lb_ub(int i) // at line 194, column 14 // [!!] 
potential negative index on variable 'buf' (size 10) - // ↳ alias path: p -> arraydecay -> buf + // ↳ alias path: buf -> arraydecay -> p // ↳ inferred lower bound for index expression: -2 (index may be < 0) // ↳ (this is a write access) if (i >= -2 && i <= 12) diff --git a/test/coretrace-legacy/AvoidDefaultArgumentsOnVirtualMethods.hh b/test/coretrace-legacy/AvoidDefaultArgumentsOnVirtualMethods.hh new file mode 100644 index 0000000..b43f8c6 --- /dev/null +++ b/test/coretrace-legacy/AvoidDefaultArgumentsOnVirtualMethods.hh @@ -0,0 +1,23 @@ +class Base +{ + public: + virtual void func(int x = 10) + { + printf("Base: %d\n", x); + } // Argument par défaut dans une méthode virtuelle. +}; + +class Derived : public Base +{ + public: + void func(int x) override + { + printf("Derived: %d\n", x); + } +}; + +int main(void) +{ + Base* b = new Derived(); + b->func(); // Comportement ambigu : l'argument par défaut de Base est utilisé. +} diff --git a/test/coretrace-legacy/DestructorOfVirtualClass.hh b/test/coretrace-legacy/DestructorOfVirtualClass.hh new file mode 100644 index 0000000..94d9c5b --- /dev/null +++ b/test/coretrace-legacy/DestructorOfVirtualClass.hh @@ -0,0 +1,19 @@ +class Base +{ + public: + ~Base() {} // Destructeur non virtuel dans une classe de base avec des classes dérivées. +}; + +class Derived : public Base +{ + public: + ~Derived() + { /* Libération de ressources */ + } +}; + +int main() +{ + Base* obj = new Derived(); + delete obj; // Fuite mémoire : le destructeur de Derived n'est pas appelé. +} diff --git a/test/coretrace-legacy/EmptyForStatement.cc b/test/coretrace-legacy/EmptyForStatement.cc new file mode 100644 index 0000000..99b5d1a --- /dev/null +++ b/test/coretrace-legacy/EmptyForStatement.cc @@ -0,0 +1,10 @@ +#include + +int main() +{ + for (int i = 0; i < 10; i++) + ; // Boucle vide : le point-virgule termine la boucle immédiatement. 
+ { + printf("Hello\n"); + } +} diff --git a/test/coretrace-legacy/RedundantIfStatement.c b/test/coretrace-legacy/RedundantIfStatement.c new file mode 100644 index 0000000..b96632b --- /dev/null +++ b/test/coretrace-legacy/RedundantIfStatement.c @@ -0,0 +1,14 @@ +#include + +bool isPositive(int x_test) +{ + if (x_test > 0) + { + return true; // Condition redondante : peut être simplifiée. + } + else + { + return false; + } + // Mieux : return x > 0; +} diff --git a/test/coretrace-legacy/RedundantIfStatement.cc b/test/coretrace-legacy/RedundantIfStatement.cc new file mode 100644 index 0000000..7969a93 --- /dev/null +++ b/test/coretrace-legacy/RedundantIfStatement.cc @@ -0,0 +1,12 @@ +bool isPositive(int x_test) +{ + if (x_test > 0) + { + return true; // Condition redondante : peut être simplifiée. + } + else + { + return false; + } + // Mieux : return x > 0; +} diff --git a/test/coretrace-legacy/bad_function_pointer.cc b/test/coretrace-legacy/bad_function_pointer.cc new file mode 100644 index 0000000..6ffda92 --- /dev/null +++ b/test/coretrace-legacy/bad_function_pointer.cc @@ -0,0 +1,20 @@ +#include + +typedef void (*FuncPtr)(int); + +void myFunction(int x) +{ + printf("Value: %d\n", x); +} + +void anotherFunction(double y) +{ + printf("Double value: %f\n", y); +} + +int main() +{ + FuncPtr fptr = (FuncPtr)anotherFunction; // Mauvaise conversion de type + fptr(42); // Appel incorrect : un int est interprété comme un double + return 0; +} diff --git a/test/coretrace-legacy/bound_index.cc b/test/coretrace-legacy/bound_index.cc new file mode 100644 index 0000000..9795151 --- /dev/null +++ b/test/coretrace-legacy/bound_index.cc @@ -0,0 +1,20 @@ +#include + +int a[10]; + +int main(int argc, char* argv[]) +{ + size_t i = 0; + for (; i < 10; i++) + { + a[i] = i; + } + a[i] = i; + printf("%i", a[i]); +} + +// at line 12, column 10 +// [ !!Warn ] potential buffer overflow on global variable 'a' (size 10) + +// at line 13, column 18 +// [ !!Warn ] potential buffer overflow 
on global variable 'a' (size 10) diff --git a/test/coretrace-legacy/buffer_overflow.cc b/test/coretrace-legacy/buffer_overflow.cc new file mode 100644 index 0000000..9396932 --- /dev/null +++ b/test/coretrace-legacy/buffer_overflow.cc @@ -0,0 +1,27 @@ +#include +#include + +void vulnerable_function(char* input) +{ + char buffer[8]; // Un petit buffer de 8 octets + printf("Adresse du buffer : %p\n", (void*)buffer); + + // Copie de l'input sans vérification -> peut provoquer un buffer overflow + strcpy(buffer, input); + + printf("Contenu du buffer : %s\n", buffer); +} + +int main(int argc, char* argv[]) +{ + if (argc < 2) + { + printf("Usage: %s \n", argv[0]); + return 1; + } + + vulnerable_function(argv[1]); + + printf("Programme terminé normalement !\n"); + return 0; +} diff --git a/test/coretrace-legacy/dead_code.cc b/test/coretrace-legacy/dead_code.cc new file mode 100644 index 0000000..63d0e40 --- /dev/null +++ b/test/coretrace-legacy/dead_code.cc @@ -0,0 +1,52 @@ +int compute(int x) +{ + int y = x + 1; + if (x > 0) + { + return x * 2; + } + return x * 3; // Code mort : jamais atteint car toutes les branches retournent avant. +} + +int over_run_compute(int x) +{ + int y = x + 1; + if (x > 0) + { + return x * 2; // Code mort : jamais atteint car toutes les branches retournent avant. + } + return x * 3; +} + +int single_compute(void) +{ + int x = 5; + + if (x > 0) + { + return 2; // Code mort : jamais atteint car toutes les branches retournent avant. 
+ } + return 3; +} + +int main() +{ + int a = 5; + int b = compute(a); + a = -5; + int c = over_run_compute(a); + int d = single_compute(); + return b + c + d; +} + +// at line 6, column 18 +// [ !!Warn ] potential signed integer overflow in arithmetic operation + +// at line 8, column 14 +// [ !!Warn ] potential signed integer overflow in arithmetic operation + +// at line 16, column 18 +// [ !!Warn ] potential signed integer overflow in arithmetic operation + +// at line 18, column 14 +// [ !!Warn ] potential signed integer overflow in arithmetic operation diff --git a/test/coretrace-legacy/double_free.c b/test/coretrace-legacy/double_free.c new file mode 100644 index 0000000..7370462 --- /dev/null +++ b/test/coretrace-legacy/double_free.c @@ -0,0 +1,14 @@ +// double_free.c +#include + +int main(void) +{ + char* ptr = (char*)malloc(10); // Allocation de 10 octets + if (ptr == NULL) + return 1; // Vérification basique + + free(ptr); // Première libération + free(ptr); // Deuxième libération (double free) + + return 0; +} diff --git a/test/coretrace-legacy/format_problem.c b/test/coretrace-legacy/format_problem.c new file mode 100644 index 0000000..89f0efe --- /dev/null +++ b/test/coretrace-legacy/format_problem.c @@ -0,0 +1,21 @@ +#include + +void print_input(char* input) +{ + printf(input); +} + +int main() +{ + char user_input[100]; + printf("Entrez une chaîne : "); + fgets(user_input, 100, stdin); + print_input(user_input); + return 0; +} + +// at line 5, column 12 +// [ !!Warn ] non-literal format string may allow format injection + +// at line 10, column 1 +// [ !!Warn ] local variable 'user_input' is never initialized diff --git a/test/coretrace-legacy/if_collapse.c b/test/coretrace-legacy/if_collapse.c new file mode 100644 index 0000000..1520fe1 --- /dev/null +++ b/test/coretrace-legacy/if_collapse.c @@ -0,0 +1,14 @@ +int main() +{ + int iii = 0, jjj = 1; + + if (jjj) + { + if (iii) + { + jjj = 0; + return 1; + } + } + return 0; +} diff --git 
a/test/coretrace-legacy/if_constant_expr.c b/test/coretrace-legacy/if_constant_expr.c new file mode 100644 index 0000000..4eb067d --- /dev/null +++ b/test/coretrace-legacy/if_constant_expr.c @@ -0,0 +1,11 @@ +int main() +{ + const int c_test = 1; + + if (c_test) + { + return 1; + } + + return 0; +} diff --git a/test/coretrace-legacy/null_pointer.c b/test/coretrace-legacy/null_pointer.c new file mode 100644 index 0000000..30c33a5 --- /dev/null +++ b/test/coretrace-legacy/null_pointer.c @@ -0,0 +1,27 @@ +// null_pointer_subtle.c +#include + +int toto(void) +{ + int* ptr = NULL; // Pointeur initialisé à NULL + *ptr = 42; // Déréférencement de pointeur nul + + return 0; +} + +int main(void) +{ + int* ptr = (int*)malloc(sizeof(int)); // Allocation qui peut échouer + + if (ptr != NULL) + { + *ptr = 10; // Accès valide si l'allocation réussit + } + *ptr = 42; // Déréférencement potentiel de NULL si malloc échoue + free(ptr); + toto(); + return 0; +} + +// at line 7, column 10 +// [!!!Error] potential null pointer dereference on '' diff --git a/test/coretrace-legacy/partitioning.cc b/test/coretrace-legacy/partitioning.cc new file mode 100644 index 0000000..a839a0f --- /dev/null +++ b/test/coretrace-legacy/partitioning.cc @@ -0,0 +1,53 @@ +// init_with_error.c +#include + +int* xxx() +{ + return (int*)malloc(sizeof(int)); // Allocation réussie ou NULL +} + +int yyy(int* ptr) +{ + if (ptr == NULL) + return -1; // Erreur si ptr est NULL + *ptr = 42; //Ascending; // Déréférencement potentiel de NULL + return 0; +} + +int zzz(int* ptr) +{ + *ptr = 100; // Déréférencement potentiel de NULL + return 0; +} + +int init(void) +{ + int* ptr = xxx(); // Peut retourner NULL + if (ptr == NULL) + { + return -1; // Erreur dans xxx + } + + int status = yyy(ptr); + if (status < 0) + { + free(ptr); + return -2; // Erreur dans yyy + } + + zzz(ptr); + free(ptr); + return 0; // Succès +} + +int main(void) +{ + int result = init(); + if (result == 0) + { + return 1; + } + return 0; +} + +// not 
working \ No newline at end of file diff --git a/test/coretrace-legacy/pointer_comparison_analysis.cc b/test/coretrace-legacy/pointer_comparison_analysis.cc new file mode 100644 index 0000000..1cfdc95 --- /dev/null +++ b/test/coretrace-legacy/pointer_comparison_analysis.cc @@ -0,0 +1,40 @@ +// pointer_cmp.c +#include +#include + +void bad_comparison(int* p, int* q) +{ + if (p < q) + { // Comparaison entre pointeurs d'objets différents + printf("p < q\n"); + } + else + { + printf("p >= q\n"); + } +} + +int main(void) +{ + int* p = (int*)malloc(sizeof(int)); // Première allocation + int* q = (int*)malloc(sizeof(int)); // Deuxième allocation (objet différent) + + if (p == NULL || q == NULL) + return 1; // Vérification basique + + *p = 10; + *q = 20; + + if (p < q) + { // Comparaison entre pointeurs d'objets différents + printf("p < q\n"); + } + else + { + printf("p >= q\n"); + } + bad_comparison(p, q); + free(p); + free(q); + return 0; +} diff --git a/test/coretrace-legacy/pointer_overflow.cc b/test/coretrace-legacy/pointer_overflow.cc new file mode 100644 index 0000000..71042a1 --- /dev/null +++ b/test/coretrace-legacy/pointer_overflow.cc @@ -0,0 +1,16 @@ +#include + +int main(void) +{ + int* array = (int*)malloc(5 * sizeof(int)); + if (array == NULL) + return 1; + int* ptr = array; + for (int i = 0; i < 10; i++) + { + ptr = ptr + 1; // Déborde après i = 5 + *ptr = i; + } + free(array); + return 0; +} diff --git a/test/coretrace-legacy/too_many_methods.cc b/test/coretrace-legacy/too_many_methods.cc new file mode 100644 index 0000000..663616d --- /dev/null +++ b/test/coretrace-legacy/too_many_methods.cc @@ -0,0 +1,15 @@ +class MyClass +{ + public: + void method1() {} + void method2() {} + void method3() {} + void method4() {} + void method5() {} + void method6() {} + void method7() {} + void method8() {} + void method9() {} + void method10() {} + // Trop de méthodes : la classe devient difficile à maintenir. 
+}; \ No newline at end of file diff --git a/test/coretrace-legacy/unaligned_dereferencing.cc b/test/coretrace-legacy/unaligned_dereferencing.cc new file mode 100644 index 0000000..2794760 --- /dev/null +++ b/test/coretrace-legacy/unaligned_dereferencing.cc @@ -0,0 +1,13 @@ +// unaligned.c +#include + +int main(void) +{ + char buffer[8] = {0}; // Buffer de 8 octets + char* ptr = buffer + 1; // Pointe vers une adresse non alignée (offset de 1) + int* unaligned = (int*)ptr; // Conversion en pointeur int (taille 4 octets) + + *unaligned = 42; // Déréférencement non aligné + printf("Value: %d\n", *unaligned); + return 0; +} diff --git a/test/cpy-buffer/bad-usage-memcpy.c b/test/cpy-buffer/bad-usage-memcpy.c index 7bf472e..c8a431a 100644 --- a/test/cpy-buffer/bad-usage-memcpy.c +++ b/test/cpy-buffer/bad-usage-memcpy.c @@ -22,3 +22,9 @@ int main(void) // // at line 5, column 1 // // [ !!Warn ] local variable 'buf' is never initialized // // ↳ declared without initializer and no definite write was found in this function + +// at line 6, column 5 +// [ !!Warn ] potential stack buffer overflow in memcpy on variable 'buf' + +// at line 5, column 1 +// [ !!Warn ] local variable 'buf' is never initialized diff --git a/test/cpy-buffer/bad-usage-memset.c b/test/cpy-buffer/bad-usage-memset.c index 864966c..38d9dab 100644 --- a/test/cpy-buffer/bad-usage-memset.c +++ b/test/cpy-buffer/bad-usage-memset.c @@ -29,4 +29,7 @@ int main(void) // at line 7, column 0 // [!]ConstParameterNotModified.Pointer: parameter 'src' in function 'foo' is never used to modify the pointed object // current type: char *src -// suggested type: const char *src \ No newline at end of file +// suggested type: const char *src + +// at line 10, column 5 +// [ !!Warn ] potential stack buffer overflow in memset on variable 'buf' diff --git a/test/cpy-buffer/memcpy-name-false-positive.c b/test/cpy-buffer/memcpy-name-false-positive.c new file mode 100644 index 0000000..4ba1be6 --- /dev/null +++ 
b/test/cpy-buffer/memcpy-name-false-positive.c @@ -0,0 +1,16 @@ +#include + +void telemetry_memcpy(char* dst, const char* src, size_t n) +{ + (void)src; + (void)n; + dst[0] = 'X'; +} + +void run_name_false_positive_case(const char* src) +{ + char buf[8] = {0}; + telemetry_memcpy(buf, src, 999); +} + +// not contains: potential stack buffer overflow in telemetry_memcpy diff --git a/test/cpy-buffer/models/custom-wrapper-buffer-model.txt b/test/cpy-buffer/models/custom-wrapper-buffer-model.txt new file mode 100644 index 0000000..602a9d8 --- /dev/null +++ b/test/cpy-buffer/models/custom-wrapper-buffer-model.txt @@ -0,0 +1,5 @@ +# Wrapper-specific rules used by regression fixtures. + +bounded_write custom_memcpy_wrapper 0 2 +bounded_write custom_memset_wrapper 0 2 +bounded_write custom_memmove_wrapper 0 2 diff --git a/test/cpy-buffer/name-contains-memintrinsics.c b/test/cpy-buffer/name-contains-memintrinsics.c index 02adee0..bd4c558 100644 --- a/test/cpy-buffer/name-contains-memintrinsics.c +++ b/test/cpy-buffer/name-contains-memintrinsics.c @@ -1,5 +1,7 @@ #include +// buffer-model: test/cpy-buffer/models/custom-wrapper-buffer-model.txt + extern void custom_memcpy_wrapper(void* dst, const void* src, size_t n); extern void custom_memset_wrapper(void* dst, int value, size_t n); extern void custom_memmove_wrapper(void* dst, const void* src, size_t n); @@ -7,8 +9,8 @@ extern void custom_memmove_wrapper(void* dst, const void* src, size_t n); void test_memcpy_name(const char* src) { char buf[8]; - // at line 14, column 5 - // [ !!Warn ] potential stack buffer overflow in memcpy on variable 'buf' + // at line 16, column 5 + // [ !!Warn ] potential stack buffer overflow in custom_memcpy_wrapper on variable 'buf' // ↳ destination stack buffer size: 8 bytes // ↳ requested 16 bytes to be copied/initialized custom_memcpy_wrapper(buf, src, 16); @@ -17,8 +19,8 @@ void test_memcpy_name(const char* src) void test_memset_name(void) { char buf[10]; - // at line 24, column 5 - // [ 
!!Warn ] potential stack buffer overflow in memset on variable 'buf' + // at line 26, column 5 + // [ !!Warn ] potential stack buffer overflow in custom_memset_wrapper on variable 'buf' // ↳ destination stack buffer size: 10 bytes // ↳ requested 24 bytes to be copied/initialized custom_memset_wrapper(buf, 0, 24); @@ -27,8 +29,8 @@ void test_memset_name(void) void test_memmove_name(const char* src) { char buf[12]; - // at line 34, column 5 - // [ !!Warn ] potential stack buffer overflow in memmove on variable 'buf' + // at line 36, column 5 + // [ !!Warn ] potential stack buffer overflow in custom_memmove_wrapper on variable 'buf' // ↳ destination stack buffer size: 12 bytes // ↳ requested 20 bytes to be copied/initialized custom_memmove_wrapper(buf, src, 20); diff --git a/test/cpy-buffer/unbounded-strcpy-model.c b/test/cpy-buffer/unbounded-strcpy-model.c new file mode 100644 index 0000000..f02db45 --- /dev/null +++ b/test/cpy-buffer/unbounded-strcpy-model.c @@ -0,0 +1,24 @@ +#include + +// buffer-model: models/buffer-overflow/generic.txt + +void foo(char* src) +{ + char buf[8]; + + // at line 13, column 5 + // [ !!Warn ] potential stack buffer overflow in strcpy on variable 'buf' + // ↳ destination stack buffer size: 8 bytes + // ↳ this API has no explicit size argument; destination fit cannot be proven statically + strcpy(buf, src); +} + +int main(void) +{ + char src[16] = "0123456789abcdef"; + foo(src); + return 0; +} + +// at line 7, column 1 +// [ !!Warn ] local variable 'buf' is never initialized diff --git a/test/escape-stack/indirect-callback-unknown-target-with-safe-candidate.c b/test/escape-stack/indirect-callback-unknown-target-with-safe-candidate.c index 5a94571..293edfc 100644 --- a/test/escape-stack/indirect-callback-unknown-target-with-safe-candidate.c +++ b/test/escape-stack/indirect-callback-unknown-target-with-safe-candidate.c @@ -19,3 +19,6 @@ void call_unknown_callback(cb_t cb) // at line 16, column 5 // [ !!Warn ] stack pointer escape: address 
of variable 'buf' escapes this function // ↳ address passed as argument to an indirect call (callback may capture the pointer beyond this function) + +// at line 15, column 1 +// [ !!Warn ] local variable 'buf' is never initialized diff --git a/test/escape-stack/return-stack-address-through-pointer-slot.c b/test/escape-stack/return-stack-address-through-pointer-slot.c index 396b73b..47d9a54 100644 --- a/test/escape-stack/return-stack-address-through-pointer-slot.c +++ b/test/escape-stack/return-stack-address-through-pointer-slot.c @@ -8,3 +8,6 @@ char* leak_through_slot(void) // at line 5, column 5 // [ !!Warn ] stack pointer escape: address of variable 'buf' escapes this function // ↳ escape via return statement (pointer to stack returned to caller) + +// at line 3, column 1 +// [ !!Warn ] local variable 'buf' is never initialized diff --git a/test/escape-stack/stack_escape.c b/test/escape-stack/stack_escape.c index 8afa3ce..6bb2153 100644 --- a/test/escape-stack/stack_escape.c +++ b/test/escape-stack/stack_escape.c @@ -71,3 +71,36 @@ int main(void) store_in_global_field(); return 0; } + +// at line 13, column 1 +// [ !!Warn ] local variable 'buf' is never initialized + +// at line 14, column 5 +// [ !!Warn ] stack pointer escape: address of variable 'buf' escapes this function + +// at line 19, column 1 +// [ !!Warn ] local variable 'buf' is never initialized + +// at line 20, column 11 +// [ !!Warn ] stack pointer escape: address of variable 'buf' escapes this function + +// at line 25, column 1 +// [ !!Warn ] local variable 'buf' is never initialized + +// at line 26, column 9 +// [ !!Warn ] stack pointer escape: address of variable 'buf' escapes this function + +// at line 31, column 1 +// [ !!Warn ] local variable 'buf' is never initialized + +// at line 32, column 10 +// [ !!Warn ] stack pointer escape: address of variable 'buf' escapes this function + +// at line 43, column 1 +// [ !!Warn ] local variable 'buf' is never initialized + +// at line 44, column 
5 +// [ !!Warn ] stack pointer escape: address of variable 'buf' escapes this function + +// at line 57, column 1 +// [ !!Warn ] local variable 'buf' is never initialized diff --git a/test/escape-stack/virtual-strategy-local-no-escape.cpp b/test/escape-stack/virtual-strategy-local-no-escape.cpp index bdd45c6..3a62837 100644 --- a/test/escape-stack/virtual-strategy-local-no-escape.cpp +++ b/test/escape-stack/virtual-strategy-local-no-escape.cpp @@ -128,3 +128,6 @@ int runWithStrategies(int mode) // not contains: stack pointer escape: address of variable 'plan' escapes this function // not contains: stack pointer escape: address of variable 'results' escapes this function // not contains: stack pointer escape: address of variable 'executionStatus' escapes this function + +// at line 76, column 49 +// [ !!Warn ] potential signed integer overflow in arithmetic operation diff --git a/test/false-positif/unique_ptr_state.cpp b/test/false-positif/unique_ptr_state.cpp index 8f957cd..593b408 100644 --- a/test/false-positif/unique_ptr_state.cpp +++ b/test/false-positif/unique_ptr_state.cpp @@ -22,3 +22,6 @@ int main(void) } // not contains: stack pointer escape + +// at line 13, column 1 +// [ !!Warn ] local variable 'buf' is never initialized diff --git a/test/false-positive-repro/stb-like-next-code-uninitialized.c b/test/false-positive-repro/stb-like-next-code-uninitialized.c index 4cb2c9b..5e1a79d 100644 --- a/test/false-positive-repro/stb-like-next-code-uninitialized.c +++ b/test/false-positive-repro/stb-like-next-code-uninitialized.c @@ -55,3 +55,9 @@ int fp_stb_like_next_code_uninitialized(Huff* z, const u8* sizelist, int num) // at line 22, column 9 // [ !!Warn ] potential read of uninitialized local variable 'sizes' + +// at line 30, column 24 +// [ !!Warn ] potential read of uninitialized local variable 'sizes' + +// at line 33, column 14 +// [ !!Warn ] potential read of uninitialized local variable 'sizes' diff --git a/test/files/Makefile b/test/files/Makefile new 
file mode 100644 index 0000000..4dcb14d --- /dev/null +++ b/test/files/Makefile @@ -0,0 +1,4 @@ +.PHONY: all analyze scan-build asan clean + +all analyze scan-build asan clean: + $(MAKE) -C ../security $@ diff --git a/test/files/README.md b/test/files/README.md new file mode 100644 index 0000000..9d11d87 --- /dev/null +++ b/test/files/README.md @@ -0,0 +1,10 @@ +# Legacy location + +The numbered security fixtures were reorganized by vulnerability type under: + +`test/security/` + +See: + +- `test/security/README.md` +- `test/security/Makefile` diff --git a/test/integer-overflow/cross-tu-tricky-def.c b/test/integer-overflow/cross-tu-tricky-def.c new file mode 100644 index 0000000..2230d53 --- /dev/null +++ b/test/integer-overflow/cross-tu-tricky-def.c @@ -0,0 +1,59 @@ +#include +#include +#include + +int io_cross_signed_overflow(int a, int b, int gate1, int gate2) +{ + if (gate1) + { + if (gate2) + { + return a + b; + } + } + return a; +} + +void io_cross_truncation_alloc(int cond) +{ + unsigned long big = 0x1FFFFFFFFUL; + unsigned int small = cond ? 
(unsigned int)big : 32u; + char* buf = (char*)malloc((size_t)small); + if (buf) + { + free(buf); + } +} + +void io_cross_signed_to_size_copy(int len, int gate_outer, int gate_inner) +{ + char dst[32]; + for (int i = 0; i < 2; ++i) + { + if (gate_outer) + { + if (((i & 1) == 0) && gate_inner) + { + memcpy(dst, "BBBB", (size_t)len); + } + } + } +} + +// at line 8, column 22 +// [ !!Warn ] potential signed integer overflow in arithmetic operation +// ↳ operation: add +// ↳ result is returned without a provable non-overflow bound + +// at line 17, column 25 +// [ !!Warn ] potential integer truncation in size computation before 'malloc' +// ↳ narrowing conversion may drop high bits and produce a smaller buffer size + +// at line 28, column 17 +// [ !!Warn ] potential signed-to-size conversion before 'memcpy' +// ↳ a possibly negative signed value is converted to an unsigned length +// ↳ this can become a very large size value and trigger out-of-bounds access + +// at line 24, column 1 +// [ !!Warn ] local variable 'dst' is never initialized +// ↳ declared without initializer and no definite write was found in this function diff --git a/test/integer-overflow/cross-tu-tricky-use.c b/test/integer-overflow/cross-tu-tricky-use.c new file mode 100644 index 0000000..7517460 --- /dev/null +++ b/test/integer-overflow/cross-tu-tricky-use.c @@ -0,0 +1,20 @@ +#include + +int io_cross_signed_overflow(int a, int b, int gate1, int gate2); +void io_cross_truncation_alloc(int cond); +void io_cross_signed_to_size_copy(int len, int gate_outer, int gate_inner); + +int io_cross_driver(int n) +{ + int acc = 0; + for (int i = 0; i < n; ++i) + { + if ((i % 2) == 0) + { + acc += io_cross_signed_overflow(INT_MAX, 1, 1, 1); + io_cross_truncation_alloc(1); + io_cross_signed_to_size_copy(-1, 1, 1); + } + } + return acc; +} diff --git a/test/local-storage/c/stack-exhaustion-large-frame.c b/test/local-storage/c/stack-exhaustion-large-frame.c index 4680fe9..882ed31 100644 --- 
a/test/local-storage/c/stack-exhaustion-large-frame.c +++ b/test/local-storage/c/stack-exhaustion-large-frame.c @@ -12,3 +12,7 @@ int main(void) return 0; } + +// at line 11, column 1 +// [ !!Warn ] local variable 'test' is never initialized +// ↳ declared without initializer and no definite write was found in this function diff --git a/test/pointer_reference-const_correctness/const-mixed.c b/test/pointer_reference-const_correctness/const-mixed.c index 9db0f95..6a9de57 100644 --- a/test/pointer_reference-const_correctness/const-mixed.c +++ b/test/pointer_reference-const_correctness/const-mixed.c @@ -2,8 +2,10 @@ #include #include #include +#include +#include -// at line 10, column 0 +// at line 11, column 0 // [ !Info! ] ConstParameterNotModified.Pointer: parameter 'values' in function 'print_sum' is never used to modify the pointed object // ↳ current type: int *values // ↳ suggested type: const int *values @@ -27,12 +29,12 @@ void read_data(char* const buffer) printf("%s\n", buffer); } -// at line 39, column 0 +// at line 40, column 0 // [ !Info! ] ConstParameterNotModified.Pointer: parameter 'a' in function 'get_max' is never used to modify the pointed object // ↳ current type: int *a // ↳ suggested type: const int *a -// at line 39, column 0 +// at line 40, column 0 // [ !Info! ] ConstParameterNotModified.Pointer: parameter 'b' in function 'get_max' is never used to modify the pointed object // ↳ current type: int *b // ↳ suggested type: const int *b @@ -46,12 +48,12 @@ struct Point int x, y; }; -// at line 58, column 0 +// at line 59, column 0 // [ !Info! ] ConstParameterNotModified.Pointer: parameter 'p1' in function 'distance' is never used to modify the pointed object // ↳ current type: Point *p1 // ↳ suggested type: const Point *p1 -// at line 58, column 0 +// at line 59, column 0 // [ !Info! 
] ConstParameterNotModified.Pointer: parameter 'p2' in function 'distance' is never used to modify the pointed object // ↳ current type: Point *p2 // ↳ suggested type: const Point *p2 @@ -59,3 +61,20 @@ int distance(struct Point* p1, struct Point* p2) { return abs(p1->x - p2->x) + abs(p1->y - p2->y); } + +int distancePatched(const struct Point* p1, const struct Point* p2) +{ + int64_t dx = (int64_t)p1->x - (int64_t)p2->x; + int64_t dy = (int64_t)p1->y - (int64_t)p2->y; + + uint64_t manhattan = (uint64_t)llabs(dx) + (uint64_t)llabs(dy); + if (manhattan > (uint64_t)INT_MAX) + return INT_MAX; // policy: saturation + + return (int)manhattan; +} + +// at line 61, column 31 +// [ !!Warn ] potential signed integer overflow in arithmetic operation +// ↳ operation: add +// ↳ result is returned without a provable non-overflow bound diff --git a/test/resource-lifetime/cxa-demangle-balanced-no-incomplete.cpp b/test/resource-lifetime/cxa-demangle-balanced-no-incomplete.cpp new file mode 100644 index 0000000..8455da1 --- /dev/null +++ b/test/resource-lifetime/cxa-demangle-balanced-no-incomplete.cpp @@ -0,0 +1,15 @@ +extern "C" char* __cxa_demangle(const char* mangled, char* outputBuffer, unsigned long* length, + int* status); +extern "C" void free(void*); + +// resource-model: models/resource-lifetime/generic.txt +int resource_lifetime_cxa_demangle_balanced_no_incomplete(const char* symbol) +{ + int status = 0; + char* demangled = __cxa_demangle(symbol, nullptr, nullptr, &status); + free(demangled); + return status; +} + +// not contains: inter-procedural resource analysis incomplete +// not contains: potential double release diff --git a/test/resource-lifetime/external-wrapper-known-no-acquire-double-release.c b/test/resource-lifetime/external-wrapper-known-no-acquire-double-release.c index f0b20c8..3c31a88 100644 --- a/test/resource-lifetime/external-wrapper-known-no-acquire-double-release.c +++ b/test/resource-lifetime/external-wrapper-known-no-acquire-double-release.c @@ 
-18,3 +18,6 @@ int external_wrapper_known_no_acquire_double_release(void) // at line 15, column 5 // [!!!Error] potential double release: 'GenericHandle' handle 'h' is released without a matching acquire in this function + +// at line 15, column 20 +// [ !!Warn ] potential read of uninitialized local variable 'h' diff --git a/test/resource-lifetime/external-wrapper-unknown-out-no-double-release.c b/test/resource-lifetime/external-wrapper-unknown-out-no-double-release.c index cb9b33a..bda3c46 100644 --- a/test/resource-lifetime/external-wrapper-unknown-out-no-double-release.c +++ b/test/resource-lifetime/external-wrapper-unknown-out-no-double-release.c @@ -21,3 +21,6 @@ int external_wrapper_unknown_out_no_double_release(void) // not contains: potential double release: 'GenericHandle' handle 'h' is released without a matching acquire in this function // at line 17, column 5 // [ !!Warn ] inter-procedural resource analysis incomplete: handle 'h' may be acquired by an unmodeled/external callee before release + +// at line 17, column 20 +// [ !!Warn ] potential read of uninitialized local variable 'h' diff --git a/test/resource-lifetime/external-wrapper-unknown-ref-out-no-double-release.cpp b/test/resource-lifetime/external-wrapper-unknown-ref-out-no-double-release.cpp index 3c64598..404e445 100644 --- a/test/resource-lifetime/external-wrapper-unknown-ref-out-no-double-release.cpp +++ b/test/resource-lifetime/external-wrapper-unknown-ref-out-no-double-release.cpp @@ -20,3 +20,9 @@ int external_wrapper_unknown_ref_out_no_double_release() } // not contains: potential double release: 'GenericHandle' handle 'h' is released without a matching acquire in this function + +// at line 18, column 20 +// [ !!Warn ] potential read of uninitialized local variable 'h' + +// at line 18, column 5 +// [ !!Warn ] inter-procedural resource analysis incomplete: handle 'h' may be acquired by an unmodeled/external callee before release diff --git 
a/test/resource-lifetime/local-non-escaping-no-incomplete.cpp b/test/resource-lifetime/local-non-escaping-no-incomplete.cpp new file mode 100644 index 0000000..8032327 --- /dev/null +++ b/test/resource-lifetime/local-non-escaping-no-incomplete.cpp @@ -0,0 +1,18 @@ +typedef void* handle_t; +extern handle_t acquire_handle(void); +extern void release_handle(handle_t); + +// resource-model: models/resource-lifetime/generic.txt +int local_non_escaping_no_incomplete(void) +{ + handle_t h = acquire_handle(); + // local_copy is never passed to any call; its address never escapes. + // isNonEscapingLocalObject should prove this immediately. + handle_t local_copy = h; + release_handle(h); + (void)local_copy; + return 0; +} + +// not contains: inter-procedural resource analysis incomplete +// not contains: potential double release diff --git a/test/resource-lifetime/new-double-delete.cpp b/test/resource-lifetime/new-double-delete.cpp index 9eb4ee2..e087e18 100644 --- a/test/resource-lifetime/new-double-delete.cpp +++ b/test/resource-lifetime/new-double-delete.cpp @@ -9,3 +9,6 @@ int new_double_delete() // at line 6, column 5 // [!!!Error] potential double release: 'CppHeap' handle 'p' is released without a matching acquire in this function + +// at line 6, column 12 +// [!!!Error] potential use-after-release: 'CppHeap' handle 'p' is used after a release in this function diff --git a/test/resource-lifetime/nocapture-local-handle-no-incomplete.cpp b/test/resource-lifetime/nocapture-local-handle-no-incomplete.cpp new file mode 100644 index 0000000..5c17d22 --- /dev/null +++ b/test/resource-lifetime/nocapture-local-handle-no-incomplete.cpp @@ -0,0 +1,21 @@ +typedef void* handle_t; +extern handle_t acquire_handle(void); +extern void release_handle(handle_t); + +// External function that only reads the handle value without capturing it. 
+// The byval/nocapture-like lowering depends on the target; here we rely on +// the pointer being passed by value (not by address) so the local slot +// that holds it does not escape. +extern int inspect_handle(handle_t h); + +// resource-model: models/resource-lifetime/generic.txt +int nocapture_local_handle_no_incomplete(void) +{ + handle_t h = acquire_handle(); + int status = inspect_handle(h); + release_handle(h); + return status; +} + +// not contains: inter-procedural resource analysis incomplete +// not contains: potential double release diff --git a/test/resource-lifetime/release-without-acquire-still-errors.cpp b/test/resource-lifetime/release-without-acquire-still-errors.cpp index 7486103..d3070db 100644 --- a/test/resource-lifetime/release-without-acquire-still-errors.cpp +++ b/test/resource-lifetime/release-without-acquire-still-errors.cpp @@ -14,3 +14,7 @@ int resource_lifetime_release_without_acquire_still_errors(VkDevice device) // not contains: inter-procedural resource analysis incomplete: handle 'stagingBuffer' // at line 10, column 5 // [!!!Error] potential double release: 'VkBuffer' handle 'stagingBuffer' is released without a matching acquire in this function + +// at line 10, column 29 +// [ !!Warn ] potential read of uninitialized local variable 'stagingBuffer' +// ↳ this load may execute before any definite initialization on all control-flow paths diff --git a/test/resource-lifetime/summary-release-aggregate-field-no-incomplete.cpp b/test/resource-lifetime/summary-release-aggregate-field-no-incomplete.cpp new file mode 100644 index 0000000..d42d5a0 --- /dev/null +++ b/test/resource-lifetime/summary-release-aggregate-field-no-incomplete.cpp @@ -0,0 +1,26 @@ +extern "C" void* malloc(unsigned long); +extern "C" void free(void*); + +struct Container +{ + void* buffer; + int size; +}; + +static void destroy_container(Container* c) +{ + free(c->buffer); +} + +// resource-model: models/resource-lifetime/generic.txt +int 
aggregate_local_from_summary_no_incomplete(void) +{ + Container c; + c.buffer = malloc(64); + c.size = 64; + destroy_container(&c); + return 0; +} + +// not contains: inter-procedural resource analysis incomplete: handle 'c' +// not contains: potential double release diff --git a/test/resource-lifetime/summary-release-aggregate-local-no-incomplete.cpp b/test/resource-lifetime/summary-release-aggregate-local-no-incomplete.cpp new file mode 100644 index 0000000..154ccf3 --- /dev/null +++ b/test/resource-lifetime/summary-release-aggregate-local-no-incomplete.cpp @@ -0,0 +1,22 @@ +extern "C" void free(void*); + +struct Holder +{ + void* ptr; +}; + +static void release_holder(Holder* holder) +{ + free(holder->ptr); +} + +// resource-model: models/resource-lifetime/generic.txt +int resource_lifetime_summary_release_aggregate_local_no_incomplete(void) +{ + Holder holder = {nullptr}; + release_holder(&holder); + return 0; +} + +// not contains: inter-procedural resource analysis incomplete: handle 'holder' +// not contains: potential double release: 'HeapAlloc' handle 'holder' diff --git a/test/security/Makefile b/test/security/Makefile new file mode 100644 index 0000000..d79c2fd --- /dev/null +++ b/test/security/Makefile @@ -0,0 +1,49 @@ +# ============================================================================= +# Makefile — Security fixture corpus (grouped by vulnerability type) +# ============================================================================= + +CC ?= gcc +CLANG ?= clang +CFLAGS ?= -Wall -Wextra -Wpedantic -g -std=c11 +ASANFLAGS ?= -fsanitize=address,undefined -fno-omit-frame-pointer + +SRCS := $(shell find . 
-type f -name '*.c' | sort) +BINS := $(patsubst %.c,build/%,$(SRCS)) +ASAN_BINS := $(patsubst %.c,build/%_asan,$(SRCS)) + +.PHONY: all analyze scan-build asan clean + +all: $(BINS) + +build/%: %.c + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) -o $@ $< + +analyze: + @echo "=========================================" + @echo " Clang Static Analyzer (security fixtures)" + @echo "=========================================" + @for f in $(SRCS); do \ + echo "\n--- Analyzing $$f ---"; \ + $(CLANG) --analyze -Xanalyzer -analyzer-checker=core,unix,deadcode,security \ + -Xanalyzer -analyzer-output=text $$f 2>&1 || true; \ + done + +scan-build: + @echo "=========================================" + @echo " scan-build (security fixtures)" + @echo "=========================================" + @for f in $(SRCS); do \ + echo "\n--- scan-build $$f ---"; \ + scan-build $(CC) $(CFLAGS) -c $$f -o /dev/null 2>&1 || true; \ + done + +asan: $(ASAN_BINS) + +build/%_asan: %.c + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) $(ASANFLAGS) -o $@ $< + +clean: + rm -rf build + rm -f ./*.plist diff --git a/test/security/README.md b/test/security/README.md new file mode 100644 index 0000000..2b16772 --- /dev/null +++ b/test/security/README.md @@ -0,0 +1,74 @@ +# Security Fixture Corpus + +This corpus groups the numbered security fixtures by vulnerability type. 
+ +## Layout by type + +``` +buffer-overflow/ + 01_buffer_overflow.c + +command-injection/ + 08_command_injection.c + +format-string/ + 02_format_string.c + +integer-overflow/ + 04_integer_overflow.c + 17_integer_overflow_advanced.c + +memory-leak/ + 09_memory_leak.c + +null-dereference/ + 05_null_deref.c + 16_null_deref_nested.c + +oob-read/ + 12_oob_read.c + +sizeof-pitfall/ + 15_sizeof_pitfall.c + +stack-escape/ + 11_return_local.c + +toctou/ + 07_toctou.c + +type-confusion/ + 10_type_confusion.c + +uninitialized/ + 06_uninitialized.c + +unsafe-functions/ + 13_unsafe_functions.c + +use-after-free/ + 03_use_after_free.c + 18_use_after_free_advanced.c + +variadic-mismatch/ + 14_variadic_mismatch.c +``` + +## Quick usage + +```bash +# from repository root +python3 run_test.py + +# from this directory +make all +make analyze +make asan +make clean +``` + +## Notes + +- `run_test.py` now applies strict warning/error expectation count checks by + default across all fixture files under `test/` (this corpus included). +- Legacy `test/files` remains as a compatibility shim. diff --git a/test/security/buffer-overflow/01_buffer_overflow.c b/test/security/buffer-overflow/01_buffer_overflow.c new file mode 100644 index 0000000..9f7cb75 --- /dev/null +++ b/test/security/buffer-overflow/01_buffer_overflow.c @@ -0,0 +1,91 @@ +/** + * 01 - BUFFER OVERFLOWS (CWE-120, CWE-121, CWE-122, CWE-193) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=address 01_buffer_overflow.c -o 01_test + * Analyze: clang --analyze 01_buffer_overflow.c + */ + +#include +#include +#include + +/* 1a. Stack buffer overflow classique */ +void vuln_stack_bof(const char* input) +{ + char buf[16]; + strcpy(buf, input); /* CWE-120: pas de vérification de taille */ + printf("buf = %s\n", buf); +} + +/* 1b. 
Heap buffer overflow (off-by-one) */ +void vuln_heap_bof(size_t n) +{ + int* arr = (int*)malloc(n * sizeof(int)); + if (!arr) + return; + for (size_t i = 0; i <= n; i++) + { /* CWE-122: off-by-one, i <= n */ + arr[i] = (int)i; + } + free(arr); +} + +/* 1c. Buffer overflow via sprintf */ +void vuln_sprintf_bof(int user_id, const char* username) +{ + char log_entry[64]; + sprintf(log_entry, "User %d: %s logged in at ...", user_id, username); + /* CWE-120: sprintf ne vérifie pas la taille */ + puts(log_entry); +} + +/* 1d. Off-by-one dans une boucle */ +void vuln_off_by_one(void) +{ + char buf[10]; + for (int i = 0; i <= 10; i++) + { /* CWE-193: écrit buf[10] hors limites */ + buf[i] = 'A'; + } + buf[9] = '\0'; + puts(buf); +} + +int main(void) +{ + printf("=== 01: Buffer Overflow Tests ===\n"); + vuln_stack_bof("AAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + vuln_heap_bof(8); + vuln_sprintf_bof(1, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + vuln_off_by_one(); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 15, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcpy_chk on variable 'buf' +// ↳ destination stack buffer size: 16 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically + +// at line 14, column 1 +// [ !!Warn ] local variable 'buf' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 21, column 23 +// [ !!Warn ] potential integer overflow in size computation before 'malloc' +// ↳ operation: mul +// ↳ overflowed size may under-allocate memory or make bounds checks unsound + +// at line 31, column 1 +// [ !!Warn ] local variable 'log_entry' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 41, column 16 +// 
[ !!Warn ] potential stack buffer overflow on variable 'buf' (size 10) +// ↳ alias path: buf +// ↳ index variable may go up to 10 (array last valid index: 9) +// ↳ (this is a write access) diff --git a/test/security/command-injection/08_command_injection.c b/test/security/command-injection/08_command_injection.c new file mode 100644 index 0000000..aa922d3 --- /dev/null +++ b/test/security/command-injection/08_command_injection.c @@ -0,0 +1,61 @@ +/** + * 08 - COMMAND INJECTION (CWE-78) + * + * Compile: gcc -Wall -Wextra -g 08_command_injection.c -o 08_test + * Analyze: clang --analyze 08_command_injection.c + */ + +#include +#include +#include + +/* 8a. Injection via system() */ +void vuln_command_injection(const char* filename) +{ + char cmd[256]; + snprintf(cmd, sizeof(cmd), "cat %s", filename); + system(cmd); /* CWE-78: filename peut contenir "; rm -rf /" */ +} + +/* 8b. Injection via popen() */ +void vuln_popen_injection(const char* host) +{ + char cmd[256]; + snprintf(cmd, sizeof(cmd), "ping -c 1 %s", host); + FILE* fp = popen(cmd, "r"); /* CWE-78: host = "8.8.8.8; cat /etc/shadow" */ + if (fp) + { + char buf[512]; + while (fgets(buf, sizeof(buf), fp)) + printf("%s", buf); + pclose(fp); + } +} + +int main(void) +{ + printf("=== 08: Command Injection Tests ===\n"); + /* Exemples inoffensifs pour la démonstration */ + vuln_command_injection("/etc/hostname"); + vuln_popen_injection("127.0.0.1"); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 14, column 1 +// [ !!Warn ] local variable 'cmd' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 16, column 5 +// [ !!Warn ] potential command injection: non-literal command reaches 'system' +// ↳ the command argument is not a compile-time string literal +// ↳ validate/sanitize external 
input or avoid shell command composition + +// at line 23, column 16 +// [ !!Warn ] potential command injection: non-literal command reaches 'popen' +// ↳ the command argument is not a compile-time string literal +// ↳ validate/sanitize external input or avoid shell command composition diff --git a/test/security/format-string/02_format_string.c b/test/security/format-string/02_format_string.c new file mode 100644 index 0000000..cb5ca50 --- /dev/null +++ b/test/security/format-string/02_format_string.c @@ -0,0 +1,52 @@ +/** + * 02 - FORMAT STRING (CWE-134) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=address 02_format_string.c -o 02_test + * Analyze: clang --analyze 02_format_string.c + */ + +#include +#include + +/* 2a. Format string directe */ +void vuln_format_string(const char* user_input) +{ + printf(user_input); /* CWE-134: l'attaquant contrôle le format */ +} + +/* 2b. Format string via snprintf + fprintf */ +void vuln_format_string_log(const char* msg) +{ + char buf[256]; + snprintf(buf, sizeof(buf), msg); /* CWE-134: format contrôlé */ + fprintf(stderr, buf); /* CWE-134: double vulnérabilité */ +} + +int main(void) +{ + printf("=== 02: Format String Tests ===\n"); + vuln_format_string("%x %x %x %x\n"); + vuln_format_string_log("Hello %s%s%s%s\n"); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 13, column 12 +// [ !!Warn ] non-literal format string may allow format injection +// ↳ clang: format string is not a string literal (potentially insecure) + +// at line 18, column 1 +// [ !!Warn ] local variable 'buf' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 19, column 32 +// [ !!Warn ] non-literal format string may allow format injection +// ↳ clang: format string is not a string literal (potentially insecure) + +// at 
line 20, column 21 +// [ !!Warn ] non-literal format string may allow format injection +// ↳ clang: format string is not a string literal (potentially insecure) diff --git a/test/security/integer-overflow/04_integer_overflow.c b/test/security/integer-overflow/04_integer_overflow.c new file mode 100644 index 0000000..dcb1c1a --- /dev/null +++ b/test/security/integer-overflow/04_integer_overflow.c @@ -0,0 +1,94 @@ +/** + * 04 - INTEGER OVERFLOW / UNDERFLOW (CWE-190, CWE-191, CWE-195, CWE-197) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=undefined 04_integer_overflow.c -o 04_test + * Analyze: clang --analyze 04_integer_overflow.c + */ + +#include +#include +#include +#include + +/* 4a. Integer overflow menant à un petit malloc */ +void vuln_integer_overflow_alloc(unsigned int count, unsigned int elem_size) +{ + unsigned int total = count * elem_size; /* CWE-190: overflow silencieux */ + char* buf = (char*)malloc(total); + if (!buf) + return; + memset(buf, 0, count * elem_size); /* écrit plus que total si overflow */ + free(buf); +} + +/* 4b. Signed integer overflow (UB) */ +int vuln_signed_overflow(int a, int b) +{ + return a + b; /* CWE-190: undefined behavior si overflow */ +} + +/* 4c. Troncature implicite lors d'un cast */ +void vuln_truncation(void) +{ + unsigned long big = 0x1FFFFFFFF; + unsigned int small = (unsigned int)big; /* CWE-197: perte de bits hauts */ + char* buf = (char*)malloc(small); /* allocation trop petite */ + if (buf) + { + memset(buf, 'A', big); /* heap overflow massif */ + free(buf); + } +} + +/* 4d. Signedness mismatch : int négatif → size_t énorme */ +void vuln_signedness(int len) +{ + char buf[100]; + if (len > 100) + return; /* semble sûr... 
*/ + /* mais len négatif passe la vérif et est converti en size_t énorme */ + memcpy(buf, "AAAA", (size_t)len); /* CWE-195 */ +} + +int main(void) +{ + printf("=== 04: Integer Overflow Tests ===\n"); + vuln_integer_overflow_alloc(0x40000001, 4); + printf("signed overflow: %d\n", vuln_signed_overflow(INT_MAX, 1)); + /* vuln_truncation(); -- dangereux à exécuter */ + vuln_signedness(-1); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 16, column 25 +// [ !!Warn ] potential integer overflow in size computation before 'malloc' +// ↳ operation: mul +// ↳ overflowed size may under-allocate memory or make bounds checks unsound + +// at line 18, column 5 +// [ !!Warn ] potential integer overflow in size computation before 'memset' +// ↳ operation: mul +// ↳ overflowed size may under-allocate memory or make bounds checks unsound + +// at line 24, column 14 +// [ !!Warn ] potential signed integer overflow in arithmetic operation +// ↳ operation: add +// ↳ result is returned without a provable non-overflow bound + +// at line 31, column 25 +// [ !!Warn ] potential integer truncation in size computation before 'malloc' +// ↳ narrowing conversion may drop high bits and produce a smaller buffer size + +// at line 43, column 5 +// [ !!Warn ] potential signed-to-size conversion before 'memcpy' +// ↳ a possibly negative signed value is converted to an unsigned length +// ↳ this can become a very large size value and trigger out-of-bounds access + +// at line 40, column 1 +// [ !!Warn ] local variable 'buf' is never initialized +// ↳ declared without initializer and no definite write was found in this function diff --git a/test/security/integer-overflow/17_integer_overflow_advanced.c b/test/security/integer-overflow/17_integer_overflow_advanced.c new file mode 100644 index 0000000..955dfc1 --- /dev/null +++ 
b/test/security/integer-overflow/17_integer_overflow_advanced.c @@ -0,0 +1,78 @@ +/** + * 17 - ADVANCED INTEGER OVERFLOW CASES (nested/if/loop/tricky) + */ + +#include +#include +#include + +/* 17a. Signed overflow in nested if branch returned directly */ +int vuln_signed_overflow_nested_if(int a, int b, int gate1, int gate2) +{ + if (gate1) + { + if (gate2) + { + return a + b; + } + } + return 0; +} + +/* 17b. Truncation hidden in select expression before allocation */ +void vuln_truncation_select_tricky(int cond) +{ + unsigned long big = 0x1FFFFFFFFUL; + unsigned int small = cond ? (unsigned int)big : 64u; + char* buf = (char*)malloc((size_t)small); + if (buf) + { + free(buf); + } +} + +/* 17c. Signed-to-size conversion in nested loop and nested if */ +void vuln_signed_to_size_nested_loop(int len, int n) +{ + char dst[64]; + for (int i = 0; i < n; ++i) + { + if ((i & 1) == 0) + { + if (len <= 64) + { + memcpy(dst, "AAAA", (size_t)len); + } + } + } +} + +int main(void) +{ + (void)vuln_signed_overflow_nested_if(2147483647, 1, 1, 1); + vuln_truncation_select_tricky(1); + vuln_signed_to_size_nested_loop(-1, 2); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 13, column 22 +// [ !!Warn ] potential signed integer overflow in arithmetic operation +// ↳ operation: add +// ↳ result is returned without a provable non-overflow bound + +// at line 23, column 25 +// [ !!Warn ] potential integer truncation in size computation before 'malloc' +// ↳ narrowing conversion may drop high bits and produce a smaller buffer size + +// at line 35, column 17 +// [ !!Warn ] potential signed-to-size conversion before 'memcpy' +// ↳ a possibly negative signed value is converted to an unsigned length +// ↳ this can become a very large size value and trigger out-of-bounds access + +// at line 31, column 1 +// [ !!Warn ] local 
variable 'dst' is never initialized diff --git a/test/security/memory-leak/09_memory_leak.c b/test/security/memory-leak/09_memory_leak.c new file mode 100644 index 0000000..81b37fc --- /dev/null +++ b/test/security/memory-leak/09_memory_leak.c @@ -0,0 +1,95 @@ +/** + * 09 - MEMORY LEAKS (CWE-401) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=leak 09_memory_leak.c -o 09_test + * Analyze: clang --analyze 09_memory_leak.c + */ + +#include +#include +#include + +/* 9a. Fuite sur chemin d'erreur (early return sans free) */ +int vuln_leak_error_path(const char* data) +{ + char* buf = (char*)malloc(256); + if (!buf) + return -1; + + if (strlen(data) > 255) + { + return -1; /* CWE-401: buf jamais libéré sur ce chemin */ + } + strcpy(buf, data); + printf("%s\n", buf); + free(buf); + return 0; +} + +/* 9b. Fuite par écrasement de pointeur */ +void vuln_leak_overwrite(void) +{ + char* p = (char*)malloc(100); + if (!p) + return; + p = (char*)malloc(200); /* CWE-401: le premier bloc de 100 est perdu */ + if (p) + free(p); +} + +/* 9c. Fuite dans une boucle */ +void vuln_leak_loop(int n) +{ + for (int i = 0; i < n; i++) + { + char* tmp = (char*)malloc(64); + if (!tmp) + return; + snprintf(tmp, 64, "item_%d", i); + printf("%s\n", tmp); + /* CWE-401: free(tmp) manquant → fuite à chaque itération */ + } +} + +/* 9d. 
Fuite via realloc qui échoue */ +void vuln_leak_realloc(void) +{ + char* buf = (char*)malloc(16); + if (!buf) + return; + strcpy(buf, "hello"); + + /* Si realloc échoue, il retourne NULL mais ne libère pas buf */ + buf = (char*)realloc(buf, (size_t)-1); /* taille absurde → échec */ + /* CWE-401: si realloc échoue, l'ancien buf est perdu */ + free(buf); /* free(NULL) est safe, mais l'ancien bloc fuit */ +} + +int main(void) +{ + printf("=== 09: Memory Leak Tests ===\n"); + vuln_leak_error_path("short"); + vuln_leak_error_path("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAA"); + vuln_leak_overwrite(); + vuln_leak_loop(5); + vuln_leak_realloc(); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 28, column 23 +// [ !!Warn ] potential resource leak: 'HeapAlloc' acquired in handle 'p' is not released in this function +// ↳ no matching release call was found for the tracked handle + +// at line 37, column 29 +// [ !!Warn ] potential resource leak: 'HeapAlloc' acquired in handle 'tmp' is not released in this function +// ↳ no matching release call was found for the tracked handle diff --git a/test/security/null-dereference/05_null_deref.c b/test/security/null-dereference/05_null_deref.c new file mode 100644 index 0000000..05cfa48 --- /dev/null +++ b/test/security/null-dereference/05_null_deref.c @@ -0,0 +1,76 @@ +/** + * 05 - NULL POINTER DEREFERENCE (CWE-476) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=address 05_null_deref.c -o 05_test + * Analyze: clang --analyze 05_null_deref.c + */ + +#include +#include + +/* 5a. 
malloc sans vérification */ +void vuln_null_deref(size_t n) +{ + int* arr = (int*)malloc(n * sizeof(int)); + arr[0] = 42; /* CWE-476: arr peut être NULL si malloc échoue */ + free(arr); +} + +/* 5b. Déréférencement dans la branche NULL (logique inversée) */ +void vuln_null_deref_logic(int* ptr) +{ + if (ptr == NULL) + { + printf("val = %d\n", *ptr); /* CWE-476: déref garantie sur NULL */ + } +} + +/* 5c. NULL après free puis réutilisation */ +void vuln_null_after_free(void) +{ + int* p = (int*)malloc(sizeof(int)); + if (!p) + return; + *p = 10; + free(p); + p = NULL; + printf("val = %d\n", *p); /* CWE-476: déref de NULL explicite */ +} + +int main(void) +{ + printf("=== 05: NULL Pointer Dereference Tests ===\n"); + vuln_null_deref(0); /* malloc(0) peut retourner NULL */ + vuln_null_deref_logic(NULL); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 13, column 23 +// [ !!Warn ] potential integer overflow in size computation before 'malloc' +// ↳ operation: mul +// ↳ overflowed size may under-allocate memory or make bounds checks unsound + +// at line 14, column 12 +// [ !!Warn ] potential null pointer dereference on '' +// ↳ pointer comes from allocator return value and is dereferenced without a provable null-check + +// at line 21, column 30 +// [!!!Error] potential null pointer dereference on '' +// ↳ control flow proves pointer is null on this branch before dereference + +// at line 32, column 26 +// [!!!Error] potential null pointer dereference on '' +// ↳ a preceding local-slot store sets the pointer to null before use + +// at line 31, column 7 +// [!!!Error] potential use-after-release: 'HeapAlloc' handle 'p' is used after a release in this function +// ↳ a later dereference/call argument use may access invalid memory + +// at line 32, column 27 +// [!!!Error] potential use-after-release: 
'HeapAlloc' handle 'p' is used after a release in this function +// ↳ a later dereference/call argument use may access invalid memory diff --git a/test/security/null-dereference/16_null_deref_nested.c b/test/security/null-dereference/16_null_deref_nested.c new file mode 100644 index 0000000..f81050a --- /dev/null +++ b/test/security/null-dereference/16_null_deref_nested.c @@ -0,0 +1,83 @@ +/** + * 16 - NULL DEREF IN NESTED CONTROL FLOW (CWE-476) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=address 16_null_deref_nested.c -o 16_test + * Analyze: clang --analyze 16_null_deref_nested.c + */ + +#include +#include +#include + +/* 16a. malloc non verifie dans un nested if */ +void vuln_nested_if_unchecked_malloc(int c1, int c2) +{ + if (c1) + { + if (c2) + { + int* arr = (int*)malloc(sizeof(int)); + arr[0] = 42; + free(arr); + } + } +} + +/* 16b. malloc non verifie dans un nested loop */ +void vuln_nested_loop_unchecked_malloc(int n) +{ + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < n; ++j) + { + int* p = (int*)malloc(sizeof(int)); + p[0] = i + j; + free(p); + } + } +} + +/* 16c. 
dereference dans une branche prouvant ptr == NULL en nested if */ +void vuln_nested_if_null_branch(int* ptr, int c1, int c2) +{ + if (c1) + { + if (c2) + { + if (ptr == NULL) + { + printf("val = %d\n", *ptr); + } + } + } +} + +int main(void) +{ + vuln_nested_if_unchecked_malloc(1, 1); + vuln_nested_loop_unchecked_malloc(1); + vuln_nested_if_null_branch(NULL, 1, 1); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 17, column 20 +// [ !!Warn ] potential null pointer dereference on '' +// ↳ pointer comes from allocator return value and is dereferenced without a provable null-check + +// at line 28, column 18 +// [ !!Warn ] potential null pointer dereference on '' +// ↳ pointer comes from allocator return value and is dereferenced without a provable null-check + +// at line 35, column 0 +// [ !Info! ] ConstParameterNotModified.Pointer: parameter 'ptr' in function 'vuln_nested_if_null_branch' is never used to modify the pointed object +// ↳ current type: int *ptr +// ↳ suggested type: const int *ptr + +// at line 39, column 38 +// [!!!Error] potential null pointer dereference on '' +// ↳ control flow proves pointer is null on this branch before dereference diff --git a/test/security/oob-read/12_oob_read.c b/test/security/oob-read/12_oob_read.c new file mode 100644 index 0000000..8a4e338 --- /dev/null +++ b/test/security/oob-read/12_oob_read.c @@ -0,0 +1,91 @@ +/** + * 12 - OUT-OF-BOUNDS READ (CWE-125) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=address 12_oob_read.c -o 12_test + * Analyze: clang --analyze 12_oob_read.c + */ + +#include +#include +#include + +/* 12a. Lecture hors limites d'un tableau (pas de bounds check) */ +int vuln_oob_read(int* arr, int size, int index) +{ + (void)size; /* ignoré volontairement */ + return arr[index]; /* CWE-125: index peut être >= size ou négatif */ +} + +/* 12b. 
strlen sur un buffer non terminé par \0 */ +size_t vuln_missing_null_term(void) +{ + char buf[8]; + memcpy(buf, "AAAAAAAA", 8); /* pas de '\0' terminal */ + return strlen(buf); /* CWE-125: lit au-delà du buffer */ +} + +/* 12c. Lecture heap hors bornes via index non validé */ +void vuln_heap_oob_read(int user_index) +{ + int* table = (int*)malloc(10 * sizeof(int)); + if (!table) + return; + for (int i = 0; i < 10; i++) + table[i] = i * 10; + + /* CWE-125: user_index pas vérifié */ + printf("value = %d\n", table[user_index]); + free(table); +} + +/* 12d. Lecture après la fin d'une chaîne courte dans un buffer fixe */ +void vuln_short_string_read(void) +{ + char buf[64]; + memset(buf, 0, sizeof(buf)); + strcpy(buf, "Hi"); + + /* Quelqu'un suppose que buf contient au moins 10 caractères */ + for (int i = 0; i < 10; i++) + { + printf("%02x ", (unsigned char)buf[i]); /* lit des zéros, pas un crash */ + } + /* Mais dans un vrai scénario, buf pourrait ne pas être zéro-initialisé → info leak */ + printf("\n"); +} + +int main(void) +{ + printf("=== 12: Out-of-Bounds Read Tests ===\n"); + + int arr[] = {10, 20, 30}; + printf("oob: %d\n", vuln_oob_read(arr, 3, 10)); + + printf("strlen no null: %zu\n", vuln_missing_null_term()); + + vuln_heap_oob_read(50); /* index 50, tableau de taille 10 */ + + vuln_short_string_read(); + + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 22, column 12 +// [ !!Warn ] potential out-of-bounds read: string buffer 'buf' may be missing a null terminator before 'strlen' +// ↳ buffer size: 8 bytes, last write size: 8 bytes +// ↳ unterminated strings can make read APIs scan past buffer bounds + +// at line 32, column 28 +// [ !!Warn ] potential out-of-bounds read on heap buffer 'call' via unchecked index +// ↳ inferred heap capacity: 10 element(s) +// ↳ index value is not proven to be 
within [0, capacity-1] + +// at line 40, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcpy_chk on variable 'buf' +// ↳ destination stack buffer size: 64 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically diff --git a/test/security/sizeof-pitfall/15_sizeof_pitfall.c b/test/security/sizeof-pitfall/15_sizeof_pitfall.c new file mode 100644 index 0000000..1fc0962 --- /dev/null +++ b/test/security/sizeof-pitfall/15_sizeof_pitfall.c @@ -0,0 +1,102 @@ +/** + * 15 - SIZEOF PITFALLS (CWE-467) + * + * Compile: gcc -Wall -Wextra -g 15_sizeof_pitfall.c -o 15_test + * Analyze: clang --analyze 15_sizeof_pitfall.c + */ + +#include +#include +#include + +/* 15a. sizeof sur pointeur au lieu du tableau */ +void process(char* buf) +{ + /* sizeof(buf) == sizeof(char*) == 8 sur 64-bit, PAS 256 */ + memset(buf, 0, sizeof(buf)); /* CWE-467: efface seulement 8 octets */ +} + +void vuln_sizeof_pointer(void) +{ + char buffer[256]; + memset(buffer, 'A', sizeof(buffer)); + process(buffer); + /* On s'attend à un buffer vide, mais seuls 8 octets sont nuls */ + printf("buf[100] = 0x%02x (devrait être 0x00)\n", (unsigned char)buffer[100]); +} + +/* 15b. sizeof sur un tableau passé en paramètre */ +void vuln_array_param_sizeof(int arr[100]) +{ + /* arr est en fait un int*, sizeof(arr) == sizeof(int*) */ + size_t n = sizeof(arr) / sizeof(arr[0]); /* CWE-467: donne 2 (pas 100) */ + printf("calculated n = %zu (expected 100)\n", n); +} + +/* 15c. sizeof d'un pointeur pour une allocation */ +void vuln_sizeof_alloc(void) +{ + int* matrix; + /* Erreur classique : sizeof(matrix) au lieu de sizeof(*matrix) */ + matrix = (int*)malloc(10 * sizeof(matrix)); /* alloue 10 * 8 = 80 */ + /* mais on voulait 10 * sizeof(int) = 40 (gaspillage, ou sous-alloc si inversé) */ + if (matrix) + { + matrix[0] = 1; + free(matrix); + } +} + +/* 15d. 
sizeof sur un littéral chaîne vs pointeur */ +void vuln_sizeof_string(void) +{ + const char* str = "Hello"; + char arr[] = "Hello"; + + printf("sizeof(str) = %zu (pointeur: %zu attendu)\n", sizeof(str), sizeof(char*)); + printf("sizeof(arr) = %zu (tableau: 6 attendu, inclut \\0)\n", sizeof(arr)); + + /* Bug typique : utiliser sizeof(str) pour copier */ + char dest[32]; + memcpy(dest, str, sizeof(str)); /* copie 8 octets (pointeur), pas 6 */ + dest[sizeof(str)] = '\0'; + printf("dest = '%s' (peut être tronqué ou contenir du garbage)\n", dest); +} + +int main(void) +{ + printf("=== 15: sizeof Pitfall Tests ===\n"); + vuln_sizeof_pointer(); + + int big_array[100]; + vuln_array_param_sizeof(big_array); + + vuln_sizeof_alloc(); + vuln_sizeof_string(); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 15, column 27 +// [ !!Warn ] size computation appears to use pointer size instead of object size +// ↳ clang: 'memset' call operates on objects of type 'char' while the size is based on a different type 'char *' + +// at line 29, column 22 +// [ !!Warn ] size computation appears to use pointer size instead of object size +// ↳ clang: sizeof on array function parameter will return size of 'int *' instead of 'int[100]' + +// at line 29, column 28 +// [ !!Warn ] size computation appears to use pointer size instead of object size +// ↳ clang: 'sizeof (arr)' will return the size of the pointer, not the array itself + +// at line 55, column 30 +// [ !!Warn ] size computation appears to use pointer size instead of object size +// ↳ clang: 'memcpy' call operates on objects of type 'const char' while the size is based on a different type 'const char *' + +// at line 64, column 1 +// [ !!Warn ] local variable 'big_array' is never initialized +// ↳ declared without initializer and no definite write was found in this 
function diff --git a/test/security/stack-escape/11_return_local.c b/test/security/stack-escape/11_return_local.c new file mode 100644 index 0000000..aa538c9 --- /dev/null +++ b/test/security/stack-escape/11_return_local.c @@ -0,0 +1,86 @@ +/** + * 11 - RETURN POINTER TO LOCAL (CWE-562) + * + * Compile: gcc -Wall -Wextra -g 11_return_local.c -o 11_test + * Analyze: clang --analyze 11_return_local.c + */ + +#include +#include + +/* 11a. Retour d'adresse de buffer local */ +char* vuln_return_local(void) +{ + char buf[64]; + strcpy(buf, "data on stack"); + return buf; /* CWE-562: buf est détruit au retour de la fonction */ +} + +/* 11b. Retour d'adresse de variable locale via pointeur */ +int* vuln_return_local_int(void) +{ + int x = 42; + return &x; /* CWE-562: adresse de variable locale */ +} + +/* 11c. Plus subtil : tableau local dans un struct retourné par pointeur */ +typedef struct +{ + char* data; +} Wrapper; + +Wrapper vuln_return_local_struct(void) +{ + char tmp[128]; + strcpy(tmp, "temporary data"); + Wrapper w; + w.data = tmp; /* CWE-562: tmp sera invalide après le retour */ + return w; +} + +int main(void) +{ + printf("=== 11: Return Pointer to Local Tests ===\n"); + + char* s = vuln_return_local(); + printf("local string: %s\n", s); /* UB: accès à mémoire stack invalide */ + + int* p = vuln_return_local_int(); + printf("local int: %d\n", *p); /* UB */ + + Wrapper w = vuln_return_local_struct(); + printf("local struct data: %s\n", w.data); /* UB */ + + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 14, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcpy_chk on variable 'buf' +// ↳ destination stack buffer size: 64 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically + +// at line 13, column 1 +// [ !!Warn ] local variable 'buf' is 
never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 15, column 5 +// [ !!Warn ] stack pointer escape: address of variable 'buf' escapes this function +// ↳ escape via return statement (pointer to stack returned to caller) + +// at line 21, column 5 +// [ !!Warn ] stack pointer escape: address of variable 'x' escapes this function +// ↳ escape via return statement (pointer to stack returned to caller) + +// at line 31, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcpy_chk on variable 'tmp' +// ↳ destination stack buffer size: 128 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically + +// at line 30, column 1 +// [ !!Warn ] local variable 'tmp' is never initialized +// ↳ declared without initializer and no definite write was found in this function diff --git a/test/security/toctou/07_toctou.c b/test/security/toctou/07_toctou.c new file mode 100644 index 0000000..b9dea16 --- /dev/null +++ b/test/security/toctou/07_toctou.c @@ -0,0 +1,92 @@ +/** + * 07 - RACE CONDITION / TOCTOU (CWE-367) + * + * Compile: gcc -Wall -Wextra -g 07_toctou.c -o 07_test + * Analyze: clang --analyze 07_toctou.c + */ + +#include +#include +#include +#include + +/* 7a. TOCTOU classique : access() puis fopen() */ +void vuln_toctou(const char* filename) +{ + if (access(filename, R_OK) == 0) + { + /* CWE-367: le fichier peut être remplacé par un symlink + * entre access() et fopen() (race window) */ + FILE* f = fopen(filename, "r"); + if (f) + { + char buf[256]; + fgets(buf, sizeof(buf), f); + printf("content: %s\n", buf); + fclose(f); + } + } +} + +/* 7b. 
TOCTOU : stat() puis open() */ +void vuln_toctou_stat(const char* path) +{ + struct stat st; + if (stat(path, &st) == 0) + { + if (S_ISREG(st.st_mode)) + { + /* CWE-367: path peut avoir changé entre stat() et open() */ + int fd = open(path, O_RDONLY); + if (fd >= 0) + { + char buf[128]; + read(fd, buf, sizeof(buf)); + close(fd); + } + } + } +} + +/* Correction : utiliser open() + fstat() sur le fd */ +void safe_open(const char* path) +{ + int fd = open(path, O_RDONLY | O_NOFOLLOW); + if (fd < 0) + return; + struct stat st; + if (fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) + { + char buf[128]; + read(fd, buf, sizeof(buf)); + } + close(fd); +} + +int main(void) +{ + printf("=== 07: TOCTOU Tests ===\n"); + vuln_toctou("/etc/hostname"); + vuln_toctou_stat("/etc/hostname"); + safe_open("/etc/hostname"); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 20, column 1 +// [ !!Warn ] local variable 'buf' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 18, column 19 +// [ !!Warn ] potential TOCTOU race: path checked with 'access' then used with 'fopen' +// ↳ the file target may change between check and use operations +// ↳ prefer descriptor-based validation (open + fstat) on the same handle + +// at line 34, column 22 +// [ !!Warn ] potential TOCTOU race: path checked with 'stat' then used with 'open' +// ↳ the file target may change between check and use operations +// ↳ prefer descriptor-based validation (open + fstat) on the same handle diff --git a/test/security/type-confusion/10_type_confusion.c b/test/security/type-confusion/10_type_confusion.c new file mode 100644 index 0000000..092a2c5 --- /dev/null +++ b/test/security/type-confusion/10_type_confusion.c @@ -0,0 +1,84 @@ +/** + * 10 - TYPE CONFUSION / MAUVAIS CAST (CWE-843) + * + * 
Compile: gcc -Wall -Wextra -g -fsanitize=undefined 10_type_confusion.c -o 10_test + * Analyze: clang --analyze 10_type_confusion.c + */ + +#include +#include +#include + +typedef struct +{ + int type; + int value; +} BaseObj; +typedef struct +{ + int type; + int value; + char extra[64]; +} ExtObj; + +/* 10a. Cast non vérifié via void* */ +void vuln_type_confusion(void* obj) +{ + BaseObj* base = (BaseObj*)obj; + if (base->type == 1) + { + /* CWE-843: on suppose que c'est un ExtObj sans vérification réelle */ + ExtObj* ext = (ExtObj*)obj; + printf("extra = %s\n", ext->extra); /* lecture hors bornes si BaseObj */ + } +} + +/* 10b. Union type punning dangereux */ +typedef union +{ + int as_int; + float as_float; + char* as_ptr; +} Variant; + +void vuln_union_confusion(Variant v, int expected_type) +{ + /* Aucune vérification que expected_type correspond au champ actif */ + if (expected_type == 0) + printf("int: %d\n", v.as_int); + else if (expected_type == 1) + printf("float: %f\n", v.as_float); + else + printf("ptr: %s\n", v.as_ptr); /* CWE-843: crash si as_ptr invalide */ +} + +int main(void) +{ + printf("=== 10: Type Confusion Tests ===\n"); + + /* Passe un BaseObj là où un ExtObj est attendu */ + BaseObj b = {1, 42}; + vuln_type_confusion(&b); /* va lire ext->extra hors bornes */ + + /* Mauvais tag d'union */ + Variant v; + v.as_int = 12345; + vuln_union_confusion(v, 2); /* interprète un int comme un pointeur */ + + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 21, column 9 +// [ !!Warn ] potential type confusion: incompatible struct views on the same pointer +// ↳ smaller observed view: 'struct.BaseObj' (8 bytes) +// ↳ accessed view: 'struct.ExtObj' at byte offset 8 +// ↳ field access may read/write outside the actual object layout + +// at line 52, column 5 +// [ !!Warn ] potential read of 
uninitialized local variable 'v' +// ↳ this load may execute before any definite initialization on all control-flow paths diff --git a/test/security/uninitialized/06_uninitialized.c b/test/security/uninitialized/06_uninitialized.c new file mode 100644 index 0000000..a37c16a --- /dev/null +++ b/test/security/uninitialized/06_uninitialized.c @@ -0,0 +1,104 @@ +/** + * 06 - UNINITIALIZED MEMORY (CWE-457, CWE-908, CWE-200) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=memory 06_uninitialized.c -o 06_test + * (MemorySanitizer requiert clang: clang -fsanitize=memory) + * Analyze: clang --analyze 06_uninitialized.c + */ + +#include +#include +#include +#include + +/* 6a. Variable locale non initialisée */ +int vuln_uninit_var(int condition) +{ + int x; + if (condition) + x = 10; + return x; /* CWE-457: x non initialisé si condition == false */ +} + +/* 6b. Lecture de mémoire heap non initialisée */ +void vuln_uninit_heap(void) +{ + char* buf = (char*)malloc(64); + if (!buf) + return; + /* pas de memset / initialisation */ + if (buf[0] == 'A') + { /* CWE-908: lecture non initialisée */ + puts("Found A"); + } + free(buf); +} + +/* 6c. 
Struct partiellement initialisée → information leak */ +typedef struct +{ + int type; + char name[32]; + int padding; +} Packet; + +void vuln_info_leak(int fd) +{ + Packet pkt; + pkt.type = 1; + strcpy(pkt.name, "test"); + /* pkt.padding jamais initialisé -> fuite de données stack */ + write(fd, &pkt, sizeof(pkt)); /* CWE-200: info leak */ +} + +int vuln_info_leak2(int fd) +{ + Packet pkt; + pkt.type = 1; + strcpy(pkt.name, "test"); + /* pkt.padding jamais initialisé -> fuite de données stack */ + write(fd, &pkt, sizeof(pkt)); /* CWE-200: info leak */ + + return pkt.padding; /* CWE-457: read of uninitialized variable */ +} + +int main(void) +{ + printf("=== 06: Uninitialized Memory Tests ===\n"); + printf("uninit var: %d\n", vuln_uninit_var(0)); + vuln_uninit_heap(); + vuln_info_leak(STDOUT_FILENO); + vuln_info_leak2(STDOUT_FILENO); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 19, column 12 +// [ !!Warn ] potential read of uninitialized local variable 'x' +// ↳ this load may execute before any definite initialization on all control-flow paths + +// at line 43, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcpy_chk on variable 'pkt' +// ↳ destination stack buffer size: 40 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically + +// at line 45, column 5 +// [ !!Warn ] potential information leak: local variable 'pkt' may expose uninitialized bytes through external sink 'write' +// ↳ transmitted range is not fully initialized on all control-flow paths + +// at line 51, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcpy_chk on variable 'pkt' +// ↳ destination stack buffer size: 40 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically + +// at line 53, column 5 +// [ !!Warn ] potential 
information leak: local variable 'pkt' may expose uninitialized bytes through external sink 'write' +// ↳ transmitted range is not fully initialized on all control-flow paths + +// at line 55, column 16 +// [ !!Warn ] potential read of uninitialized local variable 'pkt' +// ↳ this load may execute before any definite initialization on all control-flow paths diff --git a/test/security/unsafe-functions/13_unsafe_functions.c b/test/security/unsafe-functions/13_unsafe_functions.c new file mode 100644 index 0000000..e0a7c76 --- /dev/null +++ b/test/security/unsafe-functions/13_unsafe_functions.c @@ -0,0 +1,101 @@ +/** + * 13 - UNSAFE / DEPRECATED FUNCTIONS (CWE-676) + * + * Compile: gcc -Wall -Wextra -g 13_unsafe_functions.c -o 13_test + * Analyze: clang --analyze 13_unsafe_functions.c + * + * Note: gets() est retiré depuis C11, certains compilateurs refusent de compiler. + */ + +#include +#include +#include + +/* 13a. gets() — jamais sûr, retiré en C11 */ +void vuln_gets(void) +{ + char buf[64]; + printf("Input: "); +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic warning "-Wimplicit-function-declaration" +#endif + gets(buf); /* CWE-676: aucune limite de taille, overflow garanti */ +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + printf("Got: %s\n", buf); +} + +/* 13b. strcat() sans vérification de taille */ +void vuln_strcat(const char* src) +{ + char buf[16] = "Hello "; + strcat(buf, src); /* CWE-676: pas de limite → overflow */ + puts(buf); +} + +/* 13c. strtok() — non réentrant, non thread-safe */ +void vuln_strtok(char* input) +{ + char* tok = strtok(input, " "); /* CWE-676: modifie input, état global */ + while (tok) + { + printf("token: %s\n", tok); + tok = strtok(NULL, " "); + } +} + +/* 13d. 
atoi() — pas de gestion d'erreur */ +void vuln_atoi(const char* input) +{ + int val = atoi(input); /* CWE-676: pas de détection d'erreur/overflow */ + printf("val = %d\n", val); + /* Utiliser strtol() avec vérification d'errno à la place */ +} + +/* 13e. strcpy() — classique */ +void vuln_strcpy(const char* input) +{ + char buf[8]; + strcpy(buf, input); /* CWE-676: pas de vérification de taille */ + puts(buf); +} + +int main(void) +{ + printf("=== 13: Unsafe Functions Tests ===\n"); + vuln_strcat("AAAAAAAAAAAAAAAAAAAAA"); + char data[] = "hello world foo bar"; + vuln_strtok(data); + vuln_atoi("not_a_number"); + vuln_strcpy("way too long for this tiny buffer"); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 16, column 1 +// [ !!Warn ] local variable 'buf' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 18, column 5 +// [ !!Warn ] deprecated unsafe function 'gets' is used +// ↳ clang: 'gets' is deprecated: This function is provided for compatibility reasons only. Due to security concerns inherent in the design of gets(3), it is highly recommended that you use fgets(3) instead. 
+ +// at line 25, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcat_chk on variable 'buf' +// ↳ destination stack buffer size: 16 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically + +// at line 48, column 5 +// [ !!Warn ] potential stack buffer overflow in __strcpy_chk on variable 'buf' +// ↳ destination stack buffer size: 8 bytes +// ↳ this API has no explicit size argument; destination fit cannot be proven statically + +// at line 47, column 1 +// [ !!Warn ] local variable 'buf' is never initialized +// ↳ declared without initializer and no definite write was found in this function diff --git a/test/security/use-after-free/03_use_after_free.c b/test/security/use-after-free/03_use_after_free.c new file mode 100644 index 0000000..f76de9b --- /dev/null +++ b/test/security/use-after-free/03_use_after_free.c @@ -0,0 +1,90 @@ +/** + * 03 - USE-AFTER-FREE / DOUBLE FREE (CWE-415, CWE-416) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=address 03_use_after_free.c -o 03_test + * Analyze: clang --analyze 03_use_after_free.c + */ + +#include +#include +#include + +/* 3a. Use-after-free simple */ +void vuln_use_after_free(void) +{ + char* ptr = (char*)malloc(64); + if (!ptr) + return; + strcpy(ptr, "hello"); + free(ptr); + printf("data = %s\n", ptr); /* CWE-416: accès après free */ +} + +/* 3b. Double free conditionnel */ +void vuln_double_free(int condition) +{ + char* p = (char*)malloc(128); + if (!p) + return; + if (condition) + { + free(p); + } + /* ... du code ... */ + free(p); /* CWE-415: double free si condition == true */ +} + +/* 3c. 
Dangling pointer dans une struct */ +typedef struct +{ + char* name; + int id; +} User; + +User* vuln_dangling_struct(void) +{ + User* u = (User*)malloc(sizeof(User)); + if (!u) + return NULL; + u->name = (char*)malloc(32); + if (!u->name) + { + free(u); + return NULL; + } + strcpy(u->name, "Alice"); + free(u->name); + /* u->name est maintenant dangling, mais u est retourné */ + return u; /* CWE-416: l'appelant accédera à u->name */ +} + +int main(void) +{ + printf("=== 03: Use-After-Free / Double Free Tests ===\n"); + vuln_use_after_free(); + vuln_double_free(1); + User* u = vuln_dangling_struct(); + if (u) + { + printf("name = %s\n", u->name); /* dangling */ + free(u); + } + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 18, column 27 +// [!!!Error] potential use-after-release: 'HeapAlloc' handle 'ptr' is used after a release in this function +// ↳ a later dereference/call argument use may access invalid memory + +// at line 29, column 5 +// [!!!Error] potential double release: 'HeapAlloc' handle 'p' is released without a matching acquire in this function +// ↳ this may indicate release-after-release or ownership mismatch + +// at line 44, column 5 +// [ !!Warn ] released handle derived from 'u' may escape through a returned owner object +// ↳ caller-visible object may contain dangling pointer state diff --git a/test/security/use-after-free/18_use_after_free_advanced.c b/test/security/use-after-free/18_use_after_free_advanced.c new file mode 100644 index 0000000..94f618c --- /dev/null +++ b/test/security/use-after-free/18_use_after_free_advanced.c @@ -0,0 +1,127 @@ +/** + * 18 - ADVANCED USE-AFTER-FREE / DOUBLE FREE CASES (nested if/loop/tricky) + * + * Compile: gcc -Wall -Wextra -g -fsanitize=address 18_use_after_free_advanced.c -o 18_test + * Analyze: clang --analyze 18_use_after_free_advanced.c 
+ */ + +#include +#include +#include + +typedef struct +{ + char* name; + int id; +} User18; + +/* 18a. UAF dans un nested-if */ +void vuln_uaf_nested_if(int gate1, int gate2) +{ + char* p = (char*)malloc(32); + if (!p) + return; + strcpy(p, "nested-uaf"); + + if (gate1) + { + if (gate2) + { + free(p); + } + } + + if (gate1 && gate2) + { + printf("%s\n", p); + } + + if (!(gate1 && gate2)) + { + free(p); + } +} + +/* 18b. Double free dans un nested-loop */ +void vuln_double_free_nested_loop(int n) +{ + char* p = (char*)malloc(16); + if (!p) + return; + + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < n; ++j) + { + if (i == 0 && j == 0) + { + free(p); + } + } + } + + free(p); +} + +/* 18c. Dangling field via nested-if puis retour de l'objet owner */ +User18* vuln_dangling_nested_if(int gate1, int gate2) +{ + User18* u = (User18*)malloc(sizeof(User18)); + if (!u) + return NULL; + + u->name = (char*)malloc(32); + if (!u->name) + { + free(u); + return NULL; + } + + strcpy(u->name, "alice"); + u->id = 7; + + if (gate1) + { + if (gate2) + { + free(u->name); + } + } + + return u; +} + +int main(void) +{ + vuln_uaf_nested_if(1, 1); + vuln_double_free_nested_loop(1); + + User18* u = vuln_dangling_nested_if(1, 1); + if (u) + { + free(u); + } + + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 30, column 24 +// [!!!Error] potential use-after-release: 'HeapAlloc' handle 'p' is used after a release in this function +// ↳ a later dereference/call argument use may access invalid memory + +// at line 34, column 9 +// [!!!Error] potential double release: 'HeapAlloc' handle 'p' is released without a matching acquire in this function +// ↳ this may indicate release-after-release or ownership mismatch + +// at line 51, column 5 +// [!!!Error] potential double release: 'HeapAlloc' handle 'p' is released without a 
matching acquire in this function +// ↳ this may indicate release-after-release or ownership mismatch + +// at line 70, column 13 +// [ !!Warn ] released handle derived from 'u' may escape through a returned owner object +// ↳ caller-visible object may contain dangling pointer state diff --git a/test/security/variadic-mismatch/14_variadic_mismatch.c b/test/security/variadic-mismatch/14_variadic_mismatch.c new file mode 100644 index 0000000..e61d4f4 --- /dev/null +++ b/test/security/variadic-mismatch/14_variadic_mismatch.c @@ -0,0 +1,77 @@ +/** + * 14 - VARIADIC FUNCTION MISUSE (format/argument mismatch) + * + * Compile: gcc -Wall -Wextra -g 14_variadic_mismatch.c -o 14_test + * Analyze: clang --analyze 14_variadic_mismatch.c + */ + +#include + +/* 14a. Mismatch type : %s attend char*, reçoit int */ +void vuln_format_type_mismatch(void) +{ + int x = 42; + printf("%s\n", x); /* UB: %s attend un pointeur, reçoit un int */ +} + +/* 14b. Pas assez d'arguments */ +void vuln_format_missing_args(void) +{ + int x = 42; + printf("%d %d %d\n", x); /* UB: 2 arguments manquants */ +} + +/* 14c. Trop d'arguments (pas UB mais suspect) */ +void vuln_format_extra_args(void) +{ + printf("%d\n", 1, 2, 3); /* args 2 et 3 ignorés, probablement un bug */ +} + +/* 14d. Mismatch signed/unsigned */ +void vuln_format_signedness(void) +{ + unsigned int u = 4294967295U; + printf("signed: %d\n", u); /* affiche -1, pas la valeur attendue */ + + int neg = -1; + printf("unsigned: %u\n", neg); /* affiche 4294967295, trompeur */ +} + +/* 14e. 
Mismatch taille : %d pour un long long */ +void vuln_format_size_mismatch(void) +{ + long long big = 1LL << 40; + printf("value: %d\n", big); /* UB: %d attend int, reçoit long long */ +} + +int main(void) +{ + printf("=== 14: Variadic Mismatch Tests ===\n"); + vuln_format_type_mismatch(); + vuln_format_missing_args(); + vuln_format_extra_args(); + vuln_format_signedness(); + vuln_format_size_mismatch(); + return 0; +} + +// run_test expectations +// resource-model: models/resource-lifetime/generic.txt +// escape-model: models/stack-escape/generic.txt +// buffer-model: models/buffer-overflow/generic.txt + +// at line 13, column 20 +// [ !!Warn ] variadic format and argument list appear inconsistent +// ↳ clang: format specifies type 'char *' but the argument has type 'int' + +// at line 19, column 17 +// [ !!Warn ] variadic format and argument list appear inconsistent +// ↳ clang: more '%' conversions than data arguments + +// at line 24, column 23 +// [ !!Warn ] variadic format and argument list appear inconsistent +// ↳ clang: data argument not used by format string + +// at line 39, column 27 +// [ !!Warn ] variadic format and argument list appear inconsistent +// ↳ clang: format specifies type 'int' but the argument has type 'long long' diff --git a/test/uninitialized-variable/cross-tu-uninitialized-wrapper-use.c b/test/uninitialized-variable/cross-tu-uninitialized-wrapper-use.c index c007217..7b8d77b 100644 --- a/test/uninitialized-variable/cross-tu-uninitialized-wrapper-use.c +++ b/test/uninitialized-variable/cross-tu-uninitialized-wrapper-use.c @@ -17,3 +17,7 @@ int cross_tu_read_after_wrapper(void) fill_wrapper_cross_tu(&props); return value; } + +// at line 18, column 12 +// [ !!Warn ] potential read of uninitialized local variable 'value' +// ↳ this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-cpp-aggregate-ctor-copy.cpp 
b/test/uninitialized-variable/uninitialized-local-cpp-aggregate-ctor-copy.cpp new file mode 100644 index 0000000..b2cfd7c --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-cpp-aggregate-ctor-copy.cpp @@ -0,0 +1,23 @@ +#include +#include + +struct AnalysisResultLike +{ + std::vector labels; + std::vector values; +}; + +struct PipelineStateLike +{ + int& token; + AnalysisResultLike result; +}; + +AnalysisResultLike aggregate_ctor_member_copy_should_not_warn(int& token) +{ + PipelineStateLike state{token}; + return state.result; +} + +// not contains: potential read of uninitialized local variable 'state' +// not contains: local variable 'state' is never initialized diff --git a/test/uninitialized-variable/uninitialized-local-cpp-ctor-forgets-field-warns.cpp b/test/uninitialized-variable/uninitialized-local-cpp-ctor-forgets-field-warns.cpp new file mode 100644 index 0000000..06f3edd --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-cpp-ctor-forgets-field-warns.cpp @@ -0,0 +1,16 @@ +struct BadCtor +{ + int initialized; + int forgotten; + BadCtor(int v) : initialized(v) {} // does not initialize 'forgotten' +}; + +int ctor_forgets_field_should_warn(void) +{ + BadCtor obj(42); + return obj.forgotten; +} + +// at line 11, column 16 +// [ !!Warn ] potential read of uninitialized local variable 'obj' +// ↳ this load may execute before any definite initialization on all control-flow paths diff --git a/test/uninitialized-variable/uninitialized-local-cpp-lambda-receiver.cpp b/test/uninitialized-variable/uninitialized-local-cpp-lambda-receiver.cpp index 266db08..7e97f8e 100644 --- a/test/uninitialized-variable/uninitialized-local-cpp-lambda-receiver.cpp +++ b/test/uninitialized-variable/uninitialized-local-cpp-lambda-receiver.cpp @@ -1,8 +1,21 @@ +#include + int lambda_receiver_object_should_not_warn_never_initialized(void) { auto buildCanonicalize = [&](int v) { return v + 1; }; + auto buildCanonicalize2 = [&](int v) + { + if (v >= INT_MIN 
&& v < INT_MAX) + return v; // or some other policy + return v + 1; + }; return buildCanonicalize(41); } // not contains: local variable 'buildCanonicalize' is never initialized + +// at line 5, column 52 +// [ !!Warn ] potential signed integer overflow in arithmetic operation +// ↳ operation: add +// ↳ result is returned without a provable non-overflow boun diff --git a/test/uninitialized-variable/uninitialized-local-cpp-nested-aggregate-copy.cpp b/test/uninitialized-variable/uninitialized-local-cpp-nested-aggregate-copy.cpp new file mode 100644 index 0000000..70b552d --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-cpp-nested-aggregate-copy.cpp @@ -0,0 +1,29 @@ +#include +#include + +struct Leaf +{ + std::vector items; + int count = 0; +}; + +struct Middle +{ + Leaf leaf; + bool active = false; +}; + +struct Root +{ + int& ref; + Middle mid; // value-initialized via aggregate init +}; + +Middle nested_aggregate_copy(int& r) +{ + Root root{r}; // ref = r, mid = value-initialized (nested default ctors) + return root.mid; // copies Middle -> reads Leaf -> reads vector internals +} + +// not contains: potential read of uninitialized local variable 'root' +// not contains: local variable 'root' is never initialized diff --git a/test/uninitialized-variable/uninitialized-local-cpp-parameterized-ctor-copy.cpp b/test/uninitialized-variable/uninitialized-local-cpp-parameterized-ctor-copy.cpp new file mode 100644 index 0000000..4785f99 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-cpp-parameterized-ctor-copy.cpp @@ -0,0 +1,24 @@ +#include +#include + +struct Inner +{ + std::vector data; + std::string label; +}; + +struct Outer +{ + int id; + Inner inner; // value-initialized via aggregate init +}; + +Inner parameterized_aggregate_init_then_copy(void) +{ + int x = 42; + Outer obj{x}; // id = x, inner = value-initialized (default ctor) + return obj.inner; // copy-ctor reads inner +} + +// not contains: potential read of uninitialized 
local variable 'obj' +// not contains: local variable 'obj' is never initialized diff --git a/test/uninitialized-variable/uninitialized-local-cpp-sret-complex-return.cpp b/test/uninitialized-variable/uninitialized-local-cpp-sret-complex-return.cpp new file mode 100644 index 0000000..7186e73 --- /dev/null +++ b/test/uninitialized-variable/uninitialized-local-cpp-sret-complex-return.cpp @@ -0,0 +1,28 @@ +#include +#include + +struct Report +{ + std::vector entries; + std::vector scores; + bool finalized = false; +}; + +static Report build_report(int n) +{ + Report r; + for (int i = 0; i < n; ++i) + r.scores.push_back(i); + return r; +} + +int sret_complex_return_should_not_warn(void) +{ + Report rep = build_report(3); + return static_cast(rep.entries.size() + rep.scores.size()); +} + +// not contains: potential read of uninitialized local variable 'rep' +// not contains: potential read of uninitialized local variable 'r' +// not contains: local variable 'rep' is never initialized +// not contains: local variable 'r' is never initialized diff --git a/test/uninitialized-variable/uninitialized-local-pointer-redirect.c b/test/uninitialized-variable/uninitialized-local-pointer-redirect.c index 47fc83e..790f058 100644 --- a/test/uninitialized-variable/uninitialized-local-pointer-redirect.c +++ b/test/uninitialized-variable/uninitialized-local-pointer-redirect.c @@ -13,3 +13,7 @@ int read_after_pointer_redirect(void) } // not contains: potential read of uninitialized local variable 'uninit' + +// at line 8, column 1 +// [ !!Warn ] local variable 'uninit' is never initialized +// ↳ declared without initializer and no definite write was found in this function diff --git a/test/uninitialized-variable/uninitialized-local-stdint-type.cpp b/test/uninitialized-variable/uninitialized-local-stdint-type.cpp index 4944f0b..cfd292f 100644 --- a/test/uninitialized-variable/uninitialized-local-stdint-type.cpp +++ b/test/uninitialized-variable/uninitialized-local-stdint-type.cpp @@ -11,3 
+11,27 @@ int main(void) return 0; } + +// at line 5, column 1 +// [ !!Warn ] local variable 'test0' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 6, column 1 +// [ !!Warn ] local variable 'test1' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 7, column 1 +// [ !!Warn ] local variable 'test2' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 8, column 1 +// [ !!Warn ] local variable 'test3' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 9, column 1 +// [ !!Warn ] local variable 'test4' is never initialized +// ↳ declared without initializer and no definite write was found in this function + +// at line 10, column 1 +// [ !!Warn ] local variable 'test5' is never initialized +// ↳ declared without initializer and no definite write was found in this function diff --git a/test/uninitialized-variable/uninitialized-local-warnings-only-function-filter.c b/test/uninitialized-variable/uninitialized-local-warnings-only-function-filter.c index e2377ba..0b06ed1 100644 --- a/test/uninitialized-variable/uninitialized-local-warnings-only-function-filter.c +++ b/test/uninitialized-variable/uninitialized-local-warnings-only-function-filter.c @@ -13,3 +13,7 @@ int main(void) { return read_uninitialized_value() + clean_value(); } + +// at line 4, column 12 +// [ !!Warn ] potential read of uninitialized local variable 'value' +// ↳ this load may execute before any definite initialization on all control-flow paths diff --git a/test/use-after-free/cross-tu-uaf-def.c b/test/use-after-free/cross-tu-uaf-def.c new file mode 100644 index 0000000..441c5af --- /dev/null +++ b/test/use-after-free/cross-tu-uaf-def.c @@ -0,0 +1,21 @@ +#include + +void* acquire_handle(void) +{ + return malloc(32); +} + +void 
release_handle(void* h) +{ + free(h); +} + +void* acquire_handle_wrapper(void) +{ + return acquire_handle(); +} + +void release_handle_wrapper(void* h) +{ + release_handle(h); +} diff --git a/test/use-after-free/cross-tu-uaf-use.c b/test/use-after-free/cross-tu-uaf-use.c new file mode 100644 index 0000000..09d3cc6 --- /dev/null +++ b/test/use-after-free/cross-tu-uaf-use.c @@ -0,0 +1,59 @@ +#include +#include + +void* acquire_handle_wrapper(void); +void release_handle_wrapper(void* h); + +/* cross-TU use-after-free inside a nested if */ +void io_cross_uaf_nested_if(int gate1, int gate2) +{ + void* h = acquire_handle_wrapper(); + if (!h) + return; + + if (gate1) + { + if (gate2) + { + release_handle_wrapper(h); + } + } + + if (gate1 && gate2) + { + memset(h, 0, 1); + } + + if (!(gate1 && gate2)) + { + release_handle_wrapper(h); + } +} + +/* cross-TU double release inside a nested loop */ +void io_cross_double_release_nested_loop(int n) +{ + void* h = acquire_handle_wrapper(); + if (!h) + return; + + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < n; ++j) + { + if (i == 0 && j == 0) + { + release_handle_wrapper(h); + } + } + } + + release_handle_wrapper(h); +} + +int main(void) +{ + io_cross_uaf_nested_if(1, 1); + io_cross_double_release_nested_loop(1); + return 0; +}