diff --git a/.github/scripts/patch_sbom.py b/.github/scripts/patch_sbom.py
new file mode 100644
index 0000000..70ba2fe
--- /dev/null
+++ b/.github/scripts/patch_sbom.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""
+patch_sbom.py — Post-process a Trivy-generated SPDX JSON file to:
+  1. Set name to the package name (not the scanned folder name)
+  2. Set documentNamespace to the required pattern
+  3. Set creationInfo.creators (Organization + Tool line)
+  4. Set creationInfo.created (current UTC timestamp)
+  5. Set spdxVersion to SPDX-2.3
+  6. Remove synthetic Trivy APPLICATION package (requirements.txt container)
+  7. Remove synthetic Trivy filesystem/source root packages
+  8. Remove internal test/noise packages (e.g. setuptools' my-test-package)
+  9. Remove the files[] section (internal test eggs, not real deliverables)
+  10. Promote relationships: replace the removed container's SPDXID with
+      SPDXRef-DOCUMENT so each real package becomes DESCRIBED BY the document
+  11. Remove annotations / comments Trivy adds to packages
+"""
+
+import argparse
+import json
+from datetime import datetime, timezone
+
+_STRIP_PKG_FIELDS = {"annotations", "comment"}
+
+# Packages Trivy picks up from setuptools internals — not real deliverable deps
+_NOISE_PACKAGE_NAMES = {
+    "my-test-package",
+    "my_test_package",
+}
+
+# SPDX package purposes that are Trivy synthetic scan-root / container artefacts
+_SYNTHETIC_PURPOSES = {"SOURCE", "APPLICATION"}
+
+
+def _is_synthetic(pkg: dict) -> bool:
+    """True for Trivy's scan-root and requirements.txt container packages."""
+    if pkg.get("primaryPackagePurpose") not in _SYNTHETIC_PURPOSES:
+        return False
+    # Only remove if it has no purl (i.e. it's not a real package)
+    refs = pkg.get("externalRefs", [])
+    return not any(r.get("referenceType") == "purl" for r in refs)
+
+
+def _is_noise(pkg: dict) -> bool:
+    """True for known internal test packages bundled inside setuptools."""
+    return pkg.get("name", "").lower().replace("-", "_") in {
+        n.replace("-", "_") for n in _NOISE_PACKAGE_NAMES
+    }
+
+
+def patch(input_path: str, output_path: str,
+          name: str, namespace: str, org: str, tool: str) -> None:
+
+    with open(input_path, encoding="utf-8") as f:
+        doc = json.load(f)
+
+    # 1. SPDX version
+    doc["spdxVersion"] = "SPDX-2.3"
+
+    # 2. Document name
+    doc["name"] = name
+
+    # 3. Namespace
+    doc["documentNamespace"] = namespace
+
+    # 4. creationInfo
+    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    doc.setdefault("creationInfo", {})
+    doc["creationInfo"]["created"] = now
+    doc["creationInfo"]["creators"] = [
+        f"Organization: {org}",
+        f"Tool: {tool}",
+    ]
+
+    # 5. Filter packages — track removed SPDXIDs so we can fix relationships
+    removed_spdxids = set()  # synthetic / noise packages to remove
+    kept_packages = []
+    for pkg in doc.get("packages", []):
+        if _is_synthetic(pkg) or _is_noise(pkg):
+            removed_spdxids.add(pkg["SPDXID"])
+            continue
+        for field in _STRIP_PKG_FIELDS:
+            pkg.pop(field, None)
+        kept_packages.append(pkg)
+    doc["packages"] = kept_packages
+
+    # 6. Remove files[] section (internal test eggs, not deliverables)
+    doc.pop("files", None)
+
+    # 7. Rewrite relationships:
+    #    - Drop any rel whose *target* was removed
+    #    - Replace the *source* of CONTAINS rels that came from a removed
+    #      synthetic container (requirements.txt / filesystem root) with
+    #      SPDXRef-DOCUMENT, and change type to DESCRIBES
+    kept_rels = []
+    seen_describes = set()  # avoid duplicate DESCRIBES entries
+
+    for rel in doc.get("relationships", []):
+        src = rel["spdxElementId"]
+        tgt = rel["relatedSpdxElement"]
+        kind = rel["relationshipType"]
+
+        # Drop if the target was removed
+        if tgt in removed_spdxids:
+            continue
+
+        # Promote: if the source was a removed synthetic container,
+        # rewrite as SPDXRef-DOCUMENT DESCRIBES
+        if src in removed_spdxids:
+            if kind == "CONTAINS":
+                key = ("SPDXRef-DOCUMENT", tgt)
+                if key not in seen_describes:
+                    kept_rels.append({
+                        "spdxElementId": "SPDXRef-DOCUMENT",
+                        "relatedSpdxElement": tgt,
+                        "relationshipType": "DESCRIBES",
+                    })
+                    seen_describes.add(key)
+            continue  # drop the original rel with the removed source
+
+        kept_rels.append(rel)
+
+    # Ensure every kept package has at least a DESCRIBES from the document
+    kept_pkg_ids = {p["SPDXID"] for p in kept_packages}
+    described = {r["relatedSpdxElement"]
+                 for r in kept_rels if r["relationshipType"] == "DESCRIBES"
+                 and r["spdxElementId"] == "SPDXRef-DOCUMENT"}
+    for spdx_id in kept_pkg_ids - described:
+        kept_rels.append({
+            "spdxElementId": "SPDXRef-DOCUMENT",
+            "relatedSpdxElement": spdx_id,
+            "relationshipType": "DESCRIBES",
+        })
+
+    doc["relationships"] = kept_rels
+
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(doc, f, indent=2, ensure_ascii=False)
+        f.write("\n")
+
+    print(f"Patched SBOM written to {output_path}")
+    print(f"  name      : {name}")
+    print(f"  namespace : {namespace}")
+    print(f"  created   : {now}")
+    print(f"  creators  : Organization: {org} | Tool: {tool}")
+    print(f"  packages  : {len(kept_packages)} kept, {len(removed_spdxids)} removed")
+
+
+def main() -> None:
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument("--input", required=True)
+    p.add_argument("--output", required=True)
+    p.add_argument("--name", required=True, help="Clean package name e.g. spotfire-2.5.0")
+    p.add_argument("--namespace", required=True)
+    p.add_argument("--org", required=True)
+    p.add_argument("--tool", required=True)
+    args = p.parse_args()
+    patch(args.input, args.output, args.name, args.namespace, args.org, args.tool)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/sbom.yaml b/.github/workflows/sbom.yaml
new file mode 100644
index 0000000..c56492b
--- /dev/null
+++ b/.github/workflows/sbom.yaml
@@ -0,0 +1,244 @@
+name: Generate SBOM
+
+on:
+  # Runs after build.yaml completes successfully on main — no duplicate build
+  workflow_run:
+    workflows: ["Build and Test Package"]
+    types: [completed]
+    branches: [main]
+  # Always run on release so SBOMs are attached to published releases
+  release:
+    types: [published]
+  # Allow manual trigger for any branch
+  workflow_dispatch:
+
+permissions:
+  contents: write  # dependency-submission API + release asset upload
+  actions: read    # needed to download artifacts from the triggering workflow_run
+  id-token: write  # sigstore attestation
+
+# Shared values that appear in every SBOM's creationInfo / documentNamespace
+env:
+  TRIVY_VERSION: "0.69.3"
+  SBOM_ORG: "Cloud Software Group, Inc., Spotfire"
+  SBOM_NS_BASE: "https://spotfire.com/spdx"
+
+jobs:
+  # ── 1. Read config (no build) ──────────────────────────────────────────────
+  setup:
+    name: Read Config
+    if: >
+      github.event_name == 'release' ||
+      github.event_name == 'workflow_dispatch' ||
+      github.event.workflow_run.conclusion == 'success'
+    runs-on: ubuntu-latest
+    outputs:
+      python-versions: ${{ steps.dynamic.outputs.pythons }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Read python-versions
+        id: dynamic
+        run: |
+          echo -n "pythons=" >> $GITHUB_OUTPUT
+          cat .github/python-versions.json >> $GITHUB_OUTPUT
+
+  # ── 2. SBOM for the sdist ──────────────────────────────────────────────────
+  sbom-sdist:
+    name: SBOM – Source Distribution
+    needs: setup
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive  # needed for vendor/sbdf-c when building/installing sdist
+
+      # workflow_run: reuse artifact from build.yaml — no rebuild
+      - name: Download sdist (from workflow_run)
+        if: github.event_name == 'workflow_run'
+        uses: actions/download-artifact@v4
+        with:
+          name: sdist
+          path: dist
+          run-id: ${{ github.event.workflow_run.id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      # push / release / workflow_dispatch: build fresh
+      - name: Set Up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+      - name: Build sdist
+        if: github.event_name != 'workflow_run'
+        run: |
+          pip install build
+          python -m build --sdist
+
+      # Install the sdist into an isolated venv so we can freeze its dependencies
+      - name: Install sdist into scan-env
+        run: |
+          python -m venv scan-env
+          scan-env/bin/pip install --quiet dist/spotfire-*.tar.gz
+
+      - name: Set SBOM metadata
+        id: meta
+        run: |
+          PKG_NAME=$(ls dist/spotfire-*.tar.gz | sed 's|dist/||;s|\.tar\.gz||')
+          CREATED=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+          echo "pkg_name=$PKG_NAME" >> $GITHUB_OUTPUT
+          echo "namespace=${{ env.SBOM_NS_BASE }}/$PKG_NAME/$CREATED" >> $GITHUB_OUTPUT
+
+      - name: Install Trivy ${{ env.TRIVY_VERSION }}
+        run: |
+          curl -sSfL https://github.com/aquasecurity/trivy/releases/download/v${{ env.TRIVY_VERSION }}/trivy_${{ env.TRIVY_VERSION }}_Linux-64bit.tar.gz \
+            | tar -xz trivy
+          sudo mv trivy /usr/local/bin/trivy
+
+      # Freeze installed packages into requirements.txt then scan for licenses
+      # This avoids Trivy picking up test eggs / synthetic filesystem packages
+      - name: Trivy scan → raw SPDX JSON
+        run: |
+          scan-env/bin/pip freeze > requirements.txt
+          trivy fs \
+            --scanners license \
+            --license-full \
+            --format spdx-json \
+            --output _trivy_raw.spdx.json \
+            --quiet \
+            requirements.txt
+          rm requirements.txt
+
+      - name: Patch SBOM metadata and strip annotations
+        run: |
+          python .github/scripts/patch_sbom.py \
+            --input _trivy_raw.spdx.json \
+            --output spotfire-sdist.sbom.spdx.json \
+            --namespace "${{ steps.meta.outputs.namespace }}" \
+            --name "${{ steps.meta.outputs.pkg_name }}" \
+            --org "${{ env.SBOM_ORG }}" \
+            --tool "trivy-${{ env.TRIVY_VERSION }}"
+
+      - name: Upload SBOM artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: sbom-sdist
+          path: spotfire-sdist.sbom.spdx.json
+
+  # ── 3. SBOM for each wheel ─────────────────────────────────────────────────
+  sbom-wheel:
+    name: SBOM – Wheel (${{ matrix.python-version }})
+    needs: setup
+    runs-on: ubuntu-latest  # Linux only — Windows wheels cannot be cross-compiled
+    strategy:
+      matrix:
+        python-version: ${{ fromJson(needs.setup.outputs.python-versions) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive  # needed for vendor/sbdf-c when building wheel fresh
+
+      # workflow_run: reuse the ubuntu wheel artifact from build.yaml — no rebuild
+      - name: Download wheel (from workflow_run)
+        if: github.event_name == 'workflow_run'
+        uses: actions/download-artifact@v4
+        with:
+          name: wheel-${{ matrix.python-version }}-ubuntu-latest
+          path: dist
+          run-id: ${{ github.event.workflow_run.id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      # Always select the matrix Python: on the workflow_run path the downloaded
+      # wheel carries a version-specific cp3X ABI tag, so the scan venv below
+      # must run the matching interpreter or pip cannot install the wheel
+      - name: Set Up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      # push / release / workflow_dispatch: build fresh on Linux
+      - name: Build wheel
+        if: github.event_name != 'workflow_run'
+        run: |
+          git submodule update --init --recursive
+          pip install auditwheel build setuptools
+          python -m build --sdist
+          tar xzf dist/spotfire-*.tar.gz
+          cd spotfire-*
+          python -m build --wheel
+          auditwheel repair -w ../dist --plat manylinux2014_x86_64 dist/*.whl
+
+      # Install wheel into isolated venv so we can freeze its dependencies
+      - name: Install wheel into scan-env
+        run: |
+          python -m venv scan-env
+          scan-env/bin/pip install --quiet dist/spotfire-*.whl
+
+      - name: Set SBOM metadata
+        id: meta
+        run: |
+          PKG_NAME=$(ls dist/spotfire-*.whl | sed 's|dist/||;s|\.whl||' | cut -d- -f1,2)
+          CREATED=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+          echo "pkg_name=$PKG_NAME" >> $GITHUB_OUTPUT
+          echo "namespace=${{ env.SBOM_NS_BASE }}/$PKG_NAME/$CREATED" >> $GITHUB_OUTPUT
+
+      - name: Install Trivy ${{ env.TRIVY_VERSION }}
+        run: |
+          curl -sSfL https://github.com/aquasecurity/trivy/releases/download/v${{ env.TRIVY_VERSION }}/trivy_${{ env.TRIVY_VERSION }}_Linux-64bit.tar.gz \
+            | tar -xz trivy
+          sudo mv trivy /usr/local/bin/trivy
+
+      # Freeze installed packages into requirements.txt then scan for licenses
+      # This avoids Trivy picking up test eggs / synthetic filesystem packages
+      - name: Trivy scan → raw SPDX JSON
+        run: |
+          scan-env/bin/pip freeze > requirements.txt
+          trivy fs \
+            --scanners license \
+            --license-full \
+            --format spdx-json \
+            --output _trivy_raw.spdx.json \
+            --quiet \
+            requirements.txt
+          rm requirements.txt
+
+      - name: Patch SBOM metadata and strip annotations
+        run: |
+          python .github/scripts/patch_sbom.py \
+            --input _trivy_raw.spdx.json \
+            --output spotfire-wheel-${{ matrix.python-version }}.sbom.spdx.json \
+            --namespace "${{ steps.meta.outputs.namespace }}" \
+            --name "${{ steps.meta.outputs.pkg_name }}" \
+            --org "${{ env.SBOM_ORG }}" \
+            --tool "trivy-${{ env.TRIVY_VERSION }}"
+
+      - name: Upload SBOM artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: sbom-wheel-${{ matrix.python-version }}
+          path: spotfire-wheel-${{ matrix.python-version }}.sbom.spdx.json
+
+  # ── 4. Attach SBOMs to GitHub Release ──────────────────────────────────────
+  attach-to-release:
+    name: Attach SBOMs to Release
+    if: github.event_name == 'release'
+    needs: [sbom-sdist, sbom-wheel]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download all SBOM artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: sbom-*
+          path: all-sboms
+          merge-multiple: true
+
+      - name: List SBOMs
+        run: find all-sboms -name "*.spdx.json" | sort
+
+      # gh CLI is pre-installed on all GitHub-hosted runners — no third-party action needed
+      - name: Upload SBOMs to release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh release upload "${{ github.event.release.tag_name }}" \
+            $(find all-sboms -name "*.spdx.json" | tr '\n' ' ') \
+            --repo "${{ github.repository }}" \
+            --clobber