strands-labs · sundargthb · May 21, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -0,0 +1,42 @@
+name: CodeQL
+
+on:
+  push:
+    branches: [main, dev]
+  pull_request:
+    branches: [main, dev]
+  schedule:
+    # Weekly scan on Monday 06:00 UTC catches drift even when no PRs land.
+    - cron: '0 6 * * 1'
+
+jobs:
+  analyze:
+    name: Analyze (Python)
+    runs-on: ubuntu-latest
+    permissions:
+      security-events: write
+      contents: read
+      actions: read
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
+        with:
+          persist-credentials: false
+
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@4e828ff8d448a8a6e532957b1811f387a63867e8  # v3.29.4
+        with:
+          languages: python
+          # security-and-quality covers taint flows (LLM input -> subprocess /
+          # XML / paths) plus broader quality lints. Catches the bug class
+          # surfaced in PR #85 (MJCF interpolation) and PR #90 (subprocess
+          # injection / path traversal).
+          queries: security-and-quality
+
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@4e828ff8d448a8a6e532957b1811f387a63867e8  # v3.29.4
+        with:
+          category: "/language:python"
+          # Findings appear in the Security tab; PRs are not blocked on
+          # CodeQL alerts (false-positive rate makes hard-fail noisy).
diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml
@@ -0,0 +1,38 @@
+version: 2
+updates:
+  # Python dependencies — heavy ML stack pulls in long transitive trees.
+  # Grouped so the torch / lerobot / transformers cluster doesn't generate
+  # a separate PR per package.
+  - package-ecosystem: pip
+    directory: /
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
+    groups:
+      ml-stack:
+        patterns:
+          - "torch*"
+          - "lerobot*"
+          - "transformers*"
+          - "huggingface-hub*"
+      sim-stack:
+        patterns:
+          - "mujoco*"
+          - "imageio*"
+          - "robot_descriptions*"
+      dev-tools:
+        patterns:
+          - "ruff"
+          - "mypy"
+          - "pytest*"
+
+  # GitHub Actions — keeps SHA pins fresh automatically. Without this entry,
+  # SHA pinning becomes manual maintenance. With it, every action update
+  # arrives as a pre-pinned PR.
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 5
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
@@ -0,0 +1,29 @@
+name: Dependency Review
+
+on:
+  pull_request:
+    branches: [main, dev]
+
+permissions:
+  contents: read
+  pull-requests: write   # so the action can comment summary on failure
+
+jobs:
+  dependency-review:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
+        with:
+          persist-credentials: false
+
+      - name: Dependency Review
+        uses: actions/dependency-review-action@da24556b548a50705dd671f47852072ea4c105d9  # v4.7.1
+        with:
+          # Hard fail: a new dependency with a known high or critical CVE.
+          # This is the only thing in the security baseline that blocks PRs.
+          fail-on-severity: high
+          # License changes are surfaced but do not block. Apache-2.0 is the
+          # project license; if a transitive flips to GPL we want eyes on it,
+          # not an auto-block.
+          comment-summary-in-pr: on-failure
diff --git a/.github/workflows/llm-input-safety.yml b/.github/workflows/llm-input-safety.yml
@@ -0,0 +1,77 @@
+name: LLM Input Safety Check
+
+# Surfaces an annotation on PRs touching agent-callable code so reviewers
+# eyeball the validation. Intentionally warn-only: regex matching has FPs,
+# and this should not block agents from shipping. The signal is "review
+# this carefully", not "don't merge".
+
+on:
+  pull_request:
+    branches: [main, dev]
+    paths:
+      - 'strands_robots/tools/**.py'
+      - 'strands_robots/simulation/**.py'
+      - 'strands_robots/mesh/**.py'
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  scan:
+    name: Scan agent-callable code for unvalidated LLM input
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
+        with:
+          persist-credentials: false
+          fetch-depth: 2  # need previous commit for diff context
+
+      - name: Find subprocess calls with f-strings in agent-callable code
+        run: |
+          set +e  # don't fail the workflow; we annotate instead
+
+          MATCHES=$(grep -rnE \
+            'subprocess\.(run|Popen|call|check_output|check_call)\([^)]*f["\047]' \
+            --include='*.py' \
+            strands_robots/tools/ \
+            strands_robots/simulation/ \
+            strands_robots/mesh/ 2>/dev/null || true)
+
+          if [ -n "$MATCHES" ]; then
+            echo "::warning::Found subprocess call(s) with f-string interpolation in agent-callable code. Verify inputs are validated via an allowlist or regex before interpolation — see AGENTS.md PR #85/#90 review learnings."
+            echo ""
+            echo "Locations:"
+            echo "$MATCHES" | while IFS= read -r line; do
+              file=$(echo "$line" | cut -d: -f1)
+              lineno=$(echo "$line" | cut -d: -f2)
+              echo "::warning file=${file},line=${lineno}::subprocess + f-string — confirm LLM input is validated"
+            done
+          fi
+
+      - name: Find string-concat into MJCF/XML in agent-callable code
+        run: |
+          set +e
+
+          # MJCF/XML built via f-string or % formatting that includes a name-like
+          # variable. Heuristic — not exhaustive. Match the patterns surfaced
+          # in PR #85 review.
+          MATCHES=$(grep -rnE \
+            '(\.format\([^)]*name|f["\047]<[a-z]+[^"]*\{[a-z_]*name)' \
+            --include='*.py' \
+            strands_robots/simulation/ 2>/dev/null || true)
+
+          if [ -n "$MATCHES" ]; then
+            echo "::warning::Found string interpolation of a name-like variable into XML/MJCF. Validate against ^[a-zA-Z0-9_-]+$ before interpolation — see AGENTS.md PR #85 review learning on sanitizing user inputs into XML."
+            echo "$MATCHES" | while IFS= read -r line; do
+              file=$(echo "$line" | cut -d: -f1)
+              lineno=$(echo "$line" | cut -d: -f2)
+              echo "::warning file=${file},line=${lineno}::name-like variable interpolated into XML — confirm sanitization"
+            done
+          fi
+
+      - name: Always pass
+        # This workflow does not fail PRs. Its purpose is reviewer attention
+        # via inline annotations. The check itself always succeeds so it
+        # appears green in the PR status list.
+        run: echo "LLM input safety scan complete — see annotations (if any)"
diff --git a/.github/workflows/pr-and-push.yml b/.github/workflows/pr-and-push.yml
@@ -16,4 +16,4 @@ jobs:
     permissions:
       contents: read
     with:
-      ref: ${{ github.event.pull_request.head.sha || github.sha }}
+      ref: ${{ github.event.pull_request.head.sha || github.sha }}
diff --git a/.github/workflows/pypi-publish-on-release.yml b/.github/workflows/pypi-publish-on-release.yml
@@ -14,28 +14,29 @@ jobs:
       ref: ${{ github.event.release.target_commitish }}
 
   build:
-    name: Build distribution 📦
+    name: Build distribution
     permissions:
       contents: read
     needs:
       - call-test-lint
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
         with:
           persist-credentials: false
           fetch-depth: 0
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
         with:
           python-version: '3.12'
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install hatch twine
+
       - name: Validate version
         run: |
           version=$(hatch version)
@@ -46,11 +47,13 @@ jobs:
               echo "Invalid version format"
               exit 1
           fi
+
       - name: Build
         run: |
           hatch build
+
       - name: Store the distribution packages
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
         with:
           name: python-package-distributions
           path: dist/
@@ -69,9 +72,10 @@ jobs:
 
     steps:
       - name: Download all the dists
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4.3.0
         with:
           name: python-package-distributions
           path: dist/
-      - name: Publish distribution 📦 to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
+
+      - name: Publish distribution to PyPI
+        uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc  # release/v1
diff --git a/.github/workflows/test-lint.yml b/.github/workflows/test-lint.yml
@@ -15,13 +15,13 @@ jobs:
       contents: read
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
         with:
           ref: ${{ inputs.ref }}
           persist-credentials: false
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
         with:
           python-version: '3.12'
           cache: 'pip'
@@ -42,4 +42,4 @@ jobs:
       - name: Run tests
         env:
           MUJOCO_GL: osmesa
-        run: hatch run test -x --strict-markers
+        run: hatch run test -x --strict-markers
diff --git a/AGENTS.md b/AGENTS.md
@@ -194,3 +194,48 @@ Corrections from code review that apply to all future contributions:
 - **Resolve threads as you fix them** - leaving 14 unresolved threads on a PR with all fixes pushed makes re-review painful. Mark threads resolved when the commit lands; reviewers can re-open if not satisfied.
 - **Reference commits in resolution comments** - "Fixed in `376376b`" + the suggested code block is dramatically faster to re-review than "fixed".
 - **Force-push invalidates approvals** - after a rebase, prior `APPROVED` reviews drop to `DISMISSED` automatically. Mention it in the PR comment so reviewers know to re-approve, not re-review the whole diff.
+
+## Review Learnings (PR #92 - CI Security Baseline)
+
+Corrections from code review that apply to all future contributions:
+
+### LLM Input Safety
+- **Validate before subprocess interpolation** — every parameter on an agent-callable
+  tool (`@tool` decorated function, `AgentTool.stream` dispatch handler) that flows
+  into `subprocess.run`, `subprocess.Popen`, MJCF / XML interpolation, or filesystem
+  path construction MUST be validated up front via regex allowlist, enum match, or
+  range check. Argv-style subprocess does not exempt you — defense-in-depth.
+- **Centralise validation in one function** — pattern: a `validate_inputs(...)` helper
+  at the top of the tool module that takes every user-supplied param as a keyword arg
+  and raises `ValueError` with a clear message on any rejection. Single entry-point
+  is independently testable. PR #90's `gr00t_inference.validate_inputs()` is the
+  canonical example.
+- **Allowlist enumerable values** — `data_config`, `embodiment_tag`, dtype strings,
+  container names: all match `^[a-z][a-z0-9_]+$` or an explicit `{"fp16", "fp8", ...}`
+  set. Never accept arbitrary strings into enumerable surfaces.
+- **Reject shell metacharacters in paths** — `;`, `|`, `$`, backticks, `>`, `<`,
+  `\n`, `\r`, `\x00`. Also reject `..` path traversal components. Apply even when
+  using argv-style subprocess.
+- **Bind to `127.0.0.1` by default**, not `0.0.0.0`. Users explicitly opt into
+  network exposure.
+
+### CI Security Baseline
+- **CodeQL findings are not PR-blocking but ARE actionable** — check the Security
+  tab after pushing to a branch. False-positives get dismissed with a reason;
+  real findings get fixed.
+- **Dependency Review hard-fails on high/critical CVEs in new deps.** If a PR
+  needs a dep with a known critical CVE, the conversation is "do we need this
+  dep" not "let's bypass the check."
+- **The LLM-input-safety workflow is a hint, not a gate.** Inline annotations
+  on `subprocess + f-string` and `name-into-XML` patterns flag code that needs
+  validation review. Confirm validation is present, then ignore the annotation
+  in review.
+
+### Action Pinning
+- **All `uses:` references in workflows pin to a full 40-character commit SHA**,
+  with the version tag preserved as a trailing comment: `uses: actions/checkout@<sha>  # v4.2.2`.
+- **Dependabot keeps these fresh** via the `github-actions` ecosystem entry.
+  Do not manually bump tags; merge the Dependabot PR.
+- **Especially `pypa/gh-action-pypi-publish`** — it uses a moving `release/v1`
+  branch, which is exactly the supply-chain pattern that the `tj-actions/changed-files`
+  incident exploited. This pin is non-negotiable.