Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/gitleaks-reusable.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Reusable gitleaks secret scan. Callers delegate here:
#
#   jobs:
#     gitleaks:
#       uses: roleme/workflows/.github/workflows/gitleaks-reusable.yml@<sha>
#       secrets: inherit
#
# fetch-depth: 0 makes the checkout fetch the full history, so gitleaks scans
# every commit and not just the tip.
name: gitleaks (reusable)

# Reusable only: this workflow has no triggers of its own and declares no
# inputs or secrets.
on:
  workflow_call: {}

# Read-only access to repository contents is all the job requests.
permissions:
  contents: read

jobs:
  scan:
    runs-on: ubuntu-latest
    steps:
      # Check out the complete history (fetch-depth: 0) so the scanner sees
      # every commit, and do not leave the token behind in the checkout.
      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
          fetch-depth: 0

      # GITHUB_TOKEN is the only secret this workflow references.
      - uses: gitleaks/gitleaks-action@e0c47f4f8be36e29cdc102c57e68cb5cbf0e8d1e  # v3
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
59 changes: 59 additions & 0 deletions .github/workflows/privacy-scan-reusable.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Reusable privacy scan: fails if tracked files contain private-infrastructure
# markers (private IPs, .local hosts, host paths, key/secret markers, plus any
# caller-supplied hostnames). Runs scripts/privacy-scan.sh from this repo.
#
# Callers delegate here and pass their own private hostnames via extra-patterns
# (newline-separated extended regexps). Those patterns stay in the caller's
# (private) repo and run in the caller's (private) Actions context — they are
# NOT hardcoded in this public repo:
#
#   jobs:
#     privacy-scan:
#       uses: roleme/workflows/.github/workflows/privacy-scan-reusable.yml@<sha>
#       with:
#         extra-patterns: |
#           example-private-host\.example
name: privacy-scan (reusable)

on:
  workflow_call:
    inputs:
      # Optional; an empty value means only the built-in patterns are used.
      extra-patterns:
        description: >-
          Newline-separated extended-regexps of extra strings to flag
          (e.g. private hostnames)
        type: string
        required: false
        default: ''

permissions:
  contents: read

jobs:
  privacy-scan:
    runs-on: ubuntu-latest
    steps:
      # Two checkouts: the caller's code lands in the workspace root (the
      # default), while the scan SCRIPT is fetched separately from the
      # workflows repo so the rules are not taken from the calling repo's
      # working tree.
      - name: Checkout caller repo
        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6
        with:
          persist-credentials: false

      # job.workflow_repository / job.workflow_sha are intended to resolve to
      # THIS reusable workflow's repo at the exact commit the caller pinned,
      # so a caller pinned to an older SHA gets that SHA's script (not main).
      - name: Fetch scan script from workflows repo
        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6
        with:
          persist-credentials: false
          path: .privacy-scan-tools
          repository: ${{ job.workflow_repository }}
          ref: ${{ job.workflow_sha }}

      # extra-patterns is a workflow input (trusted caller config). It reaches
      # the script only through the environment, so it is never spliced into
      # the command line.
      - name: Run privacy scan
        run: bash .privacy-scan-tools/scripts/privacy-scan.sh
        env:
          EXTRA_PATTERNS: ${{ inputs.extra-patterns }}
22 changes: 22 additions & 0 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Lets repos use this script as a local pre-commit hook via the pre-commit
# framework. In a consuming repo's .pre-commit-config.yaml:
#
#   repos:
#     - repo: https://github.com/roleme/workflows
#       rev: <sha-or-tag>
#       hooks:
#         - id: privacy-scan
#           # private hostnames stay in the consuming repo's config, not here:
#           args: ["--"]
#           # pass extra patterns via env in the hook invocation, e.g.:
#           #   EXTRA_PATTERNS=$'example-private-host\\.example' git commit ...
#
# The scan reads EXTRA_PATTERNS from the environment (same as CI), so private
# hostnames are supplied by the developer/repo, never stored in this public repo.
- id: privacy-scan
  name: privacy scan (private-infra markers)
  description: 'Fail the commit if tracked files contain private-infrastructure markers.'
  # Run the repository's own script directly.
  language: script
  entry: scripts/privacy-scan.sh
  # The script enumerates tracked files itself (git ls-files), so it is run on
  # every commit and is not handed the list of staged filenames.
  always_run: true
  pass_filenames: false
91 changes: 91 additions & 0 deletions scripts/privacy-scan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env bash
# privacy-scan.sh — fail if tracked files contain private infrastructure
# markers. Shared by the reusable CI workflow and the local pre-commit hook so
# both enforce the exact same rules.
#
# Scans all tracked files (git ls-files). Built-in patterns are GENERIC and
# safe to live in this PUBLIC repo (private IP ranges, .local hosts, common
# host paths, key/secret markers). Caller-specific private hostnames are NOT
# hardcoded here — a private caller passes them via $EXTRA_PATTERNS
# (newline-separated extended-regexps), so the secret-ish list stays in the
# private repo that runs the scan, never in this public one.
#
# Usage:
#   EXTRA_PATTERNS=$'example-private-host\\.example\nexample-nas\\.local' scripts/privacy-scan.sh
#
# Env:
#   EXTRA_PATTERNS  newline-separated ERE patterns to also flag (optional)
#   ALLOW_FILE      path to a file listing path globs to skip (optional)
#
# Exits non-zero (and prints offending file:line) if anything matches.
set -euo pipefail

# Generic markers — no private values, safe for a public repo.
# Every entry is an extended regular expression; the entries are later joined
# with '|' and handed to `grep -E`.
builtin_patterns=(
  '\b10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b'                # 10.0.0.0/8
  '\b192\.168\.[0-9]{1,3}\.[0-9]{1,3}\b'                      # 192.168.0.0/16
  '\b172\.(1[6-9]|2[0-9]|3[01])\.[0-9]{1,3}\.[0-9]{1,3}\b'    # 172.16.0.0/12
  '\b[a-z0-9-]+\.local\b'                                     # mDNS/.local hosts
  '/home/[a-z0-9_-]+/'                                        # host home paths
  '/volume[0-9]+/'                                            # Synology volume paths
  '/etc/komodo/'                                              # komodo host config
  'ghp_[A-Za-z0-9]{30,}'                                      # GitHub PAT
  'github_pat_[A-Za-z0-9_]{30,}'                              # fine-grained PAT
  'xox[baprs]-[A-Za-z0-9-]{10,}'                              # Slack token
  '-----BEGIN[A-Z ]*PRIVATE KEY-----'                         # private keys
  'cli_secret'                                                # komodo cli secret key
)

# Caller-supplied extra patterns (one ERE per line, blank lines ignored).
# A plain read loop is used instead of `mapfile` so the script also runs on
# bash versions that lack that builtin. A trailing carriage return is stripped
# so patterns coming from a CRLF-encoded source still match.
extra_patterns=()
while IFS= read -r extra_line; do
  extra_line="${extra_line%$'\r'}"
  [[ "$extra_line" =~ ^[[:space:]]*$ ]] && continue
  extra_patterns+=("$extra_line")
done <<< "${EXTRA_PATTERNS:-}"

# The ${arr[@]+...} form expands to nothing when no extra patterns were given;
# a bare "${extra_patterns[@]}" on an empty array is an "unbound variable"
# error under `set -u` on bash releases older than 4.4.
patterns=("${builtin_patterns[@]}" ${extra_patterns[@]+"${extra_patterns[@]}"})

# This script defines the patterns, so it would match itself — exclude it, plus
# the lockfile-ish / binary stuff that produces noise. Callers can extend via
# ALLOW_FILE (one path-glob per line; blank lines and lines starting with '#'
# are ignored).
exclude_globs=(
  '*privacy-scan.sh'
  '*.png' '*.jpg' '*.jpeg' '*.gif' '*.ico' '*.pdf' '*.lock'
)
if [[ -n "${ALLOW_FILE:-}" ]]; then
  if [[ -f "${ALLOW_FILE}" ]]; then
    # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline;
    # a bare `read` loop would drop it.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Tolerate CRLF line endings so the glob does not carry a stray \r.
      line="${line%$'\r'}"
      [[ -z "$line" || "$line" == \#* ]] && continue
      exclude_globs+=("$line")
    done < "${ALLOW_FILE}"
  else
    # Not fatal: the scan simply runs without the extra exclusions. Say so,
    # otherwise the resulting "false positives" are hard to explain.
    echo "privacy-scan: ALLOW_FILE '${ALLOW_FILE}' is not a readable file; no extra exclusions applied." >&2
  fi
fi

# is_excluded PATH
# Succeeds (status 0) when PATH matches at least one entry of the global
# exclude_globs array, and fails (status 1) otherwise. Each entry is applied
# as a shell glob pattern against the whole path.
is_excluded() {
  local candidate="$1"
  local glob
  for glob in "${exclude_globs[@]}"; do
    # The right-hand side is left unquoted on purpose so it acts as a pattern.
    # shellcheck disable=SC2053
    if [[ "$candidate" == $glob ]]; then
      return 0
    fi
  done
  return 1
}

# ---------------------------------------------------------------------------
# Scan every tracked file against the combined pattern list.
#
# Exit status:
#   0  nothing matched
#   1  at least one marker was found (offending file:line printed)
#   2  the scan itself could not be trusted (bad pattern, not a git work
#      tree, or a file that could not be read)
# ---------------------------------------------------------------------------

# Check each pattern on its own before joining them. grep exits 1 for "no
# match" and >1 for a real error such as a malformed regex. Without this check
# a bad caller-supplied pattern would make every per-file grep fail and the
# scan would report "clean" without matching anything. Checking individually
# also catches patterns that are invalid alone but happen to form a valid
# (and unintended) expression once joined with '|'.
for pat in "${patterns[@]}"; do
  rc=0
  grep -qE -e "$pat" </dev/null 2>/dev/null || rc=$?
  if (( rc > 1 )); then
    echo "privacy-scan: invalid pattern (not a valid extended regex): $pat" >&2
    exit 2
  fi
done

# Build a single alternation for one grep pass per file.
joined=$(printf '%s|' "${patterns[@]}")
joined="${joined%|}"

# The exit status of `git ls-files` inside the process substitution below is
# not visible to the loop, so confirm up front that there is a work tree;
# otherwise an empty file list would be reported as a clean scan.
if [[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" != "true" ]]; then
  echo "privacy-scan: not inside a git work tree; nothing was scanned." >&2
  exit 2
fi

found=0
scan_errors=0
# -z / read -d '': NUL-delimited paths. Without -z git C-quotes paths that
# contain unusual characters, and the quoted name would not exist on disk.
while IFS= read -r -d '' f; do
  is_excluded "$f" && continue
  # git ls-files also lists entries with nothing to read (submodule gitlinks,
  # files deleted from the working tree); skip them explicitly.
  [[ -f "$f" ]] || continue
  # -I skips binary files; -n gives line numbers; -E extended regex.
  rc=0
  matches=$(grep -InE -e "$joined" -- "$f") || rc=$?
  if (( rc == 0 )); then
    found=1
    echo "::error file=$f::private-infra marker found" 2>/dev/null || true
    while IFS= read -r m; do
      echo " $f:$m"
    done <<< "$matches"
  elif (( rc > 1 )); then
    # A grep failure is not "no match": remember it and keep going so that
    # findings in the remaining files are still reported.
    scan_errors=1
    echo "privacy-scan: could not scan $f (grep exit $rc)" >&2
  fi
done < <(git ls-files -z)

if [[ "$found" -ne 0 ]]; then
  echo ""
  echo "privacy-scan: found private-infrastructure markers above." >&2
  echo "If a match is a false positive, exclude its path via ALLOW_FILE." >&2
  exit 1
fi

if [[ "$scan_errors" -ne 0 ]]; then
  echo "privacy-scan: some files could not be scanned; refusing to report clean." >&2
  exit 2
fi

echo "privacy-scan: clean — no private-infrastructure markers found."