diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c4c5bf1..a8b5a43 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -101,6 +101,7 @@ generate_matrix: - "*.md" - "docs/**/*" - "result_server/**/*" + - "requirements-result-server.txt" - "config/system_info.csv" when: never - when: always @@ -142,6 +143,7 @@ trigger_child_pipeline: - "*.md" - "docs/**/*" - "result_server/**/*" + - "requirements-result-server.txt" - "config/system_info.csv" when: never - when: always diff --git a/docs/ci.md b/docs/ci.md index d1faa3d..c77a5d5 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -230,6 +230,7 @@ Current skip-oriented patterns include: - `*.md` - `docs/**/*` - `result_server/**/*` +- `requirements-result-server.txt` - `config/system_info.csv` > Synchronization note: this list mirrors the `paths:` entries in diff --git a/result_server/templates/_filter_dropdowns.html b/result_server/templates/_filter_dropdowns.html index f9dbb44..97468dd 100644 --- a/result_server/templates/_filter_dropdowns.html +++ b/result_server/templates/_filter_dropdowns.html @@ -19,3 +19,4 @@ {% endfor %} + diff --git a/result_server/templates/_results_table_cell_code.html b/result_server/templates/_results_table_cell_code.html index ff18fed..f11900f 100644 --- a/result_server/templates/_results_table_cell_code.html +++ b/result_server/templates/_results_table_cell_code.html @@ -1,7 +1,7 @@ -{% if row.source_info and row.source_info.source_type == "git" %} -{{ row.code }}{{ row.quality.label }} -{% elif row.source_info and row.source_info.source_type == "file" %} -{{ row.code }}{{ row.quality.label }} +{% if row.source_link and row.source_link.href %} +{{ row.code }}{{ row.quality.label }} +{% elif row.source_link %} +{{ row.code }}{{ row.quality.label }} {% else %} {{ row.code }}{{ row.quality.label }} {% endif %} diff --git a/result_server/tests/test_portal_list_templates.py b/result_server/tests/test_portal_list_templates.py index 0b28924..6f19725 100644 --- a/result_server/tests/test_portal_list_templates.py +++ b/result_server/tests/test_portal_list_templates.py @@ -173,6 +173,30 @@ def test_pagination_template_urlencodes_filters_without_inline_javascript(): assert 'code" onclick="alert(1)' not in html +def test_code_cell_adds_noopener_to_source_links(): + app = build_portal_shell_app( + templates_dir=os.path.join(os.path.dirname(__file__), "..", "templates"), + ) + with app.test_request_context("/results"): + from flask import render_template + + html = render_template( + "_results_table_cell_code.html", + row={ + "code": "qws", + "source_link": { + "href": "https://example.invalid/repo.git", + "title": "https://example.invalid/repo.git", + }, + "quality": {"level": "ready", "label": "Ready", "summary": "ok"}, + }, + ) + + assert 'target="_blank"' in html + assert 'rel="noopener noreferrer"' in html + assert 'href="https://example.invalid/repo.git"' in html + + def test_estimated_results_template_renders_table_note(): app = build_portal_shell_app( templates_dir=os.path.join(os.path.dirname(__file__), "..", "templates"), diff --git a/result_server/tests/test_script_source_info_security.py b/result_server/tests/test_script_source_info_security.py new file mode 100644 index 0000000..a7d25c0 --- /dev/null +++ b/result_server/tests/test_script_source_info_security.py @@ -0,0 +1,25 @@ +"""Static checks for source_info handoff scripts.""" + +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def test_result_script_does_not_source_source_info_env(): + result_script = (REPO_ROOT / "scripts" / "result.sh").read_text(encoding="utf-8") + + assert ". results/source_info.env" not in result_script + assert "source results/source_info.env" not in result_script + assert "build_source_info_block" in result_script + assert "jq -n" in result_script + + +def test_bk_fetch_source_writes_encoded_source_info_values(): + bk_functions = (REPO_ROOT / "scripts" / "bk_functions.sh").read_text(encoding="utf-8") + + assert "BK_SOURCE_INFO_FORMAT=base64-v1" in bk_functions + assert "BK_REPO_URL_B64" in bk_functions + assert "BK_FILE_PATH_B64" in bk_functions + assert 'export BK_REPO_URL="$BK_REPO_URL"' not in bk_functions + assert 'export BK_FILE_PATH="$BK_FILE_PATH"' not in bk_functions diff --git a/result_server/tests/test_source_info_properties.py b/result_server/tests/test_source_info_properties.py index 1f668cf..c80481b 100644 --- a/result_server/tests/test_source_info_properties.py +++ b/result_server/tests/test_source_info_properties.py @@ -11,6 +11,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from test_support import install_portal_test_stubs +from utils.result_table_rows import _build_source_link install_portal_test_stubs() @@ -77,3 +78,51 @@ def test_file_source_info_structure_valid_with_md5sum(self, md5sum): assert "file_path" in source_info assert "md5sum" in source_info assert MD5SUM_PATTERN.match(source_info["md5sum"]) is not None + + +def test_git_source_link_allows_http_urls_only(): + source_info = { + "source_type": "git", + "repo_url": "https://github.com/example/repo.git", + } + + link = _build_source_link(source_info) + + assert link["href"] == "https://github.com/example/repo.git" + assert link["title"] == "https://github.com/example/repo.git" + + +def test_git_source_link_rejects_javascript_urls(): + source_info = { + "source_type": "git", + "repo_url": "javascript:alert(1)", + } + + link = _build_source_link(source_info) + + assert link["href"] is None + assert link["title"] == "Repository URL is not linkable" + + +def test_git_source_link_rejects_ambiguous_urls(): + source_info = { + "source_type": "git", + "repo_url": "https://example.invalid\\@evil.invalid/repo.git", + } + + link = _build_source_link(source_info) + + assert link["href"] is None + assert link["title"] == "Repository URL is not linkable" + + +def test_file_source_link_uses_basename_only(): + source_info = { + "source_type": "file", + "file_path": "/sensitive/path/archive.tar.gz", + } + + link = _build_source_link(source_info) + + assert link["href"] is None + assert link["title"] == "archive.tar.gz" diff --git a/result_server/utils/result_table_rows.py b/result_server/utils/result_table_rows.py index db025a5..fabd42f 100644 --- a/result_server/utils/result_table_rows.py +++ b/result_server/utils/result_table_rows.py @@ -1,3 +1,6 @@ +import os +from urllib.parse import urlsplit + from flask import url_for from utils.result_records import ( @@ -13,6 +16,7 @@ def build_result_table_row(json_filename, result_data, padata_filenames): matched_padata = _find_matching_padata_archive(json_filename, result_data, padata_filenames) pipeline_timing = result_data.get("pipeline_timing", {}) source_info = result_data.get("source_info") + source_link = _build_source_link(source_info) profile_data = result_data.get("profile_data") ci_trigger = result_data.get("ci_trigger", "-") or "-" @@ -47,6 +51,7 @@ def build_result_table_row(json_filename, result_data, padata_filenames): "run_job": result_data.get("run_job", "-") or "-", "pipeline_id": pipeline_id, "source_info": source_info, + "source_link": source_link, "source_hash": _format_source_hash(source_info), "quality": summarize_result_quality(result_data), "profile_data": profile_data, @@ -97,6 +102,36 @@ def _format_source_hash(source_info): return "-" +def _build_source_link(source_info): + if not isinstance(source_info, dict): + return None + + source_type = source_info.get("source_type") + if source_type == "git": + repo_url = str(source_info.get("repo_url") or "").strip() + parsed = urlsplit(repo_url) + has_unsafe_chars = any(ch.isspace() for ch in repo_url) or "\\" in repo_url + if parsed.scheme in {"http", "https"} and parsed.netloc and not has_unsafe_chars: + return { + "href": repo_url, + "title": repo_url, + } + return { + "href": None, + "title": "Repository URL is not linkable", + } + + if source_type == "file": + file_path = str(source_info.get("file_path") or "").strip() + filename = os.path.basename(file_path) if file_path else "source archive" + return { + "href": None, + "title": filename or "source archive", + } + + return None + + def _format_profile_summary(profile_data): if not isinstance(profile_data, dict) or not profile_data: return "-" diff --git a/scripts/bk_functions.sh b/scripts/bk_functions.sh index c73a69a..1bcdb33 100644 --- a/scripts/bk_functions.sh +++ b/scripts/bk_functions.sh @@ -1250,6 +1250,43 @@ bk_emit_overlap() { bk_emit_section "overlap:${_bk_ovl_sections}" "$_bk_ovl_time" "$_bk_ovl_package" "$_bk_ovl_artifact" --type overlap --members "$_bk_ovl_sections" } +bk_base64_encode_value() { + if command -v base64 >/dev/null 2>&1; then + printf '%s' "$1" | base64 | tr -d '\r\n' + return 0 + fi + if command -v openssl >/dev/null 2>&1; then + printf '%s' "$1" | openssl base64 -A | tr -d '\r\n' + return 0 + fi + echo "bk_base64_encode_value: neither base64 nor openssl found" >&2 + return 1 +} + +bk_write_source_info_env() { + _bk_source_type="$1" + _bk_repo_url="${2:-}" + _bk_branch="${3:-}" + _bk_commit_hash="${4:-}" + _bk_file_path="${5:-}" + _bk_md5sum="${6:-}" + + if ! command -v base64 >/dev/null 2>&1 && ! command -v openssl >/dev/null 2>&1; then + echo "bk_write_source_info_env: neither base64 nor openssl found" >&2 + return 1 + fi + + { + printf 'BK_SOURCE_INFO_FORMAT=base64-v1\n' + printf 'BK_SOURCE_TYPE_B64=%s\n' "$(bk_base64_encode_value "$_bk_source_type")" + printf 'BK_REPO_URL_B64=%s\n' "$(bk_base64_encode_value "$_bk_repo_url")" + printf 'BK_BRANCH_B64=%s\n' "$(bk_base64_encode_value "$_bk_branch")" + printf 'BK_COMMIT_HASH_B64=%s\n' "$(bk_base64_encode_value "$_bk_commit_hash")" + printf 'BK_FILE_PATH_B64=%s\n' "$(bk_base64_encode_value "$_bk_file_path")" + printf 'BK_MD5SUM_B64=%s\n' "$(bk_base64_encode_value "$_bk_md5sum")" + } > results/source_info.env +} + # bk_fetch_source - Fetch source code and collect metadata. # # Usage: @@ -1273,7 +1310,7 @@ bk_emit_overlap() { # BK_MD5SUM - (file) Full 32-char md5sum # # Side effects: -# Writes results/source_info.env in export format +# Writes results/source_info.env as data, not executable shell # # Returns: # 0 - success @@ -1331,13 +1368,7 @@ bk_fetch_source() { export BK_BRANCH BK_COMMIT_HASH - # Write results/source_info.env - cat > results/source_info.env < results/source_info.env </dev/null || true } -# Read source_info.env if it exists (written by bk_fetch_source in build stage) -source_info_block="null" -if [ -f results/source_info.env ]; then - . results/source_info.env - if [ "$BK_SOURCE_TYPE" = "git" ]; then - source_info_block=$(cat </dev/null 2>&1 /dev/null 2>&1 /dev/null 2>&1 /dev/null 2>&1; then + openssl base64 -d -A + return $? + fi + return 1 +} + +source_info_env_value() { + local key="$1" + local line + line=$(awk -F= -v k="${key}_B64" '$1 == k {print substr($0, length(k) + 2); exit}' results/source_info.env) + if [ -n "$line" ]; then + printf '%s' "$line" | decode_base64_value 2>/dev/null || true + return 0 + fi + + # Legacy fallback for old source_info.env files. Treat the file as data and + # accept only simple quoted values; never source it as shell. + awk -v key="$key" ' + index($0, "export " key "=\"") == 1 && substr($0, length($0), 1) == "\"" { + prefix = "export " key "=\"" + value = substr($0, length(prefix) + 1, length($0) - length(prefix) - 1) + if (value !~ /[`$\\]/) { + print value + } + exit + } + ' results/source_info.env +} + +build_source_info_block() { + if [ ! -f results/source_info.env ]; then + printf '%s' "null" + return 0 + fi + + local source_type + source_type=$(source_info_env_value BK_SOURCE_TYPE) + + if [ "$source_type" = "git" ]; then + jq -n \ + --arg source_type "git" \ + --arg repo_url "$(source_info_env_value BK_REPO_URL)" \ + --arg branch "$(source_info_env_value BK_BRANCH)" \ + --arg commit_hash "$(source_info_env_value BK_COMMIT_HASH)" \ + '{source_type: $source_type, repo_url: $repo_url, branch: $branch, commit_hash: $commit_hash}' + return 0 + fi + + if [ "$source_type" = "file" ]; then + jq -n \ + --arg source_type "file" \ + --arg file_path "$(source_info_env_value BK_FILE_PATH)" \ + --arg md5sum "$(source_info_env_value BK_MD5SUM)" \ + '{source_type: $source_type, file_path: $file_path, md5sum: $md5sum}' + return 0 + fi + + printf '%s' "null" +} + +# Read source_info.env if it exists (written by bk_fetch_source in build stage). +# It is parsed as data and converted with jq; it is never sourced as shell. +source_info_block=$(build_source_info_block) # Function to write a Result_JSON file for one FOM block # Arguments: $1=index, uses global vars: code, system, fom, fom_version, exp, node_count, numproc_node, description, confidential, sections_json, overlaps_json