Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions routes/main_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,34 @@ def recommend():

return jsonify({"projects": results}), 200

@main.route("/api/project/<int:project_id>/resources")
def project_resources(project_id):
    """Serve one project's learning resources as validated, structured objects.

    The project's raw ``resources`` list (empty when the key is absent) is
    passed through ``validate_resources`` and returned together with the
    requested id, so the frontend can render broken links differently from
    valid ones.

    Example response body:
        {
            "project_id": 1,
            "resources": [
                {"label": "Python official docs", "url": "https://docs.python.org", "valid": true},
                {"label": "Broken link", "url": "not-a-url", "valid": false}
            ]
        }

    Returns:
        ``(json, 200)`` with the payload above, or
        ``({"error": "Project not found."}, 404)`` when no project matches.
    """
    # Function-scope import, kept local to the route.
    # NOTE(review): presumably avoids an import cycle — confirm before hoisting.
    from utils.url_validator import validate_resources

    project = find_project_by_id(project_id)
    if project:
        payload = {
            "project_id": project_id,
            "resources": validate_resources(project.get("resources", [])),
        }
        return jsonify(payload), 200

    return jsonify({"error": "Project not found."}), 404

@main.route("/project/<int:project_id>")
def project_detail(project_id):
Expand Down
267 changes: 267 additions & 0 deletions tests/test_resource_url_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
# tests/test_resource_url_validation.py
# Tests for bug #781 — learning resource URLs must be validated and
# served as structured objects so broken links can be handled gracefully.
#
# Run with: python -m pytest tests/test_resource_url_validation.py -v

import sys
import os
import pytest

# Put the parent of this file's directory at the front of sys.path so the
# `utils` and `app` imports below resolve regardless of the working directory.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from utils.url_validator import is_valid_url, parse_resource, validate_resource, validate_resources
from utils.data_loader import load_all_projects, clear_cache
from app import app


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

@pytest.fixture
def client():
    """Yield a Flask test client with the app's TESTING flag switched on."""
    app.config["TESTING"] = True
    with app.test_client() as test_client:
        yield test_client


@pytest.fixture(autouse=True)
def reset_cache():
    """Clear the data-loader cache before and after every test (autouse)."""
    clear_cache()  # start each test from a cold cache
    yield
    clear_cache()  # do not leak cached data into the next test


# ---------------------------------------------------------------------------
# is_valid_url
# ---------------------------------------------------------------------------

def test_valid_https_url():
    """A bare https:// URL is accepted."""
    verdict = is_valid_url("https://docs.python.org")
    assert verdict is True

def test_valid_http_url():
    """A plain http:// URL is accepted."""
    verdict = is_valid_url("http://example.com")
    assert verdict is True

def test_valid_url_with_path():
    """A URL carrying a path component is accepted."""
    verdict = is_valid_url("https://docs.python.org/3/library/csv.html")
    assert verdict is True

def test_valid_url_with_query():
    """A URL carrying a query string is accepted."""
    verdict = is_valid_url("https://example.com/search?q=python")
    assert verdict is True

def test_valid_url_with_fragment():
    """A URL carrying a #fragment is accepted."""
    verdict = is_valid_url("https://example.com/page#section")
    assert verdict is True

def test_valid_url_with_port():
    """A URL with an explicit port is accepted."""
    verdict = is_valid_url("https://localhost:5000/api")
    assert verdict is True

def test_invalid_bare_domain():
    """A domain without any scheme is rejected."""
    verdict = is_valid_url("example.com")
    assert verdict is False

def test_invalid_ftp_scheme():
    """An ftp:// URL is rejected."""
    verdict = is_valid_url("ftp://example.com")
    assert verdict is False

def test_invalid_mailto():
    """A mailto: address is rejected."""
    verdict = is_valid_url("mailto:test@example.com")
    assert verdict is False

def test_invalid_empty_string():
    """The empty string is rejected."""
    verdict = is_valid_url("")
    assert verdict is False

def test_invalid_none():
    """``None`` is rejected rather than raising."""
    verdict = is_valid_url(None)
    assert verdict is False

def test_invalid_plain_text():
    """Free-form text that is not a URL is rejected."""
    verdict = is_valid_url("not a url at all")
    assert verdict is False

def test_invalid_missing_scheme():
    """A scheme-relative reference (leading ``//``) is rejected."""
    verdict = is_valid_url("//example.com/path")
    assert verdict is False


# ---------------------------------------------------------------------------
# parse_resource
# ---------------------------------------------------------------------------

def test_parse_label_and_url():
    """A "Label: URL" entry is split into its label and its URL."""
    parsed = parse_resource("Python official docs: https://docs.python.org")
    label, url = parsed["label"], parsed["url"]
    assert label == "Python official docs"
    assert url == "https://docs.python.org"

def test_parse_url_only():
    """A bare URL with no label is used as both label and URL."""
    bare_url = "https://realpython.com"
    parsed = parse_resource("https://realpython.com")
    assert parsed["label"] == bare_url
    assert parsed["url"] == bare_url

def test_parse_label_with_https_url_and_path():
    """The full URL, including its path, survives parsing intact."""
    parsed = parse_resource("CSV module guide: https://docs.python.org/3/library/csv.html")
    label, url = parsed["label"], parsed["url"]
    assert label == "CSV module guide"
    assert url == "https://docs.python.org/3/library/csv.html"

def test_parse_does_not_split_on_url_colon():
    """The colon in ``https://`` must not be taken as the label separator."""
    raw_entry = "MDN Fetch API: https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API"
    parsed = parse_resource(raw_entry)
    assert parsed["label"] == "MDN Fetch API"
    assert "developer.mozilla.org" in parsed["url"]

def test_parse_empty_string():
    """An empty entry parses to an empty label and an empty URL."""
    parsed = parse_resource("")
    label, url = parsed["label"], parsed["url"]
    assert label == ""
    assert url == ""

def test_parse_none():
    """``None`` parses to an empty label and an empty URL instead of raising."""
    parsed = parse_resource(None)
    label, url = parsed["label"], parsed["url"]
    assert label == ""
    assert url == ""

def test_parse_strips_whitespace():
    """Whitespace surrounding the URL is removed during parsing."""
    padded_entry = " Label : https://example.com "
    parsed = parse_resource(padded_entry)
    assert parsed["url"] == "https://example.com"


# ---------------------------------------------------------------------------
# validate_resource
# ---------------------------------------------------------------------------

def test_validate_resource_valid():
    """A well-formed entry is flagged valid and keeps its label and URL."""
    outcome = validate_resource("Python docs: https://docs.python.org")
    assert outcome["valid"] is True
    label, url = outcome["label"], outcome["url"]
    assert label == "Python docs"
    assert url == "https://docs.python.org"

def test_validate_resource_invalid_url():
    """An entry whose URL part is not a URL is flagged invalid."""
    outcome = validate_resource("Broken link: not-a-url")
    is_valid = outcome["valid"]
    assert is_valid is False

def test_validate_resource_empty():
    """An empty entry is flagged invalid."""
    outcome = validate_resource("")
    is_valid = outcome["valid"]
    assert is_valid is False

def test_validate_resource_has_all_keys():
    """The validated object exposes the ``label``, ``url`` and ``valid`` keys."""
    outcome = validate_resource("Label: https://example.com")
    for expected_key in ("label", "url", "valid"):
        assert expected_key in outcome


# ---------------------------------------------------------------------------
# validate_resources (list)
# ---------------------------------------------------------------------------

def test_validate_resources_all_valid():
    """Two well-formed entries yield two results, every one flagged valid."""
    entries = [
        "Python docs: https://docs.python.org",
        "MDN: https://developer.mozilla.org",
    ]
    validated = validate_resources(entries)
    assert len(validated) == 2
    assert all(item["valid"] for item in validated)

def test_validate_resources_mixed():
    """Results keep input order: the good entry first, the bad entry second."""
    entries = [
        "Good link: https://docs.python.org",
        "Bad link: not-a-url",
    ]
    validated = validate_resources(entries)
    first, second = validated[0], validated[1]
    assert first["valid"] is True
    assert second["valid"] is False

def test_validate_resources_empty_list():
    """An empty input list produces an empty result list."""
    validated = validate_resources([])
    assert validated == []

def test_validate_resources_not_a_list():
    """``None`` in place of a list produces an empty result list."""
    validated = validate_resources(None)
    assert validated == []


# ---------------------------------------------------------------------------
# All resources in projects.json must have valid URL format
# ---------------------------------------------------------------------------

def test_all_project_resources_have_valid_urls():
    """Every resource URL in projects.json must pass format validation.

    Each raw resource entry is parsed and its URL checked with
    ``is_valid_url``. Entries whose parsed URL is empty are not reported
    here. All offending URLs are collected first so a single failure
    message lists every broken entry with its project id and title.
    """
    # parse_resource / is_valid_url come from the module-level import; the
    # former function-scope re-import only shadowed those same names.
    broken = []
    for project in load_all_projects():
        for raw in project.get("resources", []):
            url = parse_resource(raw).get("url", "")
            if url and not is_valid_url(url):
                broken.append((project["id"], project["title"], url))

    assert broken == [], (
        "Malformed resource URLs found in projects.json:\n" +
        "\n".join(f" project id={pid} '{title}': {url}" for pid, title, url in broken)
    )

def test_all_projects_have_at_least_one_resource():
    """No project may have a missing or empty ``resources`` list."""
    missing = []
    for project in load_all_projects():
        if not project.get("resources"):
            missing.append(project)

    assert missing == [], (
        "Projects with no resources: " +
        ", ".join(str(p["id"]) for p in missing)
    )

def test_all_resource_strings_are_non_empty():
    """Every resource entry must be a string with non-whitespace content.

    A non-string entry (for example a JSON ``null``) is reported as an
    ordinary assertion failure naming the project, instead of surfacing as
    an ``AttributeError`` from calling ``.strip()`` on it.
    """
    for project in load_all_projects():
        for raw in project.get("resources", []):
            # Check the type first so the failure message, not a traceback
            # from .strip(), identifies the offending project.
            assert isinstance(raw, str) and raw.strip(), (
                f"Empty or non-string resource entry {raw!r} "
                f"in project id={project['id']}"
            )


# ---------------------------------------------------------------------------
# /api/project/<id>/resources route
# ---------------------------------------------------------------------------

def test_resources_route_returns_200(client):
    """Requesting the resources of project 1 answers with HTTP 200."""
    status = client.get("/api/project/1/resources").status_code
    assert status == 200

def test_resources_route_returns_json(client):
    """The response body can be decoded as JSON."""
    payload = client.get("/api/project/1/resources").get_json()
    assert payload is not None

def test_resources_route_has_project_id(client):
    """The payload echoes the requested project id under ``project_id``."""
    response = client.get("/api/project/1/resources")
    payload = response.get_json()
    assert "project_id" in payload
    assert payload["project_id"] == 1

def test_resources_route_has_resources_list(client):
    """The payload carries a ``resources`` key whose value is a list."""
    response = client.get("/api/project/1/resources")
    payload = response.get_json()
    assert "resources" in payload
    assert isinstance(payload["resources"], list)

def test_resources_route_each_item_has_label(client):
    """Every returned resource object has a ``label`` key."""
    payload = client.get("/api/project/1/resources").get_json()
    assert all("label" in entry for entry in payload["resources"])

def test_resources_route_each_item_has_url(client):
    """Every returned resource object has a ``url`` key."""
    payload = client.get("/api/project/1/resources").get_json()
    assert all("url" in entry for entry in payload["resources"])

def test_resources_route_each_item_has_valid_flag(client):
    """Every returned resource object has a boolean ``valid`` key."""
    payload = client.get("/api/project/1/resources").get_json()
    for entry in payload["resources"]:
        assert "valid" in entry
        flag = entry["valid"]
        assert isinstance(flag, bool)

def test_resources_route_project_1_all_valid(client):
    """Every resource the route returns for project 1 is flagged valid."""
    payload = client.get("/api/project/1/resources").get_json()
    invalid = []
    for entry in payload["resources"]:
        if not entry["valid"]:
            invalid.append(entry)
    assert invalid == [], f"Invalid resources in project 1: {invalid}"

def test_resources_route_invalid_project_returns_404(client):
    """An unknown project id answers 404 with an ``error`` key in the body."""
    response = client.get("/api/project/99999/resources")
    status = response.status_code
    assert status == 404
    body = response.get_json()
    assert "error" in body

def test_resources_route_security_headers(client):
    """The route's response carries the expected security headers."""
    headers = client.get("/api/project/1/resources").headers
    assert headers.get("X-Frame-Options") == "DENY"
    assert headers.get("X-Content-Type-Options") == "nosniff"
26 changes: 25 additions & 1 deletion utils/data_loader.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
# utils/data_loader.py
import json
import os
import threading
import logging

from utils.url_validator import is_valid_url, parse_resource

# Path to the project dataset: <this module's directory>/../data/projects.json.
DATA_FILE = os.path.join(os.path.dirname(__file__), "..", "data", "projects.json")
# Module-level cache of the loaded project list; None until it is populated.
_projects_cache = None
# Lock object for the cache.
# NOTE(review): presumably guards _projects_cache during loading — confirm in load_all_projects.
_cache_lock = threading.Lock()

# Module logger, registered under the fixed name "devpath.data_loader".
logger = logging.getLogger("devpath.data_loader")


def validate_projects(projects):
"""
Validate project dataset integrity.
Expand All @@ -15,9 +22,10 @@ def validate_projects(projects):
- Duplicate project titles (case-insensitive)
- Missing required fields
- Empty required string fields
- Malformed resource URLs (logs a warning, does not raise)

Raises:
ValueError: If any validation rule is violated.
ValueError: If any structural validation rule is violated.
"""
seen_ids = set()
seen_titles = set()
Expand Down Expand Up @@ -51,6 +59,20 @@ def validate_projects(projects):
raise ValueError(f"Duplicate project title found: {project['title']}")
seen_titles.add(title)

# Resource URL format validation — warn, do not raise
# Broken URLs in production are logged so they can be fixed in the
# data file without crashing the application.
for raw in project.get("resources", []):
parsed = parse_resource(raw)
url = parsed.get("url", "")
if url and not is_valid_url(url):
logger.warning(
"Malformed resource URL in project '%s' (id=%s): %r",
project.get("title", "Unknown"),
project_id,
url,
)


def load_all_projects():
"""Read and return the full list of projects from the JSON file.
Expand All @@ -66,11 +88,13 @@ def load_all_projects():
validate_projects(_projects_cache)
return _projects_cache


def get_available_levels():
    """Return the distinct ``level`` values of all projects, sorted ascending."""
    levels = set()
    for project in load_all_projects():
        levels.add(project["level"])
    return sorted(levels)


def find_project_by_id(project_id):
"""Return the project whose 'id' matches project_id, or None."""
for project in load_all_projects():
Expand Down
Loading
Loading