Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions data/projects.json
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@
"starter_code": "starter_code/survey_form/index.html"
},
{
"id": 10,
"id": 12,
"title": "API ETL Pipeline",
"skills": ["Python", "pandas", "requests"],
"level": "Intermediate",
Expand Down Expand Up @@ -503,7 +503,7 @@
"starter_code": "starter_code/ai_resume_analyzer.py"
},
{
"id": 11,
"id": 14,
"title": "Number Guessing Game",
"skills": [
"Python"
Expand Down Expand Up @@ -539,7 +539,7 @@
"starter_code": "starter_code/number_guessing.py"
},
{
"id": 12,
"id": 15,
"title": "Simple Email Automation",
"skills": [
"Python"
Expand Down Expand Up @@ -576,7 +576,7 @@
"starter_code": "starter_code/email_automation.py"
},
{
"id": 13,
"id": 16,
"title": "Quiz App",
"skills": [
"HTML",
Expand Down Expand Up @@ -616,7 +616,7 @@
"starter_code": "starter_code/quiz_app.html"
},
{
"id": 14,
"id": 17,
"title": "File Organiser Script",
"skills": ["Python"],
"level": "Beginner",
Expand Down Expand Up @@ -647,7 +647,7 @@
"starter_code": "starter_code/file_organiser.py"
},
{
"id": 15,
"id": 18,
"title": "Flashcard Study App",
"skills": ["HTML", "CSS", "JavaScript"],
"level": "Beginner",
Expand Down Expand Up @@ -678,7 +678,7 @@
"starter_code": "starter_code/flashcard_app.html"
},
{
"id": 16,
"id": 19,
"title": "Budget Tracker Web App",
"skills": ["HTML", "CSS", "JavaScript"],
"level": "Intermediate",
Expand Down Expand Up @@ -710,7 +710,7 @@
"starter_code": "starter_code/budget_tracker.html"
},
{
"id": 17,
"id": 20,
"title": "Network Port Scanner",
"skills": ["Python"],
"level": "Intermediate",
Expand Down Expand Up @@ -741,7 +741,7 @@
"starter_code": "starter_code/port_scanner.py"
},
{
"id": 18,
"id": 21,
"title": "Typing Speed Test",
"skills": ["HTML", "CSS", "JavaScript"],
"level": "Beginner",
Expand Down Expand Up @@ -773,7 +773,7 @@
"starter_code": "starter_code/typing_test.html"
},
{
"id": 19,
"id": 22,
"title": "Course Progress Tracker",
"skills": ["Python"],
"level": "Intermediate",
Expand Down
13 changes: 13 additions & 0 deletions data/skill_graph.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"python": ["flask", "pandas", "requests"],
"javascript": ["node.js", "html", "css"],
"html": ["css", "javascript"],
"css": ["javascript"],
"flask": ["javascript", "html", "css"],
"pandas": ["requests"],
"node.js": ["javascript"],
"java": ["spring", "sql"],
"requests": [],
"spring": [],
"sql": []
}
33 changes: 33 additions & 0 deletions test_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,39 @@ def section(title):
f"overlap: {[p['title'] for p in overlap]}")
else:
print(" SKIP no recommendations returned, skipping overlap check")


# ---------------------------------------------------------------------------
# Progression (skill graph)
# ---------------------------------------------------------------------------

section("Skill graph progression")

# Ask for recommendations once and inspect the "progression" part of the result.
result_prog = get_recommendations("Python", "Intermediate", "Web", "High")

if "progression" in result_prog:
    passed("dict has 'progression' key")
else:
    failed("dict has 'progression' key", f"keys found: {list(result_prog.keys())}")

# Use .get() so a missing key (already reported above) does not abort the
# whole script with a KeyError before the summary is printed.
prog = result_prog.get("progression")

if isinstance(prog, list):
    passed(f"progression is a list ({len(prog)} result(s))")

    # Each progression entry is {"project": <project dict>, "gap_score": <number>},
    # so ids and titles live under the nested "project" key.
    rec_ids = {p["id"] for p in result_prog["recommendations"]}
    overlap = [p for p in prog if p["project"]["id"] in rec_ids]
    if not overlap:
        passed("progression projects don't repeat recommended ones")
    else:
        failed("progression projects don't repeat recommended ones",
               f"overlap: {[p['project']['title'] for p in overlap]}")

    for p in prog:
        print(f" → {p['project']['title']} (gap_score: {p['gap_score']})")
else:
    # The entry-level checks above only make sense for a list.
    failed("progression is a list", f"got {type(prog)}")

# ---------------------------------------------------------------------------
# Summary
Expand Down
100 changes: 100 additions & 0 deletions utils/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,103 @@ def score_single_project(project, user_skills, level, interest, time_availabilit

if project_time == user_time:
score += SCORING_WEIGHTS["time"]

graph = _load_skill_graph()
score += gap_boost(user_skills, project_skills, graph)

return score

# ---------------------------------------------------------------------------
# Skill graph helpers
# ---------------------------------------------------------------------------

def _load_skill_graph():
    """Load the skill graph from ``data/skill_graph.json``.

    The file is looked up in the ``data`` directory located one level above
    the directory that contains this module. It is re-read on every call.

    Returns:
        dict: the parsed JSON object. An empty dict is returned when the
        file is missing or unreadable, is not valid UTF-8 or valid JSON, or
        when its top-level value is not a JSON object.
    """
    path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "data", "skill_graph.json"
    )
    try:
        with open(path, "r", encoding="utf-8") as f:
            graph = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (bad UTF-8 bytes).
        return {}

    # The graph is queried with ``graph.get(skill, [])``; a list, string or
    # number at the top level would break that, so treat it as "no graph".
    return graph if isinstance(graph, dict) else {}


def _hops_to_skill(target, user_skills, graph, max_hops=3):
    """
    Breadth-first search outward from the user's skills towards ``target``.

    Args:
        target: the skill to reach.
        user_skills: skills the search starts from (distance 0).
        graph: dict mapping a skill to the skills it leads to.
        max_hops: largest distance that is still reported.

    Returns:
        0 when ``target`` is already in ``user_skills``; otherwise the
        smallest number of edges needed to reach it, or None when it cannot
        be reached within ``max_hops`` edges.
    """
    if target in user_skills:
        return 0

    seen = set(user_skills)
    current_level = list(user_skills)

    for distance in range(1, max_hops + 1):
        upcoming = []
        # Walk every outgoing edge of the current level, in order.
        edges = (
            adjacent
            for known in current_level
            for adjacent in graph.get(known, [])
        )
        for adjacent in edges:
            if adjacent == target:
                return distance
            if adjacent in seen:
                continue
            seen.add(adjacent)
            upcoming.append(adjacent)
        current_level = upcoming

    return None


def gap_boost(user_skills, project_skills, graph):
    """
    Score how close a project's missing skills are to what the user knows.

    Every project skill the user lacks contributes ``1 / hops``, where
    ``hops`` is the graph distance reported by ``_hops_to_skill``. Skills the
    user already has, and skills that cannot be reached, contribute nothing.

    Returns the summed boost as a float rounded to 3 decimal places.
    """
    total = 0.0
    for needed in project_skills:
        if needed in user_skills:
            continue  # already known, no gap to bridge
        distance = _hops_to_skill(needed, user_skills, graph)
        if not distance or distance <= 0:
            continue  # unreachable (None) or not a positive distance
        total += 1.0 / distance
    return round(total, 3)


def get_progression(user_skills, recommended_ids, all_projects, graph):
    """
    Suggest up to three "next step" projects for the user.

    A project qualifies when it was NOT already recommended and uses at
    least one skill that is exactly one hop away from the user's current
    skills in ``graph``. Skills the user already has are not counted as
    reachable, so a project that teaches nothing new is never suggested.

    Args:
        user_skills: skills the user already has.
        recommended_ids: ids of projects already recommended; these are
            skipped.
        all_projects: project dicts; each must have an "id" and may have a
            "skills" list.
        graph: dict mapping a skill to the skills it leads to.

    Returns:
        A list of at most 3 dicts ``{"project": <project>, "gap_score":
        <float>}``, ordered by ``gap_score`` from highest to lowest.
    """
    known = set(user_skills)
    # Skills one edge away from something the user knows, minus the skills
    # the user already has (those are not a step forward).
    reachable = {
        neighbour
        for skill in user_skills
        for neighbour in graph.get(skill, [])
    } - known

    # Set lookup instead of scanning the id list once per project.
    already_recommended = set(recommended_ids)

    progression = []
    for project in all_projects:
        if project["id"] in already_recommended:
            continue
        # Normalise skill names the same way for graph lookups.
        project_skills = [
            SKILL_ALIASES.get(s.lower(), s.lower())
            for s in project.get("skills", [])
        ]
        if any(s in reachable for s in project_skills):
            progression.append({
                "project": project,
                "gap_score": gap_boost(user_skills, project_skills, graph),
            })

    progression.sort(key=lambda entry: entry["gap_score"], reverse=True)
    return progression[:3]


# ---------------------------------------------------------------------------
# Clustering helpers
Expand Down Expand Up @@ -187,6 +281,8 @@ def get_recommendations(skills_string, level, interest, time_availability):
"""
user_skills = parse_skills(skills_string)
all_projects = load_all_projects()
graph = _load_skill_graph()


scored = []
for project in all_projects:
Expand All @@ -202,14 +298,18 @@ def get_recommendations(skills_string, level, interest, time_availability):

cluster_data = _load_clusters()
related = _get_related(top_ids, all_projects, cluster_data) if cluster_data else []
graph = _load_skill_graph()
progression = get_progression(user_skills, top_ids, all_projects, graph)

return {
"recommendations": top_projects,
"related": related,
"progression": progression,
}


# Accepted (lower-case) values for the level, interest and time inputs.
VALID_LEVELS = [
    "beginner",
    "intermediate",
    "advanced",
]
VALID_INTERESTS = [
    "data",
    "web",
    "backend",
    "cybersecurity",
    "games",
    "education",
    "automation",
]
VALID_TIME_AVAILABILITY = [
    "low",
    "medium",
    "high",
]


Expand Down
Loading