diff --git a/data/projects.json b/data/projects.json index f33cbe0..30365ce 100644 --- a/data/projects.json +++ b/data/projects.json @@ -416,7 +416,7 @@ "starter_code": "starter_code/survey_form/index.html" }, { - "id": 10, + "id": 12, "title": "API ETL Pipeline", "skills": ["Python", "pandas", "requests"], "level": "Intermediate", @@ -503,7 +503,7 @@ "starter_code": "starter_code/ai_resume_analyzer.py" }, { - "id": 11, + "id": 14, "title": "Number Guessing Game", "skills": [ "Python" @@ -539,7 +539,7 @@ "starter_code": "starter_code/number_guessing.py" }, { - "id": 12, + "id": 15, "title": "Simple Email Automation", "skills": [ "Python" @@ -576,7 +576,7 @@ "starter_code": "starter_code/email_automation.py" }, { - "id": 13, + "id": 16, "title": "Quiz App", "skills": [ "HTML", @@ -616,7 +616,7 @@ "starter_code": "starter_code/quiz_app.html" }, { - "id": 14, + "id": 17, "title": "File Organiser Script", "skills": ["Python"], "level": "Beginner", @@ -647,7 +647,7 @@ "starter_code": "starter_code/file_organiser.py" }, { - "id": 15, + "id": 18, "title": "Flashcard Study App", "skills": ["HTML", "CSS", "JavaScript"], "level": "Beginner", @@ -678,7 +678,7 @@ "starter_code": "starter_code/flashcard_app.html" }, { - "id": 16, + "id": 19, "title": "Budget Tracker Web App", "skills": ["HTML", "CSS", "JavaScript"], "level": "Intermediate", @@ -710,7 +710,7 @@ "starter_code": "starter_code/budget_tracker.html" }, { - "id": 17, + "id": 20, "title": "Network Port Scanner", "skills": ["Python"], "level": "Intermediate", @@ -741,7 +741,7 @@ "starter_code": "starter_code/port_scanner.py" }, { - "id": 18, + "id": 21, "title": "Typing Speed Test", "skills": ["HTML", "CSS", "JavaScript"], "level": "Beginner", @@ -773,7 +773,7 @@ "starter_code": "starter_code/typing_test.html" }, { - "id": 19, + "id": 22, "title": "Course Progress Tracker", "skills": ["Python"], "level": "Intermediate", diff --git a/data/skill_graph.json b/data/skill_graph.json new file mode 100644 index 
0000000..d244719
--- /dev/null
+++ b/data/skill_graph.json
@@ -0,0 +1,13 @@
+{
+  "python": ["flask", "pandas", "requests"],
+  "javascript": ["node.js", "html", "css"],
+  "html": ["css", "javascript"],
+  "css": ["javascript"],
+  "flask": ["javascript", "html", "css"],
+  "pandas": ["requests"],
+  "node.js": ["javascript"],
+  "java": ["spring", "sql"],
+  "requests": [],
+  "spring": [],
+  "sql": []
+}
\ No newline at end of file
diff --git a/test_recommender.py b/test_recommender.py
index b25f85b..ba661c0 100644
--- a/test_recommender.py
+++ b/test_recommender.py
@@ -173,6 +173,38 @@ def section(title):
                f"overlap: {[p['title'] for p in overlap]}")
 else:
     print(" SKIP no recommendations returned, skipping overlap check")
+
+
+# ---------------------------------------------------------------------------
+# Progression (skill graph)
+# ---------------------------------------------------------------------------
+
+section("Skill graph progression")
+
+result_prog = get_recommendations("Python", "Intermediate", "Web", "High")
+
+if "progression" in result_prog:
+    passed("dict has 'progression' key")
+else:
+    failed("dict has 'progression' key", f"keys found: {list(result_prog.keys())}")
+
+# .get() avoids a KeyError here when the key is missing (already reported
+# above); the resulting None is then reported by the list check below.
+prog = result_prog.get("progression")
+if isinstance(prog, list):
+    passed(f"progression is a list ({len(prog)} result(s))")
+    for p in prog:
+        print(f" → {p['project']['title']} (gap_score: {p['gap_score']})")
+
+    rec_ids = [p["id"] for p in result_prog["recommendations"]]
+    overlap = [p for p in prog if p["project"]["id"] in rec_ids]
+    if not overlap:
+        passed("progression projects don't repeat recommended ones")
+    else:
+        failed("progression projects don't repeat recommended ones",
+               f"overlap: {[p['project']['title'] for p in overlap]}")
+else:
+    failed("progression is a list", f"got {type(prog)}")
 
 # ---------------------------------------------------------------------------
 # Summary
diff --git 
a/utils/recommender.py b/utils/recommender.py
index 111dddd..38fca1b 100644
--- a/utils/recommender.py
+++ b/utils/recommender.py
@@ -110,9 +110,135 @@ def score_single_project(project, user_skills, level, interest, time_availabilit
 
     if project_time == user_time:
         score += SCORING_WEIGHTS["time"]
+
+    graph = _load_skill_graph()
+    score += gap_boost(user_skills, project_skills, graph)
 
     return score
 
+# ---------------------------------------------------------------------------
+# Skill graph helpers
+# ---------------------------------------------------------------------------
+
+# Parsed skill graph, memoised after the first successful load so that
+# scoring every project does not re-read and re-parse the JSON file.
+_skill_graph_cache = None
+
+
+def _load_skill_graph():
+    """
+    Load data/skill_graph.json (maps each skill to a list of neighbour skills).
+
+    The parsed graph is cached after the first successful load. Returns an
+    empty dict if the file is missing, unreadable, not valid JSON, or not a
+    JSON object; failures are not cached, so a later call retries.
+    """
+    global _skill_graph_cache
+    if _skill_graph_cache is not None:
+        return _skill_graph_cache
+
+    path = os.path.join(
+        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+        "data", "skill_graph.json"
+    )
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            graph = json.load(f)
+    except (json.JSONDecodeError, OSError):
+        # OSError also covers a missing file (FileNotFoundError).
+        return {}
+    if not isinstance(graph, dict):
+        return {}
+
+    _skill_graph_cache = graph
+    return graph
+
+
+def _hops_to_skill(target, user_skills, graph, max_hops=3):
+    """
+    BFS from every known user skill — find minimum hops to reach target.
+
+    Returns 0 if the user already has target, the hop count if target is
+    reachable within max_hops, or None if it is not.
+    """
+    visited = set(user_skills)
+    if target in visited:
+        return 0
+
+    frontier = list(visited)
+    for hop in range(1, max_hops + 1):
+        next_frontier = []
+        for skill in frontier:
+            for neighbour in graph.get(skill, []):
+                if neighbour == target:
+                    return hop
+                if neighbour not in visited:
+                    visited.add(neighbour)
+                    next_frontier.append(neighbour)
+        frontier = next_frontier
+
+    return None
+
+
+def gap_boost(user_skills, project_skills, graph):
+    """
+    For each project skill the user doesn't have,
+    compute boost based on graph distance.
+
+    boost = 1/hops per reachable missing skill
+    Returns total boost score (float), rounded to 3 decimal places.
+    """
+    known = set(user_skills)
+    boost = 0.0
+    for skill in project_skills:
+        if skill in known:
+            continue
+        hops = _hops_to_skill(skill, known, graph)
+        # None means unreachable within max_hops: no boost for that skill.
+        if hops:
+            boost += 1.0 / hops
+    return round(boost, 3)
+
+
+def get_progression(user_skills, recommended_ids, all_projects, graph):
+    """
+    Return up to 3 projects that need a skill 1 hop away from the user's
+    current skills but were NOT already recommended.
+
+    Each entry is {"project": <project dict>, "gap_score": <float>},
+    sorted by gap_score, highest first.
+    """
+    known = set(user_skills)
+    already_recommended = set(recommended_ids)
+
+    # Find all 1-hop reachable skills. Skills the user already has are
+    # removed: the graph contains cycles (e.g. html -> css -> javascript
+    # -> html), so without this a project the user can already build
+    # would be listed as a "next step" with a gap_score of 0.
+    reachable = {
+        neighbour
+        for skill in known
+        for neighbour in graph.get(skill, [])
+    } - known
+
+    progression = []
+    for project in all_projects:
+        if project["id"] in already_recommended:
+            continue
+        project_skills = [
+            SKILL_ALIASES.get(s.lower(), s.lower())
+            for s in project.get("skills", [])
+        ]
+        # Project skills must overlap with reachable skills
+        if any(s in reachable for s in project_skills):
+            progression.append({
+                "project": project,
+                "gap_score": gap_boost(known, project_skills, graph),
+            })
+
+    progression.sort(key=lambda x: x["gap_score"], reverse=True)
+    return progression[:3]
+
 
 # ---------------------------------------------------------------------------
 # Clustering helpers
@@ -187,6 +313,8 @@ def get_recommendations(skills_string, level, interest, time_availability):
     """
     user_skills = parse_skills(skills_string)
     all_projects = load_all_projects()
+    graph = _load_skill_graph()
+
 
     scored = []
     for project in all_projects:
@@ -202,14 +330,17 @@ def get_recommendations(skills_string, level, interest, time_availability):
 
     cluster_data = _load_clusters()
     related = _get_related(top_ids, all_projects, cluster_data) if cluster_data else []
+    progression = get_progression(user_skills, top_ids, all_projects, graph)
 
     return {
         "recommendations": top_projects,
         "related": related,
+        "progression": progression,
     }
 
 
 VALID_LEVELS = ["beginner", "intermediate", "advanced"]
+VALID_INTERESTS = ["data", "web", "backend", "cybersecurity", "games", "education", "automation"]
VALID_TIME_AVAILABILITY = ["low", "medium", "high"]