komalharshita · parinaB · Jun 9, 2026
diff --git a/data/projects.json b/data/projects.json
@@ -416,7 +416,7 @@
     "starter_code": "starter_code/survey_form/index.html"
   },
   {
-    "id": 10,
+    "id": 12,
     "title": "API ETL Pipeline",
     "skills": ["Python", "pandas", "requests"],
     "level": "Intermediate",
@@ -503,7 +503,7 @@
     "starter_code": "starter_code/ai_resume_analyzer.py"
   },
   {
-    "id": 11,
+    "id": 14,
     "title": "Number Guessing Game",
     "skills": [
       "Python"
@@ -539,7 +539,7 @@
     "starter_code": "starter_code/number_guessing.py"
   },
   {
-    "id": 12,
+    "id": 15,
     "title": "Simple Email Automation",
     "skills": [
       "Python"
@@ -576,7 +576,7 @@
     "starter_code": "starter_code/email_automation.py"
   },
   {
-    "id": 13,
+    "id": 16,
     "title": "Quiz App",
     "skills": [
       "HTML",
@@ -616,7 +616,7 @@
     "starter_code": "starter_code/quiz_app.html"
   },
   {
-    "id": 14,
+    "id": 17,
     "title": "File Organiser Script",
     "skills": ["Python"],
     "level": "Beginner",
@@ -647,7 +647,7 @@
     "starter_code": "starter_code/file_organiser.py"
   },
   {
-    "id": 15,
+    "id": 18,
     "title": "Flashcard Study App",
     "skills": ["HTML", "CSS", "JavaScript"],
     "level": "Beginner",
@@ -678,7 +678,7 @@
     "starter_code": "starter_code/flashcard_app.html"
   },
   {
-    "id": 16,
+    "id": 19,
     "title": "Budget Tracker Web App",
     "skills": ["HTML", "CSS", "JavaScript"],
     "level": "Intermediate",
@@ -710,7 +710,7 @@
     "starter_code": "starter_code/budget_tracker.html"
   },
   {
-    "id": 17,
+    "id": 20,
     "title": "Network Port Scanner",
     "skills": ["Python"],
     "level": "Intermediate",
@@ -741,7 +741,7 @@
     "starter_code": "starter_code/port_scanner.py"
   },
   {
-    "id": 18,
+    "id": 21,
     "title": "Typing Speed Test",
     "skills": ["HTML", "CSS", "JavaScript"],
     "level": "Beginner",
@@ -773,7 +773,7 @@
     "starter_code": "starter_code/typing_test.html"
   },
   {
-    "id": 19,
+    "id": 22,
     "title": "Course Progress Tracker",
     "skills": ["Python"],
     "level": "Intermediate",

diff --git a/data/skill_graph.json b/data/skill_graph.json
@@ -0,0 +1,13 @@
+{
+  "python": ["flask", "pandas", "requests"],
+  "javascript": ["node.js", "html", "css"],
+  "html": ["css", "javascript"],
+  "css": ["javascript"],
+  "flask": ["javascript", "html", "css"],
+  "pandas": ["requests"],
+  "node.js": ["javascript"],
+  "java": ["spring", "sql"],
+  "requests": [],
+  "spring": [],
+  "sql": []
+}
diff --git a/test_recommender.py b/test_recommender.py
@@ -173,6 +173,39 @@ def section(title):
                    f"overlap: {[p['title'] for p in overlap]}")
     else:
         print("  SKIP  no recommendations returned, skipping overlap check")
+
+
+# ---------------------------------------------------------------------------
+# Progression (skill graph)
+# ---------------------------------------------------------------------------
+
+section("Skill graph progression")
+
+result_prog = get_recommendations("Python", "Intermediate", "Web", "High")
+
+if "progression" in result_prog:
+    passed("dict has 'progression' key")
+else:
+    failed("dict has 'progression' key", f"keys found: {list(result_prog.keys())}")
+
+# .get() so a missing key is reported above instead of crashing the script.
+prog = result_prog.get("progression")
+if isinstance(prog, list):
+    passed(f"progression is a list  ({len(prog)} result(s))")
+    for p in prog:
+        print(f"        → {p['project']['title']}  (gap_score: {p['gap_score']})")
+else:
+    failed("progression is a list", f"got {type(prog)}")
+    prog = []  # keep the overlap check below runnable
+
+rec_ids = {p["id"] for p in result_prog["recommendations"]}
+overlap = [p for p in prog if p["project"]["id"] in rec_ids]
+if not overlap:
+    passed("progression projects don't repeat recommended ones")
+else:
+    # Entries are {"project": ..., "gap_score": ...}; the title is nested.
+    failed("progression projects don't repeat recommended ones",
+           f"overlap: {[p['project']['title'] for p in overlap]}")
 
 # ---------------------------------------------------------------------------
 # Summary

diff --git a/utils/recommender.py b/utils/recommender.py
@@ -110,9 +110,103 @@ def score_single_project(project, user_skills, level, interest, time_availabilit
 
     if project_time == user_time:
         score += SCORING_WEIGHTS["time"]
+
+    graph = _load_skill_graph()
+    score += gap_boost(user_skills, project_skills, graph)
 
     return score
 
+# ---------------------------------------------------------------------------
+# Skill graph helpers
+# ---------------------------------------------------------------------------
+
+def _load_skill_graph():
+    """Load data/skill_graph.json; return {} on any failure or non-object JSON."""
+    path = os.path.join(
+        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+        "data", "skill_graph.json"
+    )
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            graph = json.load(f)
+    except (ValueError, OSError):
+        # OSError: missing/unreadable file. ValueError: bad JSON or bad UTF-8.
+        return {}
+    return graph if isinstance(graph, dict) else {}
+
+
+def _hops_to_skill(target, user_skills, graph, max_hops=3):
+    """
+    Level-by-level search outward from all of the user's skills at once.
+    Returns 0 if target is already known, else the fewest graph edges
+    needed to reach it, or None if it is farther than max_hops away.
+    """
+    if target in user_skills:
+        return 0
+
+    seen = set(user_skills)
+    current_level = list(user_skills)
+    for depth in range(1, max_hops + 1):
+        upcoming = []
+        for known in current_level:
+            for adjacent in graph.get(known, []):
+                if adjacent == target:
+                    return depth
+                if adjacent in seen:
+                    continue
+                seen.add(adjacent)
+                upcoming.append(adjacent)
+        current_level = upcoming
+    return None
+
+
+def gap_boost(user_skills, project_skills, graph):
+    """
+    Score how close the project's unfamiliar skills are to the user.
+
+    Each project skill missing from user_skills adds 1/hops, where hops
+    is its graph distance; unreachable skills add nothing.
+    Returns the summed boost rounded to 3 decimals (float).
+    """
+    total = 0.0
+    missing = (s for s in project_skills if s not in user_skills)
+    for wanted in missing:
+        distance = _hops_to_skill(wanted, user_skills, graph)
+        if distance is not None and distance > 0:
+            total += 1.0 / distance
+    return round(total, 3)
+
+
+def get_progression(user_skills, recommended_ids, all_projects, graph):
+    """
+    Return up to 3 not-yet-recommended projects that use a skill exactly
+    one graph hop beyond what the user already knows.
+
+    Each entry is {"project": <project dict>, "gap_score": <float>},
+    ordered by gap_score, highest first.
+    """
+    known = set(user_skills)
+    skip_ids = set(recommended_ids)  # O(1) membership inside the loop
+    # 1-hop neighbours; skills the user already has are not "next steps".
+    reachable = {
+        n for skill in known for n in graph.get(skill, [])
+    } - known
+
+    progression = []
+    for project in all_projects:
+        if project["id"] in skip_ids:
+            continue
+        project_skills = [
+            SKILL_ALIASES.get(s.lower(), s.lower())
+            for s in project.get("skills", [])
+        ]
+        if reachable.intersection(project_skills):
+            boost = gap_boost(user_skills, project_skills, graph)
+            progression.append({"project": project, "gap_score": boost})
+
+    progression.sort(key=lambda x: x["gap_score"], reverse=True)
+    return progression[:3]
+
 
 # ---------------------------------------------------------------------------
 # Clustering helpers
@@ -187,6 +281,8 @@ def get_recommendations(skills_string, level, interest, time_availability):
     """
     user_skills  = parse_skills(skills_string)
     all_projects = load_all_projects()
+    graph = _load_skill_graph()
+
 
     scored = []
     for project in all_projects:
@@ -202,14 +298,18 @@ def get_recommendations(skills_string, level, interest, time_availability):
 
     cluster_data = _load_clusters()
     related = _get_related(top_ids, all_projects, cluster_data) if cluster_data else []
+    graph = _load_skill_graph()
+    progression = get_progression(user_skills, top_ids, all_projects, graph)
 
     return {
         "recommendations": top_projects,
         "related":         related,
+        "progression":     progression,
     }
 
 
 VALID_LEVELS = ["beginner", "intermediate", "advanced"]
+VALID_INTERESTS = ["data", "web", "backend", "cybersecurity", "games", "education", "automation"]
 VALID_TIME_AVAILABILITY = ["low", "medium", "high"]