diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..01e3010 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,22 @@ +.git +.gitignore +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +.env +.env.* +db.sqlite3 +staticfiles/ +media/ +*.log +.coverage +htmlcov/ +.pytest_cache/ +.github/ +Jenkinsfile +docker-compose.yml +Dockerfile diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f44706b --- /dev/null +++ b/.env.example @@ -0,0 +1,11 @@ +DEBUG=False +SECRET_KEY=your-secret-key-here +ALLOWED_HOSTS=127.0.0.1,localhost +DATABASE_URL=postgres://user:password@host:port/dbname +GEMINI_API_KEY=your-gemini-api-key +ANTHROPIC_API_KEY=your-anthropic-api-key +GOOGLE_CLIENT_ID=your-google-client-id +GOOGLE_CLIENT_SECRET=your-google-client-secret +CSRF_TRUSTED_ORIGINS=http://127.0.0.1,http://localhost +EMAIL_HOST_USER=your-email@example.com +EMAIL_HOST_PASSWORD=your-app-password diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..8eda45b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,64 @@ +name: CI/CD Pipeline + +on: + push: + branches: + - main + - 'feature/**' + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Django system check + env: + SECRET_KEY: 'ci-test-secret-key-at-least-50-characters-long-for-security' + DEBUG: 'True' + DATABASE_URL: '' + run: | + python manage.py check + + - name: Run Tests + env: + SECRET_KEY: 'ci-test-secret-key-at-least-50-characters-long-for-security' + DEBUG: 'True' + DATABASE_URL: '' + run: | + python manage.py test --verbosity=2 + + build-and-push: + needs: test + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/main' + steps: 
+ - name: Checkout code + uses: actions/checkout@v4 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . + push: true + tags: ${{ secrets.DOCKER_USERNAME }}/studyai:latest diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8e83ff9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +# Use an official Python runtime as a parent image +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Set work directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + libpq-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +COPY requirements.txt /app/ +RUN pip install --upgrade pip && \ + pip install -r requirements.txt + +# Copy the project +COPY . 
/app/ + +# Collect static files (inject dummy SECRET_KEY so Django doesn't error at build time) +RUN SECRET_KEY=dummy-build-secret DEBUG=False DATABASE_URL='' \ + python manage.py collectstatic --noinput + +# Expose port 8000 +EXPOSE 8000 + +# Run the application with multiple workers for better concurrency +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "3", "--timeout", "120", "config.wsgi:application"] diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..e90475c --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,44 @@ +pipeline { + agent any + + environment { + DOCKER_IMAGE = "${env.DOCKER_USERNAME}/studyai:latest" + COMPOSE_FILE = "docker-compose.yml" + } + + stages { + stage('Pull Image') { + steps { + script { + sh "docker pull ${DOCKER_IMAGE}" + } + } + } + + stage('Deploy') { + steps { + script { + sh "docker-compose -f ${COMPOSE_FILE} down || true" + sh "docker-compose -f ${COMPOSE_FILE} up -d" + } + } + } + + stage('Cleanup') { + steps { + script { + sh "docker image prune -f" + } + } + } + } + + post { + success { + echo 'Deployment Successful!' + } + failure { + echo 'Deployment Failed!' 
+ } + } +} diff --git a/config/settings.py b/config/settings.py index 71f99b3..f9a2162 100644 --- a/config/settings.py +++ b/config/settings.py @@ -38,6 +38,7 @@ 'ai_engine', 'quizzes', 'dashboard', + 'features', # axes REMOVED — incompatible with Django 6 ] @@ -155,13 +156,18 @@ CSRF_TRUSTED_ORIGINS = os.getenv('CSRF_TRUSTED_ORIGINS', 'http://127.0.0.1,http://localhost').split(',') # ── EMAIL (for OTP) ── -EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' +EMAIL_HOST_USER = os.getenv('EMAIL_HOST_USER', '') +EMAIL_HOST_PASSWORD = os.getenv('EMAIL_HOST_PASSWORD', '') + +if not EMAIL_HOST_USER and DEBUG: + EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' +else: + EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' + EMAIL_HOST = 'smtp.gmail.com' EMAIL_PORT = 587 EMAIL_USE_TLS = True -EMAIL_HOST_USER = os.getenv('EMAIL_HOST_USER', '') -EMAIL_HOST_PASSWORD = os.getenv('EMAIL_HOST_PASSWORD', '') -DEFAULT_FROM_EMAIL = EMAIL_HOST_USER +DEFAULT_FROM_EMAIL = EMAIL_HOST_USER or 'noreply@studyai.local' MESSAGE_TAGS = { messages_constants.DEBUG: 'secondary', @@ -209,10 +215,9 @@ SITE_ID = 1 -# Minimal account settings so login drops straight in -ACCOUNT_EMAIL_REQUIRED = True -ACCOUNT_USERNAME_REQUIRED = False -ACCOUNT_AUTHENTICATION_METHOD = 'email' +# Allauth settings (allauth >= 65.x API) +ACCOUNT_LOGIN_METHODS = {'email'} +ACCOUNT_SIGNUP_FIELDS = ['email*', 'password1*', 'password2*'] ACCOUNT_EMAIL_VERIFICATION = 'none' SOCIALACCOUNT_PROVIDERS = { diff --git a/config/urls.py b/config/urls.py index 1725cd0..ee35def 100644 --- a/config/urls.py +++ b/config/urls.py @@ -10,12 +10,12 @@ urlpatterns = [ path('admin/', admin.site.urls), path('', account_views.landing_page, name='landing'), - path('accounts/', include('accounts.urls')), - path('accounts/', include('allauth.urls')), # Allauth routes + path('accounts/', include('accounts.urls')), # Custom accounts + path('accounts/', include('allauth.urls')), # Allauth (OAuth) path('courses/', 
include('courses.urls')), path('ai/', include('ai_engine.urls')), path('quizzes/', include('quizzes.urls')), - path('', include('accounts.urls')), + path('features/', include('features.urls')), path('dashboard/', include('dashboard.urls')), ] diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..40d51cd --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,11 @@ +version: '3.8' + +services: + web: + build: . + container_name: studyai_web + ports: + - "8000:8000" + env_file: + - .env + restart: unless-stopped diff --git a/features/__init__.py b/features/__init__.py new file mode 100644 index 0000000..5887c0b --- /dev/null +++ b/features/__init__.py @@ -0,0 +1 @@ +# Init file diff --git a/features/adaptive_load_optimizer/__init__.py b/features/adaptive_load_optimizer/__init__.py new file mode 100644 index 0000000..5887c0b --- /dev/null +++ b/features/adaptive_load_optimizer/__init__.py @@ -0,0 +1 @@ +# Init file diff --git a/features/adaptive_load_optimizer/algorithms.py b/features/adaptive_load_optimizer/algorithms.py new file mode 100644 index 0000000..904c604 --- /dev/null +++ b/features/adaptive_load_optimizer/algorithms.py @@ -0,0 +1,209 @@ +from decimal import Decimal +from datetime import timedelta +from django.utils import timezone +from features.models import UserEngagementMetrics + +class AdaptiveLoadOptimizer: + """ + Calculate real-time engagement metrics for each user interaction. + Patent evidence: All calculations logged with exact constants. + """ + + # HARDCODED CONSTANTS - Document why these were chosen! 
+ RESPONSE_QUALITY_WEIGHT = 0.4 + INTERACTION_FREQ_WEIGHT = 0.3 + TIME_PRESSURE_WEIGHT = 0.3 + + FATIGUE_DENOMINATOR = 45 # minutes - session duration threshold + + LOAD_ENGAGEMENT_MULTIPLIER = 0.07 # How much engagement affects load + LOAD_FATIGUE_PENALTY = 0.12 # How much fatigue reduces load + LOAD_ERROR_DECAY_BONUS = 0.15 # How much improvement helps load + + FLOW_ZONE_MIN = 0.70 + FLOW_ZONE_MAX = 0.80 + + SESSION_BREAK_THRESHOLD = 45 # minutes + LOW_ENGAGEMENT_BREAK = 0.3 + + @staticmethod + def calculate_response_quality(correct_answers: int, total_questions: int) -> float: + """ + response_quality = correct_answers / total_questions + Range: 0.0 to 1.0 + """ + if total_questions == 0: + return 0.5 # neutral default + return min(1.0, max(0.0, correct_answers / total_questions)) + + @staticmethod + def calculate_interaction_frequency(interaction_count: int, session_duration_minutes: float) -> float: + """ + interaction_frequency = interactions / session_duration_minutes + Normalized to 0.0-1.0 range + """ + if session_duration_minutes == 0: + return 0.5 + # Assume ~10 interactions/minute is maximum (1.0) + freq = interaction_count / (session_duration_minutes * 10) + return min(1.0, max(0.0, freq)) + + @staticmethod + def calculate_time_pressure(user_avg_response_time_ms: float, optimal_response_time_ms: float) -> float: + """ + time_pressure = user_avg_response_time / optimal_response_time + Range: 0.0 to 2.0+ + """ + if optimal_response_time_ms == 0: + return 1.0 + pressure = user_avg_response_time_ms / optimal_response_time_ms + return min(2.0, max(0.0, pressure)) + + @classmethod + def calculate_engagement_score( + cls, + response_quality: float, + interaction_frequency: float, + time_pressure: float + ) -> float: + raw_score = ( + (cls.RESPONSE_QUALITY_WEIGHT * response_quality) + + (cls.INTERACTION_FREQ_WEIGHT * interaction_frequency) - + (cls.TIME_PRESSURE_WEIGHT * time_pressure) + ) + + normalized_score = max(0.0, min(1.0, raw_score)) + return 
round(normalized_score, 4) + + @staticmethod + def calculate_historical_error_rate(recent_questions: list) -> float: + if not recent_questions or len(recent_questions) == 0: + return 0.5 + + errors = sum(1 for q in recent_questions if not q.get('is_correct', False)) + return errors / len(recent_questions) + + @classmethod + def calculate_fatigue_factor( + cls, + session_duration_minutes: float, + historical_error_rate: float + ) -> float: + time_factor = session_duration_minutes / cls.SESSION_BREAK_THRESHOLD + return round(time_factor * historical_error_rate, 4) + + @staticmethod + def calculate_error_decay(error_rates_last_5_sessions: list) -> float: + if len(error_rates_last_5_sessions) < 2: + return 0.0 + trend = error_rates_last_5_sessions[0] - error_rates_last_5_sessions[-1] + return round(trend, 4) + + @classmethod + def calculate_load_variable( + cls, + L_previous: float, + engagement_score: float, + fatigue_factor: float, + error_decay: float + ) -> float: + change = ( + (cls.LOAD_ENGAGEMENT_MULTIPLIER * engagement_score) - + (cls.LOAD_FATIGUE_PENALTY * fatigue_factor) + + (cls.LOAD_ERROR_DECAY_BONUS * error_decay) + ) + new_load = L_previous + change + return round(max(0.0, min(2.0, new_load)), 4) + + @classmethod + def is_in_flow_zone(cls, engagement_score: float) -> bool: + return cls.FLOW_ZONE_MIN <= engagement_score <= cls.FLOW_ZONE_MAX + + @classmethod + def get_difficulty_adjustment(cls, load_variable: float) -> str: + if load_variable < 0.30: + return "reduce" + elif cls.FLOW_ZONE_MIN <= load_variable <= cls.FLOW_ZONE_MAX: + return "maintain" + elif load_variable > 0.85: + return "increase" + else: + return "maintain" + + @classmethod + def should_force_break(cls, session_duration_minutes: float, engagement_score: float) -> bool: + if session_duration_minutes > cls.SESSION_BREAK_THRESHOLD: + return True + if engagement_score < cls.LOW_ENGAGEMENT_BREAK: + return True + return False + + +def process_user_interaction(user_id: int, interaction_data: 
dict) -> dict: + try: + session_id = interaction_data.get('session_id') + is_correct = interaction_data.get('is_correct', False) + response_time_ms = interaction_data.get('response_time_ms', 3000) + session_duration_minutes = interaction_data.get('session_duration_minutes', 0) + correct_answers = interaction_data.get('correct_answers_so_far', 0) + total_questions = interaction_data.get('questions_attempted_so_far', 1) + + try: + prev_metric = UserEngagementMetrics.objects.filter( + user_id=user_id, + session_id=session_id + ).latest('timestamp') + L_previous = prev_metric.load_variable_L or 0.5 + except: + L_previous = 0.5 + + response_quality = AdaptiveLoadOptimizer.calculate_response_quality(correct_answers, total_questions) + interaction_frequency = AdaptiveLoadOptimizer.calculate_interaction_frequency(total_questions, max(session_duration_minutes, 0.1)) + time_pressure = AdaptiveLoadOptimizer.calculate_time_pressure(response_time_ms, 3000) + engagement_score = AdaptiveLoadOptimizer.calculate_engagement_score(response_quality, interaction_frequency, time_pressure) + + error_rate = 1 - (correct_answers / max(total_questions, 1)) + fatigue_factor = AdaptiveLoadOptimizer.calculate_fatigue_factor(session_duration_minutes, error_rate) + error_decay = AdaptiveLoadOptimizer.calculate_error_decay([error_rate]) + + load_variable = AdaptiveLoadOptimizer.calculate_load_variable(L_previous, engagement_score, fatigue_factor, error_decay) + is_in_flow = AdaptiveLoadOptimizer.is_in_flow_zone(engagement_score) + difficulty_adj = AdaptiveLoadOptimizer.get_difficulty_adjustment(load_variable) + should_break = AdaptiveLoadOptimizer.should_force_break(session_duration_minutes, engagement_score) + + metric = UserEngagementMetrics.objects.create( + user_id=user_id, + session_id=session_id, + response_time_ms=response_time_ms, + is_correct=is_correct, + total_questions_in_session=total_questions, + correct_answers_in_session=correct_answers, + 
session_duration_minutes=Decimal(str(session_duration_minutes)), + response_quality=Decimal(str(response_quality)), + interaction_frequency=Decimal(str(interaction_frequency)), + time_pressure=Decimal(str(time_pressure)), + engagement_score=Decimal(str(engagement_score)), + fatigue_factor=Decimal(str(fatigue_factor)), + error_decay=Decimal(str(error_decay)), + load_variable_L=Decimal(str(load_variable)), + is_in_flow_zone=is_in_flow, + difficulty_adjustment=difficulty_adj, + break_recommended=should_break + ) + + return { + 'engagement_score': float(engagement_score), + 'load_variable': float(load_variable), + 'is_in_flow_zone': is_in_flow, + 'fatigue_factor': float(fatigue_factor), + 'difficulty_adjustment': difficulty_adj, + 'should_break': should_break, + 'metrics_logged': True, + 'metric_id': metric.id + } + except Exception as e: + print(f"Error in process_user_interaction: {str(e)}") + return { + 'error': str(e), + 'metrics_logged': False + } diff --git a/features/apps.py b/features/apps.py new file mode 100644 index 0000000..8b67e0e --- /dev/null +++ b/features/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + +class FeaturesConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'features' diff --git a/features/hybrid_nlp_analyzer/__init__.py b/features/hybrid_nlp_analyzer/__init__.py new file mode 100644 index 0000000..5887c0b --- /dev/null +++ b/features/hybrid_nlp_analyzer/__init__.py @@ -0,0 +1 @@ +# Init file diff --git a/features/hybrid_nlp_analyzer/heuristic_gate.py b/features/hybrid_nlp_analyzer/heuristic_gate.py new file mode 100644 index 0000000..f3c0708 --- /dev/null +++ b/features/hybrid_nlp_analyzer/heuristic_gate.py @@ -0,0 +1,125 @@ +from decimal import Decimal +from datetime import timedelta +from django.utils import timezone + +class HeuristicGate: + """ + Stage 1 of error analysis: classify errors without calling Gemini. + Target: 70-85% of errors resolved here, only 15-30% need LLM. 
+ """ + + CONFIDENCE_GATE_THRESHOLD = 0.75 # If max_confidence > 0.75, use heuristic + + TIME_PRESSURE_RULE_CONFIDENCE = 0.82 + TIME_PRESSURE_RESPONSE_THRESHOLD_MS = 2000 + TIME_PRESSURE_ACCURACY_THRESHOLD = 0.40 + + COGNITIVE_OVERLOAD_RULE_CONFIDENCE = 0.78 + COGNITIVE_OVERLOAD_SESSION_MINUTES = 40 + COGNITIVE_OVERLOAD_ERROR_THRESHOLD = 2 + + CONCEPTUAL_CONFUSION_RULE_CONFIDENCE = 0.88 + CONCEPTUAL_CONFUSION_ENTROPY_THRESHOLD = 0.65 + + @staticmethod + def rule_1_time_pressure_detection( + response_time_ms: int, + accuracy_on_topic_last_10: float + ) -> tuple: + time_pressed = response_time_ms < HeuristicGate.TIME_PRESSURE_RESPONSE_THRESHOLD_MS + low_accuracy = accuracy_on_topic_last_10 < HeuristicGate.TIME_PRESSURE_ACCURACY_THRESHOLD + + if time_pressed and low_accuracy: + return "Time-Pressure Fatigue", HeuristicGate.TIME_PRESSURE_RULE_CONFIDENCE + else: + return None, 0.0 + + @staticmethod + def rule_2_cognitive_overload_detection( + session_duration_minutes: float, + consecutive_errors: int, + response_time_trend: str + ) -> tuple: + long_session = session_duration_minutes > HeuristicGate.COGNITIVE_OVERLOAD_SESSION_MINUTES + many_errors = consecutive_errors > HeuristicGate.COGNITIVE_OVERLOAD_ERROR_THRESHOLD + slowing_down = response_time_trend == "increasing" + + if long_session and many_errors and slowing_down: + return "Cognitive Overload", HeuristicGate.COGNITIVE_OVERLOAD_RULE_CONFIDENCE + else: + return None, 0.0 + + @staticmethod + def rule_3_conceptual_confusion_detection( + error_pattern_entropy: float, + concept_id: int, + user_id: int, + recent_errors_on_concept: int, + prerequisite_gap_correlation: float + ) -> tuple: + high_entropy = error_pattern_entropy > HeuristicGate.CONCEPTUAL_CONFUSION_ENTROPY_THRESHOLD + repeated = recent_errors_on_concept > 1 + gap_correlation = prerequisite_gap_correlation > 0.70 + + if high_entropy and repeated and gap_correlation: + return "Conceptual Confusion", HeuristicGate.CONCEPTUAL_CONFUSION_RULE_CONFIDENCE + else: 
+ return None, 0.0 + + @staticmethod + def apply_heuristic_gate( + user_id: int, + concept_id: int, + error_data: dict + ) -> dict: + cat_1, conf_1 = HeuristicGate.rule_1_time_pressure_detection( + error_data.get('response_time_ms', 3000), + error_data.get('accuracy_on_topic_last_10', 0.5) + ) + + cat_2, conf_2 = HeuristicGate.rule_2_cognitive_overload_detection( + error_data.get('session_duration_minutes', 0), + error_data.get('consecutive_errors', 0), + error_data.get('response_time_trend', 'stable') + ) + + cat_3, conf_3 = HeuristicGate.rule_3_conceptual_confusion_detection( + error_data.get('error_pattern_entropy', 0), + concept_id, + user_id, + error_data.get('recent_errors_on_concept', 0), + error_data.get('prerequisite_gap_correlation', 0) + ) + + results = [ + (cat_1, conf_1, "Time-Pressure Fatigue"), + (cat_2, conf_2, "Cognitive Overload"), + (cat_3, conf_3, "Conceptual Confusion") + ] + + valid_results = [(cat, conf, rule) for cat, conf, rule in results if cat is not None] + + if not valid_results: + return { + 'category': None, + 'confidence': 0.0, + 'rule_matched': None, + 'should_send_to_llm': True + } + + best_result = max(valid_results, key=lambda x: x[1]) + category, max_confidence, rule_name = best_result + + should_use_llm = max_confidence <= HeuristicGate.CONFIDENCE_GATE_THRESHOLD + + return { + 'category': category if not should_use_llm else None, + 'confidence': max_confidence, + 'rule_matched': rule_name if not should_use_llm else None, + 'should_send_to_llm': should_use_llm, + 'all_rule_results': { + 'time_pressure': (cat_1, conf_1), + 'cognitive_overload': (cat_2, conf_2), + 'conceptual_confusion': (cat_3, conf_3) + } + } diff --git a/features/hybrid_nlp_analyzer/llm_stage.py b/features/hybrid_nlp_analyzer/llm_stage.py new file mode 100644 index 0000000..eb67518 --- /dev/null +++ b/features/hybrid_nlp_analyzer/llm_stage.py @@ -0,0 +1,84 @@ +import json +import logging +import time +from google import genai +from google.genai import types 
+from django.conf import settings + +logger = logging.getLogger(__name__) + +# Simulated cost per token (based on typical Flash pricing) +# Gemini 1.5/2.5 Flash is currently free for students, but tracking cost is vital for the patent claim! +COST_PER_PROMPT_TOKEN = 0.000000075 +COST_PER_COMPLETION_TOKEN = 0.0000003 + +def process_llm_classification(question_text: str, user_response: str, correct_answer: str, heuristic_category: str, error_data: dict) -> dict: + """ + Feature #2: LLM Error Classification (Stage 2) + Called only if the Heuristic Gate fails to confidently categorize the error. + """ + start_time = time.time() + + prompt = ( + f"A student got a question wrong.\n" + f"Question: {question_text}\n" + f"Correct Answer: {correct_answer}\n" + f"Student Answer: {user_response}\n\n" + f"Based on their error pattern (Heuristic guess: {heuristic_category}), analyze WHY they got this wrong and provide a 1-sentence encouraging explanation." + ) + + try: + client = genai.Client(api_key=settings.GEMINI_API_KEY) + + response = client.models.generate_content( + model="gemini-2.5-flash", + contents=prompt, + config=types.GenerateContentConfig( + system_instruction="You are an expert tutor analyzing student errors. 
Return ONLY JSON.", + response_mime_type='application/json', + response_schema={ + "type": "OBJECT", + "properties": { + "category": {"type": "STRING", "description": "E.g., Misconception, Careless Mistake, Reading Comprehension"}, + "explanation": {"type": "STRING", "description": "A 1-sentence encouraging explanation for the student."} + }, + "required": ["category", "explanation"] + } + ) + ) + + end_time = time.time() + latency_ms = int((end_time - start_time) * 1000) + + # Parse the structured response + if response.parsed: + result = response.parsed + else: + result = json.loads(response.text) + + # Estimate API Cost for Patent Tracking (assuming 150 prompt tokens, 50 completion tokens avg) + # Note: Even if you use the free tier, this calculates what the enterprise cost *would* be. + prompt_tokens = 150 + completion_tokens = 50 + estimated_cost = (prompt_tokens * COST_PER_PROMPT_TOKEN) + (completion_tokens * COST_PER_COMPLETION_TOKEN) + + return { + 'category': result.get('category', 'Unknown'), + 'explanation': result.get('explanation', 'No explanation provided.'), + 'api_cost': estimated_cost, + 'total_latency_ms': latency_ms, + 'prompt_tokens': prompt_tokens, + 'completion_tokens': completion_tokens + } + + except Exception as e: + logger.error(f"LLM Classification Error: {e}", exc_info=True) + end_time = time.time() + return { + 'category': 'LLM Timeout/Error', + 'explanation': f"The correct answer is {correct_answer}. 
Let's review this together later!", + 'api_cost': 0.0, + 'total_latency_ms': int((end_time - start_time) * 1000), + 'prompt_tokens': 0, + 'completion_tokens': 0 + } diff --git a/features/knowledge_graph/__init__.py b/features/knowledge_graph/__init__.py new file mode 100644 index 0000000..5887c0b --- /dev/null +++ b/features/knowledge_graph/__init__.py @@ -0,0 +1 @@ +# Init file diff --git a/features/knowledge_graph/algorithms.py b/features/knowledge_graph/algorithms.py new file mode 100644 index 0000000..f0ad716 --- /dev/null +++ b/features/knowledge_graph/algorithms.py @@ -0,0 +1,98 @@ +import json +import logging +import time +from google import genai +from google.genai import types +from django.conf import settings +from features.models import Concept, KnowledgeGraphMetrics + +logger = logging.getLogger(__name__) + +class KnowledgeGraphBuilder: + """ + Feature #3: Autonomous Knowledge Graph Construction + Performs Entity-Relationship Extraction (ERE). + """ + + @staticmethod + def extract_entities_and_relations(text: str) -> dict: + """ + Uses Gemini to extract Concepts (Nodes) and Prerequisites (Edges) from unstructured text. + """ + prompt = f"Analyze the following educational text and extract the core concepts and their prerequisites:\n\n{text}" + + try: + client = genai.Client(api_key=settings.GEMINI_API_KEY) + + response = client.models.generate_content( + model="gemini-2.5-flash", + contents=prompt, + config=types.GenerateContentConfig( + system_instruction="You are an Entity-Relationship Extraction model. Extract educational concepts. 
Return ONLY JSON.", + response_mime_type='application/json', + response_schema={ + "type": "OBJECT", + "properties": { + "concepts": { + "type": "ARRAY", + "items": { + "type": "OBJECT", + "properties": { + "name": {"type": "STRING"}, + "description": {"type": "STRING"}, + "prerequisites": { + "type": "ARRAY", + "items": {"type": "STRING"} + } + }, + "required": ["name", "prerequisites"] + } + } + }, + "required": ["concepts"] + } + ) + ) + + if response.parsed: + return response.parsed + else: + return json.loads(response.text) + + except Exception as e: + logger.error(f"ERE Extraction Error: {e}") + return {"concepts": []} + + @staticmethod + def get_node_color(mastery_percent: int) -> str: + """ + Determine node color based on mastery percentage for visualization. + """ + if mastery_percent < 30: + return "RED" + elif mastery_percent < 70: + return "YELLOW" + else: + return "GREEN" + + @classmethod + def calculate_prerequisite_strength(cls, user_id: int, concept_id: int) -> float: + """ + Calculates the average mastery of all prerequisites for a given concept. 
+ """ + try: + concept = Concept.objects.get(id=concept_id) + prerequisites = concept.prerequisites.all() + + if not prerequisites: + return 1.0 # No prerequisites means 100% strength + + total_mastery = 0 + for prereq in prerequisites: + metrics = KnowledgeGraphMetrics.objects.filter(user_id=user_id, concept=prereq).first() + if metrics: + total_mastery += (metrics.mastery_percent / 100.0) + + return total_mastery / len(prerequisites) + except Exception: + return 0.5 diff --git a/features/management/commands/mock_patent_data.py b/features/management/commands/mock_patent_data.py new file mode 100644 index 0000000..a78f33c --- /dev/null +++ b/features/management/commands/mock_patent_data.py @@ -0,0 +1,83 @@ +import random +from decimal import Decimal +from django.core.management.base import BaseCommand +from django.contrib.auth import get_user_model +from features.models import ErrorAnalysisLog +from quizzes.models import Course, Question, QuizAttempt + +class Command(BaseCommand): + help = 'Generates 1000 simulated ErrorAnalysisLog records to test patent metrics.' 
+ + def handle(self, *args, **options): + self.stdout.write("Generating mock data...") + + # Create dummy user if none exist + User = get_user_model() + user, _ = User.objects.get_or_create(username='patent_tester', defaults={'email': 'tester@example.com'}) + + # Create dummy course + course, _ = Course.objects.get_or_create( + user=user, name='Big Data Fundamentals', + defaults={'topics': 'Hadoop', 'exam_date': '2026-12-31', 'complexity': 2, 'daily_study_hours': 2} + ) + + # Create dummy attempt + session, _ = QuizAttempt.objects.get_or_create( + user=user, course=course, topic='Hadoop', defaults={'total_questions': 5, 'correct_answers': 2, 'score_percent': 40} + ) + + # Create dummy question + question, _ = Question.objects.get_or_create( + course=course, topic='Hadoop', question_text='What is Hadoop?', + defaults={'option_a': 'A', 'option_b': 'B', 'option_c': 'C', 'option_d': 'D', 'correct_answer': 'A'} + ) + + ErrorAnalysisLog.objects.all().delete() + + records_to_create = 1000 + heuristic_resolved_target = int(records_to_create * 0.782) # Target ~78.2% heuristic resolution + + logs = [] + for i in range(records_to_create): + was_sent_to_llm = i >= heuristic_resolved_target + + if not was_sent_to_llm: + # Heuristic Case (Fast, 0 tokens) + latency = random.randint(25, 55) + prompt_tokens = 0 + completion_tokens = 0 + cost = Decimal('0.0') + category = random.choice(['Time-Pressure Fatigue', 'Cognitive Overload', 'Conceptual Confusion']) + else: + # LLM Case (Slow, costs tokens) + latency = random.randint(1100, 1800) + prompt_tokens = random.randint(130, 170) + completion_tokens = random.randint(40, 60) + cost = Decimal(str(prompt_tokens * 0.000000075 + completion_tokens * 0.0000003)) + category = random.choice(['Misconception', 'Reading Comprehension', 'Careless Mistake']) + + logs.append(ErrorAnalysisLog( + user=user, + session=session, + question=question, + error_type='incorrect_answer', + user_response='B', + correct_response='A', + 
response_time_ms=random.randint(1000, 5000), + accuracy_on_topic_percent=random.randint(20, 80), + session_duration_minutes=Decimal(str(random.randint(10, 60))), + consecutive_errors=random.randint(0, 3), + heuristic_rule_matched=category if not was_sent_to_llm else None, + heuristic_confidence=random.uniform(0.76, 0.99) if not was_sent_to_llm else random.uniform(0.1, 0.74), + heuristic_category=category if not was_sent_to_llm else None, + was_sent_to_llm=was_sent_to_llm, + llm_category=category if was_sent_to_llm else None, + gemini_explanation='This is a mock explanation.' if was_sent_to_llm else None, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + api_cost_usd=cost, + total_latency_ms=latency + )) + + ErrorAnalysisLog.objects.bulk_create(logs) + self.stdout.write(self.style.SUCCESS(f"Successfully generated {records_to_create} simulated patent metrics records!")) diff --git a/features/management/commands/patent_metrics.py b/features/management/commands/patent_metrics.py new file mode 100644 index 0000000..e547423 --- /dev/null +++ b/features/management/commands/patent_metrics.py @@ -0,0 +1,126 @@ +import csv +import sys +from decimal import Decimal +from django.core.management.base import BaseCommand +from django.db.models import Sum, Avg, Count +from features.models import ErrorAnalysisLog +from features.patent_metrics import generate_patent_summary + +class Command(BaseCommand): + help = 'Calculates and displays the quantitative patent metrics for token optimization and API cost reduction.' 
def add_arguments(self, parser):
    """Register the optional ``--export-csv`` flag on the command's parser."""
    parser.add_argument(
        '--export-csv',
        action='store_true',
        help='Export the raw metrics to patent_metrics_export.csv',
    )

def handle(self, *args, **options):
    """Compute the error-analysis metrics and print the report to stdout.

    Reads every ``ErrorAnalysisLog`` row, derives heuristic-vs-LLM routing
    rates, token/cost baselines and latency figures, prints them, then prints
    the paragraph returned by ``generate_patent_summary()``. When the
    ``export_csv`` option is set, ``export_to_csv()`` is called afterwards.
    """
    self.stdout.write(self.style.SUCCESS("Calculating Patent Metrics..."))

    all_logs = ErrorAnalysisLog.objects.all()
    total_errors = all_logs.count()
    if total_errors == 0:
        self.stdout.write(self.style.WARNING("No error analysis records found in the database."))
        return

    heuristic_logs = all_logs.filter(was_sent_to_llm=False)
    llm_logs = all_logs.filter(was_sent_to_llm=True)
    heuristic_resolved = heuristic_logs.count()
    llm_invocations = llm_logs.count()

    # total_errors is non-zero past the guard above, so no zero check is needed.
    heuristic_rate = (heuristic_resolved / total_errors) * 100
    llm_reduction = heuristic_rate  # Since any resolved by heuristic is an LLM invocation saved

    # One round-trip for every table-wide figure instead of one query each.
    overall = all_logs.aggregate(
        prompt_sum=Sum('prompt_tokens'),
        completion_sum=Sum('completion_tokens'),
        cost_sum=Sum('api_cost_usd'),
        latency_avg=Avg('total_latency_ms'),
    )
    total_prompt_tokens = overall['prompt_sum'] or 0
    total_completion_tokens = overall['completion_sum'] or 0
    actual_tokens_used = total_prompt_tokens + total_completion_tokens
    actual_api_cost = overall['cost_sum'] or Decimal('0.0')
    avg_overall = overall['latency_avg'] or 0

    # Baseline: pretend every error had been sent to the LLM, priced at the
    # averages observed on the rows that actually were sent.
    if llm_invocations:
        llm_stats = llm_logs.aggregate(
            prompt_avg=Avg('prompt_tokens'),
            completion_avg=Avg('completion_tokens'),
            cost_avg=Avg('api_cost_usd'),
            latency_avg=Avg('total_latency_ms'),
        )
        avg_prompt = llm_stats['prompt_avg'] or 0
        avg_completion = llm_stats['completion_avg'] or 0
        avg_cost = llm_stats['cost_avg'] or Decimal('0.0')
        llm_latency = llm_stats['latency_avg'] or 0
    else:
        # No LLM rows to average over: fall back to fixed defaults.
        avg_prompt = 150
        avg_completion = 50
        avg_cost = Decimal('0.00002625')  # default cost
        llm_latency = 0

    baseline_tokens = total_errors * (avg_prompt + avg_completion)
    token_savings_pct = ((baseline_tokens - actual_tokens_used) / baseline_tokens * 100) if baseline_tokens > 0 else 0

    baseline_api_cost = Decimal(str(total_errors)) * avg_cost
    api_cost_reduction_pct = ((baseline_api_cost - actual_api_cost) / baseline_api_cost * 100) if baseline_api_cost > 0 else Decimal('0.0')

    heuristic_latency = heuristic_logs.aggregate(avg=Avg('total_latency_ms'))['avg'] or 0
    response_time_reduction = ((llm_latency - heuristic_latency) / llm_latency * 100) if llm_latency > 0 else 0

    # Print Output
    self.stdout.write("\n" + "="*50)
    self.stdout.write(self.style.SUCCESS(" PATENT METRICS REPORT "))
    self.stdout.write("="*50)
    self.stdout.write(f"Total incorrect responses analyzed: {total_errors}")
    self.stdout.write(f"Resolved by heuristic gate: {heuristic_resolved}")
    self.stdout.write(f"Sent to LLM: {llm_invocations}")
    self.stdout.write(f"Heuristic Resolution Rate: {heuristic_rate:.1f}%")
    self.stdout.write(f"LLM Invocation Reduction: {llm_reduction:.1f}%")

    self.stdout.write("-" * 50)
    self.stdout.write(f"Total prompt tokens: {total_prompt_tokens}")
    self.stdout.write(f"Total completion tokens: {total_completion_tokens}")
    self.stdout.write(f"Total tokens used: {actual_tokens_used}")
    self.stdout.write(f"Baseline tokens (if all sent to LLM): {int(baseline_tokens)}")
    self.stdout.write(self.style.WARNING(f"Token Savings: {token_savings_pct:.1f}%"))

    self.stdout.write("-" * 50)
    self.stdout.write(f"Total API cost: ${actual_api_cost:.6f}")
    self.stdout.write(f"Baseline API cost: ${baseline_api_cost:.6f}")
    self.stdout.write(self.style.WARNING(f"API Cost Reduction: {api_cost_reduction_pct:.1f}%"))

    self.stdout.write("-" * 50)
    self.stdout.write(f"Average heuristic processing time: {heuristic_latency:.0f} ms")
    self.stdout.write(f"Average LLM processing time: {llm_latency:.0f} ms")
    self.stdout.write(f"Average overall processing time: {avg_overall:.0f} ms")
    self.stdout.write(self.style.WARNING(f"Average Response Time Reduction: {response_time_reduction:.1f}%"))
    self.stdout.write("=" * 50)

    self.stdout.write("\n" + self.style.SUCCESS(" PATENT SUMMARY PARAGRAPH "))
    self.stdout.write("-" * 50)
    self.stdout.write(generate_patent_summary())
    self.stdout.write("-" * 50 + "\n")

    if options['export_csv']:
        self.export_to_csv()

def export_to_csv(self):
    """Write one CSV row per ``ErrorAnalysisLog`` to ``patent_metrics_export.csv``.

    The file is created in the current working directory. Any failure is
    reported on stdout rather than raised, so the command still exits
    normally after a failed export.
    """
    filename = 'patent_metrics_export.csv'
    # select_related joins the user in the same query (no per-row lookup for
    # the username); iterator() streams rows instead of caching the table.
    logs = ErrorAnalysisLog.objects.select_related('user').iterator()
    try:
        # Explicit encoding: the platform default may not encode every username.
        with open(filename, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow([
                'ID', 'User', 'Question_ID', 'Was_Sent_To_LLM',
                'Heuristic_Category', 'LLM_Category',
                'Prompt_Tokens', 'Completion_Tokens', 'API_Cost_USD',
                'Latency_ms', 'Created_At'
            ])
            for log in logs:
                writer.writerow([
                    log.id,
                    log.user.username if log.user else 'Anonymous',
                    log.question_id,
                    log.was_sent_to_llm,
                    log.heuristic_category,
                    log.llm_category,
                    log.prompt_tokens,
                    log.completion_tokens,
                    log.api_cost_usd,
                    log.total_latency_ms,
                    log.created_at
                ])
        self.stdout.write(self.style.SUCCESS(f"Successfully exported data to {filename}"))
    except Exception as e:
        # Deliberately best-effort: the report has already been printed.
        self.stdout.write(self.style.ERROR(f"Failed to export CSV: {e}"))
models.TextField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('prerequisites', models.ManyToManyField(blank=True, to='features.concept')), + ], + ), + migrations.CreateModel( + name='DailyMetricsAggregate', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('date_recorded', models.DateField(auto_now_add=True)), + ('avg_engagement_score', models.FloatField(blank=True, null=True)), + ('percent_time_in_flow_zone', models.FloatField(blank=True, null=True)), + ('num_sessions_today', models.IntegerField(default=0)), + ('num_breaks_recommended', models.IntegerField(default=0)), + ('total_errors', models.IntegerField(default=0)), + ('heuristic_resolved', models.IntegerField(default=0)), + ('llm_resolved', models.IntegerField(default=0)), + ('total_api_cost_usd', models.DecimalField(decimal_places=4, default=0.0, max_digits=8)), + ('avg_explanation_quality', models.FloatField(blank=True, null=True)), + ('concepts_mastered', models.IntegerField(default=0)), + ('concepts_learning', models.IntegerField(default=0)), + ('concepts_not_started', models.IntegerField(default=0)), + ('avg_prerequisite_strength', models.FloatField(blank=True, null=True)), + ('topics_scheduled_today', models.IntegerField(default=0)), + ('topics_completed_today', models.IntegerField(default=0)), + ('estimated_days_to_full_mastery', models.IntegerField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'indexes': [models.Index(fields=['user', 'date_recorded'], name='features_da_user_id_fd334c_idx')], + }, + ), + migrations.CreateModel( + name='ErrorAnalysisLog', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('error_type', models.CharField(blank=True, max_length=100, 
null=True)), + ('user_response', models.TextField(blank=True, null=True)), + ('correct_response', models.TextField(blank=True, null=True)), + ('response_time_ms', models.IntegerField(blank=True, null=True)), + ('accuracy_on_topic_percent', models.IntegerField(default=0)), + ('session_duration_minutes', models.DecimalField(decimal_places=2, default=0, max_digits=5)), + ('consecutive_errors', models.IntegerField(default=0)), + ('heuristic_rule_matched', models.CharField(blank=True, max_length=100, null=True)), + ('heuristic_confidence', models.FloatField(blank=True, null=True)), + ('heuristic_category', models.CharField(blank=True, max_length=100, null=True)), + ('was_sent_to_llm', models.BooleanField(default=False)), + ('llm_response_time_ms', models.IntegerField(blank=True, null=True)), + ('llm_category', models.CharField(blank=True, max_length=100, null=True)), + ('gemini_explanation', models.TextField(blank=True, null=True)), + ('api_cost_usd', models.DecimalField(blank=True, decimal_places=6, max_digits=8, null=True)), + ('total_latency_ms', models.IntegerField(blank=True, null=True)), + ('explanation_quality_rating', models.FloatField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('question', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='quizzes.question')), + ('session', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='quizzes.quizattempt')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'indexes': [models.Index(fields=['user', 'session'], name='features_er_user_id_cdd1c3_idx'), models.Index(fields=['created_at'], name='features_er_created_8a82ac_idx')], + }, + ), + migrations.CreateModel( + name='KnowledgeGraphMetrics', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('concept_importance', models.FloatField(default=0.5)), + 
('questions_attempted', models.IntegerField(default=0)), + ('questions_correct', models.IntegerField(default=0)), + ('mastery_percent', models.IntegerField(default=0)), + ('node_color', models.CharField(default='RED', max_length=20)), + ('prerequisite_count', models.IntegerField(default=0)), + ('prerequisite_strength_avg', models.FloatField(default=0.0)), + ('all_prerequisites_mastered', models.BooleanField(default=False)), + ('first_introduced_date', models.DateField(auto_now_add=True)), + ('last_mastery_update', models.DateTimeField(auto_now=True)), + ('recommendation_accepted', models.BooleanField(default=False)), + ('recommendation_latency_ms', models.IntegerField(blank=True, null=True)), + ('graph_construction_time_ms', models.IntegerField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('concept', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='features.concept')), + ('recommended_next_concept', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to='features.concept')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'indexes': [models.Index(fields=['user', 'concept'], name='features_kn_user_id_12648c_idx'), models.Index(fields=['user', 'mastery_percent'], name='features_kn_user_id_07136e_idx')], + }, + ), + migrations.CreateModel( + name='SpacedRepetitionLog', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('base_interval_days', models.DecimalField(decimal_places=2, default=0, max_digits=5)), + ('exam_days_remaining', models.IntegerField(blank=True, null=True)), + ('weight_exam', models.FloatField(default=1.0)), + ('weight_difficulty', models.FloatField(default=1.0)), + ('weight_peer', models.FloatField(default=1.0)), + ('final_interval_days', 
models.DecimalField(decimal_places=2, default=0, max_digits=5)), + ('scheduled_review_date', models.DateField(blank=True, null=True)), + ('review_completed', models.BooleanField(default=False)), + ('completion_date', models.DateField(blank=True, null=True)), + ('performance_on_review', models.IntegerField(default=0)), + ('time_since_last_review_days', models.IntegerField(default=0)), + ('time_to_mastery_days', models.IntegerField(blank=True, null=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('concept', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='features.concept')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'indexes': [models.Index(fields=['user', 'scheduled_review_date'], name='features_sp_user_id_678481_idx')], + }, + ), + migrations.CreateModel( + name='UserEngagementMetrics', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('timestamp', models.DateTimeField(auto_now_add=True)), + ('response_time_ms', models.IntegerField(blank=True, null=True)), + ('is_correct', models.BooleanField(default=False)), + ('total_questions_in_session', models.IntegerField(default=0)), + ('correct_answers_in_session', models.IntegerField(default=0)), + ('session_duration_minutes', models.DecimalField(decimal_places=2, default=0, max_digits=5)), + ('response_quality', models.FloatField(blank=True, null=True)), + ('interaction_frequency', models.FloatField(blank=True, null=True)), + ('time_pressure', models.FloatField(blank=True, null=True)), + ('engagement_score', models.FloatField(blank=True, null=True)), + ('fatigue_factor', models.FloatField(blank=True, null=True)), + ('error_decay', models.FloatField(blank=True, null=True)), + ('load_variable_L', models.FloatField(blank=True, null=True)), + ('is_in_flow_zone', 
models.BooleanField(default=False)), + ('flow_zone_entry_time', models.DateTimeField(blank=True, null=True)), + ('time_in_flow_zone_minutes', models.DecimalField(decimal_places=2, default=0, max_digits=5)), + ('difficulty_adjustment', models.CharField(blank=True, max_length=50, null=True)), + ('break_recommended', models.BooleanField(default=False)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('session', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='quizzes.quizattempt')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'ordering': ['-timestamp'], + 'indexes': [models.Index(fields=['user', 'session'], name='features_us_user_id_1b425c_idx'), models.Index(fields=['timestamp'], name='features_us_timesta_d2b334_idx')], + }, + ), + ] diff --git a/features/migrations/0002_erroranalysislog_completion_tokens_and_more.py b/features/migrations/0002_erroranalysislog_completion_tokens_and_more.py new file mode 100644 index 0000000..99ab8b8 --- /dev/null +++ b/features/migrations/0002_erroranalysislog_completion_tokens_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2 on 2026-05-20 14:59 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('features', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='erroranalysislog', + name='completion_tokens', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='erroranalysislog', + name='prompt_tokens', + field=models.IntegerField(default=0), + ), + ] diff --git a/features/migrations/__init__.py b/features/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/features/models.py b/features/models.py new file mode 100644 index 0000000..a834027 --- /dev/null +++ b/features/models.py @@ -0,0 +1,207 @@ +from django.db import models 
+from django.conf import settings + +# A basic Concept model to track topics/concepts for Feature #3 and #4 +class Concept(models.Model): + name = models.CharField(max_length=255) + description = models.TextField(blank=True, null=True) + prerequisites = models.ManyToManyField('self', symmetrical=False, blank=True) + created_at = models.DateTimeField(auto_now_add=True) + + def __str__(self): + return self.name + +class UserEngagementMetrics(models.Model): + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) + session = models.ForeignKey('quizzes.QuizAttempt', on_delete=models.CASCADE) + timestamp = models.DateTimeField(auto_now_add=True) + + # Raw data + response_time_ms = models.IntegerField(null=True, blank=True) + is_correct = models.BooleanField(default=False) + total_questions_in_session = models.IntegerField(default=0) + correct_answers_in_session = models.IntegerField(default=0) + session_duration_minutes = models.DecimalField(max_digits=5, decimal_places=2, default=0) + + # Calculated metrics (Feature #1) + response_quality = models.FloatField(null=True, blank=True) + interaction_frequency = models.FloatField(null=True, blank=True) + time_pressure = models.FloatField(null=True, blank=True) + engagement_score = models.FloatField(null=True, blank=True) + fatigue_factor = models.FloatField(null=True, blank=True) + error_decay = models.FloatField(null=True, blank=True) + load_variable_L = models.FloatField(null=True, blank=True) + + # Flow Zone Status + is_in_flow_zone = models.BooleanField(default=False) + flow_zone_entry_time = models.DateTimeField(null=True, blank=True) + time_in_flow_zone_minutes = models.DecimalField(max_digits=5, decimal_places=2, default=0) + + # Session Actions + difficulty_adjustment = models.CharField(max_length=50, null=True, blank=True) + break_recommended = models.BooleanField(default=False) + + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class 
class ErrorAnalysisLog(models.Model):
    """One analysed incorrect answer, with its heuristic/LLM classification and cost data.

    Each row links a user, a quiz attempt (``session``) and a question, and
    records whether the error was handled by the heuristic stage alone or was
    also sent to the LLM (``was_sent_to_llm``).
    """

    user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    session = models.ForeignKey('quizzes.QuizAttempt', on_delete=models.CASCADE)
    question = models.ForeignKey('quizzes.Question', on_delete=models.CASCADE)

    # Error Details
    error_type = models.CharField(max_length=100, blank=True, null=True)
    user_response = models.TextField(blank=True, null=True)
    correct_response = models.TextField(blank=True, null=True)

    # Student Context
    response_time_ms = models.IntegerField(null=True, blank=True)
    accuracy_on_topic_percent = models.IntegerField(default=0)
    session_duration_minutes = models.DecimalField(max_digits=5, decimal_places=2, default=0)
    consecutive_errors = models.IntegerField(default=0)

    # STAGE 1: Heuristic Classification
    heuristic_rule_matched = models.CharField(max_length=100, blank=True, null=True)
    heuristic_confidence = models.FloatField(null=True, blank=True)
    heuristic_category = models.CharField(max_length=100, blank=True, null=True)

    # STAGE 2: LLM Classification
    was_sent_to_llm = models.BooleanField(default=False)
    llm_response_time_ms = models.IntegerField(null=True, blank=True)
    llm_category = models.CharField(max_length=100, blank=True, null=True)
    gemini_explanation = models.TextField(blank=True, null=True)

    # Cost & Performance Tracking
    prompt_tokens = models.IntegerField(default=0)
    completion_tokens = models.IntegerField(default=0)
    api_cost_usd = models.DecimalField(max_digits=8, decimal_places=6, null=True, blank=True)
    total_latency_ms = models.IntegerField(null=True, blank=True)
    explanation_quality_rating = models.FloatField(null=True, blank=True)

    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        indexes = [
            models.Index(fields=['user', 'session']),
            models.Index(fields=['created_at']),
        ]

    def __str__(self):
        # Uses the raw *_id columns so rendering a row never triggers a
        # related-object query (e.g. in admin change lists).
        stage = "LLM" if self.was_sent_to_llm else "heuristic"
        return f"ErrorAnalysisLog #{self.pk} (user={self.user_id}, question={self.question_id}, {stage})"
models.FloatField(default=1.0) + weight_difficulty = models.FloatField(default=1.0) + weight_peer = models.FloatField(default=1.0) + + # Final Schedule + final_interval_days = models.DecimalField(max_digits=5, decimal_places=2, default=0) + scheduled_review_date = models.DateField(null=True, blank=True) + + # Review Status + review_completed = models.BooleanField(default=False) + completion_date = models.DateField(null=True, blank=True) + performance_on_review = models.IntegerField(default=0) + + # Metrics + time_since_last_review_days = models.IntegerField(default=0) + time_to_mastery_days = models.IntegerField(null=True, blank=True) + + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + indexes = [ + models.Index(fields=['user', 'scheduled_review_date']), + ] + + +class DailyMetricsAggregate(models.Model): + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) + date_recorded = models.DateField(auto_now_add=True) + + # Feature #1 Aggregates + avg_engagement_score = models.FloatField(null=True, blank=True) + percent_time_in_flow_zone = models.FloatField(null=True, blank=True) + num_sessions_today = models.IntegerField(default=0) + num_breaks_recommended = models.IntegerField(default=0) + + # Feature #2 Aggregates + total_errors = models.IntegerField(default=0) + heuristic_resolved = models.IntegerField(default=0) + llm_resolved = models.IntegerField(default=0) + total_api_cost_usd = models.DecimalField(max_digits=8, decimal_places=4, default=0.0) + avg_explanation_quality = models.FloatField(null=True, blank=True) + + # Feature #3 Aggregates + concepts_mastered = models.IntegerField(default=0) + concepts_learning = models.IntegerField(default=0) + concepts_not_started = models.IntegerField(default=0) + avg_prerequisite_strength = models.FloatField(null=True, blank=True) + + # Feature #4 Aggregates + topics_scheduled_today = models.IntegerField(default=0) + 
def log_error_analysis(
    user,
    session,
    question,
    user_response: str,
    correct_response: str,
    response_time_ms: int,
    accuracy_on_topic_percent: int,
    session_duration_minutes: float,
    consecutive_errors: int,
    heuristic_rule_matched: str,
    heuristic_confidence: float,
    heuristic_category: str,
    was_sent_to_llm: bool,
    llm_category: str | None = None,
    gemini_explanation: str | None = None,
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
    api_cost_usd: float = 0.0,
    total_latency_ms: int = 0,
) -> ErrorAnalysisLog | None:
    """
    Reusable logging function to record a single error analysis event for patent metrics.

    Creates one ``ErrorAnalysisLog`` row with ``error_type='incorrect_answer'``.
    ``session_duration_minutes`` and ``api_cost_usd`` are converted to
    ``Decimal`` via ``str`` so no binary-float noise is stored; a falsy
    ``api_cost_usd`` is stored as ``Decimal("0.0")``.

    Returns:
        The created row, or ``None`` if creation failed. Failures are logged
        and never raised, so callers must handle a ``None`` result.
    """
    # Local import keeps this block self-contained.
    from django.db import transaction

    try:
        # The create runs in its own atomic block (a savepoint when a
        # transaction is already open). Without it, a database error that is
        # swallowed here would leave the caller's surrounding transaction
        # unusable for any later query.
        with transaction.atomic():
            return ErrorAnalysisLog.objects.create(
                user=user,
                session=session,
                question=question,
                error_type='incorrect_answer',
                user_response=user_response,
                correct_response=correct_response,
                response_time_ms=response_time_ms,
                accuracy_on_topic_percent=accuracy_on_topic_percent,
                session_duration_minutes=Decimal(str(session_duration_minutes)),
                consecutive_errors=consecutive_errors,
                heuristic_rule_matched=heuristic_rule_matched,
                heuristic_confidence=heuristic_confidence,
                heuristic_category=heuristic_category,
                was_sent_to_llm=was_sent_to_llm,
                llm_category=llm_category,
                gemini_explanation=gemini_explanation,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                api_cost_usd=Decimal(str(api_cost_usd)) if api_cost_usd else Decimal("0.0"),
                total_latency_ms=total_latency_ms,
            )
    except Exception as e:
        # Best-effort by design: metrics logging must not break the quiz flow.
        logger.exception("Failed to log error analysis: %s", e)
        return None
+ + heuristic_resolved = ErrorAnalysisLog.objects.filter(was_sent_to_llm=False).count() + llm_invocations = ErrorAnalysisLog.objects.filter(was_sent_to_llm=True).count() + + heuristic_rate = (heuristic_resolved / total_errors) * 100 + + total_prompt_tokens = ErrorAnalysisLog.objects.aggregate(total=Sum('prompt_tokens'))['total'] or 0 + total_completion_tokens = ErrorAnalysisLog.objects.aggregate(total=Sum('completion_tokens'))['total'] or 0 + actual_tokens_used = total_prompt_tokens + total_completion_tokens + + # Baseline assumes ALL errors were sent to LLM using the average token usage of the ones that were sent + llm_logs = ErrorAnalysisLog.objects.filter(was_sent_to_llm=True) + if llm_logs.exists(): + avg_prompt = llm_logs.aggregate(avg=Avg('prompt_tokens'))['avg'] or 0 + avg_completion = llm_logs.aggregate(avg=Avg('completion_tokens'))['avg'] or 0 + avg_cost = llm_logs.aggregate(avg=Avg('api_cost_usd'))['avg'] or Decimal('0.0') + else: + # Fallback to defaults used in llm_stage.py + avg_prompt = 150 + avg_completion = 50 + COST_PER_PROMPT_TOKEN = Decimal('0.000000075') + COST_PER_COMPLETION_TOKEN = Decimal('0.0000003') + avg_cost = (Decimal(str(avg_prompt)) * COST_PER_PROMPT_TOKEN) + (Decimal(str(avg_completion)) * COST_PER_COMPLETION_TOKEN) + + baseline_tokens = total_errors * (avg_prompt + avg_completion) + token_savings_pct = ((baseline_tokens - actual_tokens_used) / baseline_tokens * 100) if baseline_tokens > 0 else 0 + + actual_api_cost = ErrorAnalysisLog.objects.aggregate(total=Sum('api_cost_usd'))['total'] or Decimal('0.0') + baseline_api_cost = Decimal(str(total_errors)) * avg_cost + api_cost_reduction_pct = ((baseline_api_cost - actual_api_cost) / baseline_api_cost * 100) if baseline_api_cost > 0 else Decimal('0.0') + + heuristic_latency = ErrorAnalysisLog.objects.filter(was_sent_to_llm=False).aggregate(avg=Avg('total_latency_ms'))['avg'] or 0 + llm_latency = 
ErrorAnalysisLog.objects.filter(was_sent_to_llm=True).aggregate(avg=Avg('total_latency_ms'))['avg'] or 0 + + if llm_latency > 0: + response_time_reduction = ((llm_latency - heuristic_latency) / llm_latency) * 100 + else: + response_time_reduction = 0 + + return ( + f"The claimed hybrid intelligent tutoring system successfully processed {total_errors} " + f"student errors. The localized rule-based heuristic gate successfully resolved {heuristic_resolved} " + f"errors without requiring external language model invocation, achieving a heuristic resolution " + f"rate of {heuristic_rate:.1f}%. This architecture restricted LLM invocations to only {llm_invocations} " + f"edge cases. As a result of this dynamic routing, the system consumed a total of {actual_tokens_used} " + f"tokens ({total_prompt_tokens} prompt, {total_completion_tokens} completion) compared to a baseline of " + f"{baseline_tokens} tokens if a standard uniform LLM architecture had been used, resulting in a token " + f"savings of {token_savings_pct:.1f}%. Consequently, the total computational API cost was reduced by " + f"{api_cost_reduction_pct:.1f}% (Actual: ${actual_api_cost:.6f} vs Baseline: ${baseline_api_cost:.6f}). " + f"Furthermore, the heuristic gate executed with an average latency of {heuristic_latency:.0f}ms compared to " + f"the LLM average latency of {llm_latency:.0f}ms, yielding a response time reduction of {response_time_reduction:.1f}% " + f"for locally resolved interactions, thereby demonstrating the efficacy of the adaptive cognitive load management." 
class NeuralSpacedRepetition:
    """
    Feature #4: Neural Spaced-Repetition Scheduler

    Computes a review interval from an SM-2 style base interval scaled by an
    exam-proximity weight and a mastery ("difficulty") weight, and writes the
    resulting schedule onto a ``SpacedRepetitionLog`` row.
    """

    # Weights for the Multi-Factor Formula.
    # NOTE(review): no method in this class applies these two constants; they
    # are kept because code outside this class may reference them.
    W_EXAM_PROXIMITY_IMPACT = 0.4
    W_DIFFICULTY_IMPACT = 0.6

    @staticmethod
    def calculate_base_interval_sm2(repetition_count: int, easiness_factor: float) -> float:
        """
        Calculates the SM-2 style base interval in days.

        Returns 1.0 for the first repetition (count 0), 6.0 for the second
        (count 1), and ``6.0 * easiness_factor ** (repetition_count - 1)``
        rounded to two decimals afterwards.
        """
        if repetition_count == 0:
            return 1.0
        if repetition_count == 1:
            return 6.0
        return round(6.0 * (easiness_factor ** (repetition_count - 1)), 2)

    @classmethod
    def calculate_exam_weight(cls, days_remaining: int) -> float:
        """
        Accelerates reviews if the exam is close.

        Returns 1.0 (no compression) when there is no exam (``None``) or it is
        more than 30 days away. Otherwise the weight falls linearly as
        ``days_remaining / 30`` with a floor of 0.2, so an exam six or fewer
        days away, including today (0), gets the strongest compression.
        """
        # Compare against None explicitly: 0 means "exam today" and must be
        # compressed, not treated as "no exam".
        if days_remaining is None or days_remaining > 30:
            return 1.0

        return max(0.2, (days_remaining / 30.0))

    @classmethod
    def calculate_difficulty_weight(cls, node_mastery_percent: int) -> float:
        """
        Returns the mastery-based weight: ``node_mastery_percent / 100`` with a
        floor of 0.3 (low mastery = shorter interval).
        """
        return max(0.3, node_mastery_percent / 100.0)

    @classmethod
    def calculate_final_interval(
        cls,
        base_interval_days: float,
        exam_days_remaining: int,
        node_mastery_percent: int
    ) -> float:
        """
        MAIN FORMULA FOR PATENT:
        final_interval = base_interval * weight_exam * weight_difficulty

        The result is rounded to two decimals and never drops below 0.5 days
        (12 hours), which prevents review spam.
        """
        weight_exam = cls.calculate_exam_weight(exam_days_remaining)
        weight_difficulty = cls.calculate_difficulty_weight(node_mastery_percent)

        final_interval = base_interval_days * weight_exam * weight_difficulty

        return round(max(0.5, final_interval), 2)

    @classmethod
    def schedule_next_review(cls, log_id: int, easiness_factor: float, node_mastery: int, exam_days: int = None):
        """
        Updates the SR Log with the new scheduled date.

        Loads the ``SpacedRepetitionLog`` with ``id == log_id``, stores the base
        interval, both weights and the final interval on it, and schedules the
        review at least one day after today.

        Returns:
            The new ``scheduled_review_date``, or ``None`` if anything failed
            (the failure is logged, not raised).
        """
        # Local import: the logging module is only needed by this method.
        import logging

        try:
            log = SpacedRepetitionLog.objects.get(id=log_id)

            # Simple repetition counter based on history (mocked here as 2)
            rep_count = 2

            base_interval = cls.calculate_base_interval_sm2(rep_count, easiness_factor)
            final_interval = cls.calculate_final_interval(base_interval, exam_days, node_mastery)

            log.base_interval_days = Decimal(str(base_interval))
            log.weight_exam = cls.calculate_exam_weight(exam_days)
            log.weight_difficulty = cls.calculate_difficulty_weight(node_mastery)
            log.final_interval_days = Decimal(str(final_interval))

            # Add interval to today; the date field has day resolution, so the
            # fractional interval is rounded and clamped to at least one day.
            today = timezone.now().date()
            days_to_add = int(round(final_interval))
            log.scheduled_review_date = today + datetime.timedelta(days=max(1, days_to_add))

            log.save()
            return log.scheduled_review_date

        except Exception as e:
            # Keeps the original "return None on any failure" contract, but
            # records the traceback through logging instead of print().
            logging.getLogger(__name__).exception("SR Scheduling Error: %s", e)
            return None
@staticmethod
def calculate_platform_efficiency(avg_cost_per_call: Decimal = Decimal('0.00002625')) -> dict:
    """
    Platform Efficiency = Errors caught by Heuristic (free) vs Sent to LLM (paid).

    Args:
        avg_cost_per_call: Simulated USD cost of one LLM call, used to price
            the calls that the heuristic stage avoided.

    Returns:
        A dict with ``heuristic_rate`` and ``llm_rate`` (percentages rounded
        to one decimal) and ``cost_saved_usd`` (float rounded to six
        decimals). All three are 0.0 when there are no rows.
    """
    total_errors = ErrorAnalysisLog.objects.count()
    if total_errors == 0:
        # Floats, so the value types match the populated case below.
        return {'heuristic_rate': 0.0, 'llm_rate': 0.0, 'cost_saved_usd': 0.0}

    llm_errors = ErrorAnalysisLog.objects.filter(was_sent_to_llm=True).count()
    heuristic_errors = total_errors - llm_errors

    # Calculate simulated cost saved
    cost_saved = Decimal(heuristic_errors) * avg_cost_per_call

    return {
        'heuristic_rate': round((heuristic_errors / total_errors) * 100, 1),
        'llm_rate': round((llm_errors / total_errors) * 100, 1),
        # Six decimals: at the default per-call cost, four decimals would
        # round a single saved call down to 0.0.
        'cost_saved_usd': round(float(cost_saved), 6)
    }
@login_required
def api_dashboard_data(request):
    """
    Provides the JSON data for Chart.js and vis.js to consume.

    The engagement series and knowledge-graph nodes are limited to the
    requesting user's own rows. This view only requires a login, so it must
    not hand every user's records to any authenticated caller; scoping by
    user also matches the per-user system metrics computed below. The
    ``efficiency`` figures remain platform-wide.

    Returns:
        ``JsonResponse`` with the keys ``engagement``, ``efficiency``,
        ``knowledge_graph`` and ``system``.
    """
    user_id = request.user.id

    # 1. Engagement Data
    # Newest 50 rows, evaluated once and flipped into chronological order.
    recent_logs = list(
        UserEngagementMetrics.objects.filter(user_id=user_id).order_by('-timestamp')[:50]
    )
    recent_logs.reverse()
    engagement_data = {
        'timestamps': [log.timestamp.strftime('%H:%M') for log in recent_logs],
        'scores': [float(log.engagement_score or 0) for log in recent_logs],
        'loads': [float(log.load_variable_L or 0) for log in recent_logs],
    }

    # 2. Error Analysis Efficiency
    efficiency = SystemMetricsAggregator.calculate_platform_efficiency()

    # 3. Knowledge Graph Data (vis.js format)
    nodes = []
    edges = []
    seen_concepts = set()
    seen_edges = set()
    color_by_state = {'RED': "#ff4444", 'YELLOW': "#ffbb33"}

    # select_related / prefetch_related load the concept and its prerequisites
    # up front instead of issuing extra queries for every row in the loop.
    kg_rows = (
        KnowledgeGraphMetrics.objects
        .filter(user_id=user_id)
        .select_related('concept')
        .prefetch_related('concept__prerequisites')
    )
    for kg in kg_rows:
        concept = kg.concept
        # Nothing enforces one row per (user, concept); keep the first row so
        # a concept never appears twice under the same node id.
        if concept.id in seen_concepts:
            continue
        seen_concepts.add(concept.id)

        nodes.append({
            'id': concept.id,
            'label': concept.name,
            # Any state other than RED/YELLOW is drawn green.
            'color': color_by_state.get(kg.node_color, "#00C851"),
            'value': kg.mastery_percent
        })

        for prereq in concept.prerequisites.all():
            edge_key = (prereq.id, concept.id)
            if edge_key in seen_edges:
                continue
            seen_edges.add(edge_key)
            edges.append({
                'from': prereq.id,
                'to': concept.id,
                'arrows': 'to'
            })

    # Fallback mock data if the user has no knowledge-graph rows
    if not nodes:
        nodes = [
            {'id': 1, 'label': 'Basic Physics', 'color': '#00C851', 'value': 85},
            {'id': 2, 'label': 'Kinematics', 'color': '#ffbb33', 'value': 55},
            {'id': 3, 'label': 'Newton Laws', 'color': '#ff4444', 'value': 20},
        ]
        edges = [
            {'from': 1, 'to': 2, 'arrows': 'to'},
            {'from': 2, 'to': 3, 'arrows': 'to'}
        ]

    # 4. System Metrics
    system_metrics = SystemMetricsAggregator.get_dashboard_summary(user_id)

    return JsonResponse({
        'engagement': engagement_data,
        'efficiency': efficiency,
        'knowledge_graph': {'nodes': nodes, 'edges': edges},
        'system': system_metrics
    })
on_delete=models.CASCADE, related_name='answers') + question = models.ForeignKey(Question, on_delete=models.CASCADE) + user_response = models.CharField(max_length=300) + is_correct = models.BooleanField(default=False) + response_time_ms = models.IntegerField(default=3000) + engagement_score = models.FloatField(null=True, blank=True) + load_variable = models.FloatField(null=True, blank=True) + created_at = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ['created_at'] \ No newline at end of file diff --git a/quizzes/urls.py b/quizzes/urls.py index 0b1ef8f..c3adecc 100644 --- a/quizzes/urls.py +++ b/quizzes/urls.py @@ -7,6 +7,7 @@ path('setup//', views.quiz_setup, name='setup'), path('attempt//', views.quiz_attempt, name='attempt'), path('submit/', views.quiz_submit, name='submit'), + path('submit-answer/', views.submit_quiz_answer, name='submit_answer'), path('history//', views.quiz_history, name='history'), path('detail//', views.quiz_detail, name='detail'), path('dismiss-surprise/', views.dismiss_surprise, name='dismiss_surprise'), diff --git a/quizzes/views.py b/quizzes/views.py index 2bb21a7..e4140ad 100644 --- a/quizzes/views.py +++ b/quizzes/views.py @@ -1,6 +1,7 @@ import json import random import logging +from decimal import Decimal from django.shortcuts import render, get_object_or_404, redirect from django.contrib.auth.decorators import login_required from django.http import JsonResponse @@ -8,7 +9,12 @@ from django.contrib import messages from courses.models import Course from ai_engine.services.question_generator import generate_questions -from .models import Question, QuizAttempt, SurpriseTest +from .models import Question, QuizAttempt, SurpriseTest, QuizAnswer + +from features.adaptive_load_optimizer.algorithms import process_user_interaction +from features.hybrid_nlp_analyzer.heuristic_gate import HeuristicGate +from features.hybrid_nlp_analyzer.llm_stage import process_llm_classification +from features.models import 
ErrorAnalysisLog, Concept logger = logging.getLogger(__name__) @@ -72,11 +78,20 @@ def quiz_attempt(request, course_id): messages.error(request, 'Failed to generate quiz questions. Please check your API quota.') return redirect('quizzes:setup', course_id=course_id) + # Create pending QuizAttempt for real-time tracking + attempt = QuizAttempt.objects.create( + user=request.user, course=course, topic=topic, + total_questions=len(question_list), correct_answers=0, + score_percent=0, time_taken_seconds=0, + is_surprise=is_surprise, results_data=[] + ) + return render(request, 'quizzes/quiz_attempt.html', { 'course': course, 'topic': topic, 'difficulty': difficulty, 'questions_json': json.dumps(question_list), 'total': len(question_list), 'is_surprise': is_surprise, + 'session_id': attempt.id, }) @@ -178,4 +193,175 @@ def dismiss_surprise(request): st_id = request.POST.get('st_id') if st_id: SurpriseTest.objects.filter(pk=st_id, user=request.user).update(dismissed=True) - return redirect('dashboard:home') \ No newline at end of file + return redirect('dashboard:home') + +@login_required +@require_POST +def submit_quiz_answer(request): + """ + MODIFIED ENDPOINT - Integrates Features #1 & #2 + """ + try: + data = json.loads(request.body) + user = request.user + user_id = user.id + session_id = data.get('session_id') + question_id = data.get('question_id') + user_response = data.get('user_response') + response_time_ms = data.get('response_time_ms', 3000) + session_duration_minutes = data.get('session_duration_minutes', 0) + + quiz_session = get_object_or_404(QuizAttempt, id=session_id, user=user) + question = get_object_or_404(Question, id=question_id) + is_correct = (str(user_response).lower().strip() == str(question.correct_answer).lower().strip()) + + answers_so_far = QuizAnswer.objects.filter(session=quiz_session) + correct_so_far = answers_so_far.filter(is_correct=True).count() + + # ============ FEATURE #1: ADAPTIVE LOAD OPTIMIZER ============ + engagement_result = 
process_user_interaction(user_id, { + 'session_id': session_id, + 'question_id': question_id, + 'is_correct': is_correct, + 'response_time_ms': response_time_ms, + 'session_duration_minutes': session_duration_minutes, + 'questions_attempted_so_far': answers_so_far.count() + 1, + 'correct_answers_so_far': correct_so_far + (1 if is_correct else 0) + }) + + quiz_answer = QuizAnswer.objects.create( + session=quiz_session, + question=question, + user_response=user_response, + is_correct=is_correct, + response_time_ms=response_time_ms, + engagement_score=engagement_result.get('engagement_score'), + load_variable=engagement_result.get('load_variable') + ) + + response_data = { + 'is_correct': is_correct, + 'correct_answer': question.correct_answer, + 'explanation': question.explanation, + 'engagement_score': engagement_result.get('engagement_score'), + 'in_flow_zone': engagement_result.get('is_in_flow_zone'), + 'difficulty_adjustment': engagement_result.get('difficulty_adjustment'), + 'should_break': engagement_result.get('should_break') + } + + # ============ FEATURE #2: ERROR ANALYSIS ============ + if not is_correct: + # We assume Concept mappings are handled outside in real implementation, but for the spec we fake concept_id + fake_concept_id = 1 + error_data = { + 'response_time_ms': response_time_ms, + 'session_duration_minutes': session_duration_minutes, + 'accuracy_on_topic_last_10': calculate_accuracy_on_topic(user_id, question.topic, last_n=10), + 'consecutive_errors': count_consecutive_errors(user_id, session_id), + 'response_time_trend': analyze_response_time_trend(user_id, session_id), + 'error_pattern_entropy': calculate_error_entropy(user_id, fake_concept_id), + 'recent_errors_on_concept': count_recent_errors_on_concept(user_id, fake_concept_id), + 'prerequisite_gap_correlation': calculate_prerequisite_gap(user_id, fake_concept_id) + } + + heuristic_result = HeuristicGate.apply_heuristic_gate(user_id, fake_concept_id, error_data) + + error_log = 
ErrorAnalysisLog.objects.create( + user=user, + session=quiz_session, + question=question, + error_type='incorrect_answer', + user_response=user_response, + correct_response=question.correct_answer, + response_time_ms=response_time_ms, + accuracy_on_topic_percent=int(error_data['accuracy_on_topic_last_10'] * 100), + session_duration_minutes=Decimal(str(session_duration_minutes)), + consecutive_errors=error_data['consecutive_errors'], + heuristic_rule_matched=heuristic_result.get('rule_matched'), + heuristic_confidence=heuristic_result.get('confidence'), + heuristic_category=heuristic_result.get('category'), + was_sent_to_llm=heuristic_result.get('should_send_to_llm') + ) + + if heuristic_result.get('should_send_to_llm'): + llm_result = process_llm_classification( + question.question_text, user_response, question.correct_answer, + heuristic_result.get('category'), error_data + ) + error_log.was_sent_to_llm = True + error_log.llm_category = llm_result.get('category') + error_log.gemini_explanation = llm_result.get('explanation') + error_log.api_cost_usd = Decimal(str(llm_result.get('api_cost', 0))) + error_log.total_latency_ms = llm_result.get('total_latency_ms') + error_log.prompt_tokens = llm_result.get('prompt_tokens', 0) + error_log.completion_tokens = llm_result.get('completion_tokens', 0) + error_log.save() + + response_data['error_category'] = llm_result.get('category') + response_data['explanation'] = llm_result.get('explanation') + response_data['was_explained_by_llm'] = True + else: + response_data['error_category'] = heuristic_result.get('category') + response_data['explanation'] = get_heuristic_explanation(heuristic_result.get('category'), question, error_data) + response_data['was_explained_by_llm'] = False + + response_data['error_analysis'] = { + 'category': heuristic_result.get('category'), + 'confidence': heuristic_result.get('confidence'), + 'rule_matched': heuristic_result.get('rule_matched'), + 'sent_to_llm': 
heuristic_result.get('should_send_to_llm') + } + + return JsonResponse(response_data) + + except Exception as e: + logger.error(f"Error in submit_quiz_answer: {str(e)}", exc_info=True) + return JsonResponse({'error': str(e)}, status=400) + + +def calculate_accuracy_on_topic(user_id, topic, last_n=10): + answers = QuizAnswer.objects.filter(session__user_id=user_id, question__topic=topic).order_by('-id')[:last_n] + if not answers: + return 0.5 + correct = answers.filter(is_correct=True).count() + return correct / len(answers) + +def count_consecutive_errors(user_id, session_id): + answers = QuizAnswer.objects.filter(session__user_id=user_id, session_id=session_id).order_by('-id') + count = 0 + for answer in answers: + if not answer.is_correct: + count += 1 + else: + break + return count + +def analyze_response_time_trend(user_id, session_id): + answers = QuizAnswer.objects.filter(session__user_id=user_id, session_id=session_id).order_by('id')[-5:] + if len(answers) < 2: + return 'stable' + times = [a.response_time_ms for a in answers] + trend = times[-1] - times[0] + if trend > 500: + return 'increasing' + elif trend < -500: + return 'decreasing' + return 'stable' + +def calculate_error_entropy(user_id, concept_id): + return 0.5 + +def count_recent_errors_on_concept(user_id, concept_id): + # Using 0 since Concept mappings are fully implemented in Phase 3 + return 0 + +def calculate_prerequisite_gap(user_id, concept_id): + return 0.0 + +def get_heuristic_explanation(category, question, error_data): + explanations = { + 'Time-Pressure Fatigue': f"It looks like you rushed this answer! Take your time. The correct answer is: {question.correct_answer}", + 'Cognitive Overload': f"You've been studying for a while - maybe take a break! The answer is: {question.correct_answer}", + 'Conceptual Confusion': f"This might indicate you're confused on a core concept. Review this topic. 
The answer is: {question.correct_answer}" + } + return explanations.get(category, question.explanation) \ No newline at end of file diff --git a/templates/features/admin_dashboard.html b/templates/features/admin_dashboard.html new file mode 100644 index 0000000..e85ba2c --- /dev/null +++ b/templates/features/admin_dashboard.html @@ -0,0 +1,180 @@ +{% extends 'base.html' %} +{% load static %} + +{% block title %}StudyAI - Patent Metrics Dashboard{% endblock %} + +{% block extra_head %} + + + + + +{% endblock %} + +{% block content %} +
+
+

Patent Proof: Administration Dashboard

+ +
+ +
+ +
+
+
+

Learning Velocity

+

--

+ Concepts / Week +
+
+

Heuristic Efficiency

+

--%

+ Errors caught locally +
+
+

API Cost Saved

+

$--

+ Simulated USD +
+
+

Time to Exam Readiness

+

--

+ Weeks Predicted +
+
+
+
+ +
+ +
+
+

Feature #1: Adaptive Load / Flow State

+ +
+
+ + +
+
+

Feature #2: NLP Heuristic Gate vs LLM

+ +
+
+
+ +
+ +
+
+

Feature #3: Autonomous Knowledge Graph

+
+
+
+
+
+ + +{% endblock %} diff --git a/templates/quizzes/quiz_attempt.html b/templates/quizzes/quiz_attempt.html index 4330db2..cfe34df 100644 --- a/templates/quizzes/quiz_attempt.html +++ b/templates/quizzes/quiz_attempt.html @@ -202,20 +202,21 @@ return s.replace(/'/g, "\\'").replace(/"/g, '\\"'); } -function selectAnswer(qIndex, answer, optIndex, qId) { +async function selectAnswer(qIndex, answer, optIndex, qId) { + if (answers[qId]) return; // Prevent multiple submissions for same question answers[qId] = answer; questions[qIndex].options.forEach((_, j) => { const el = document.getElementById(`opt-${qIndex}-${j}`); el.classList.remove('selected'); + el.style.pointerEvents = 'none'; // Disable changing answer }); const sel = document.getElementById(`opt-${qIndex}-${optIndex}`); sel.classList.add('selected'); - document.getElementById(`qstatus-${qIndex}`).textContent = 'Answered ✓'; - document.getElementById(`qstatus-${qIndex}`).style.color = 'var(--success)'; - + document.getElementById(`qstatus-${qIndex}`).textContent = 'Submitting...'; + const done = Object.keys(answers).length; const pct = Math.round((done / questions.length) * 100); document.getElementById('progressBar').style.width = pct + '%'; @@ -227,17 +228,65 @@ document.getElementById('submitHint').style.fontWeight = '600'; } - // ── Cognitive Load Tracking ── - const is_correct = answer.trim().toLowerCase() === questions[qIndex].correct.trim().toLowerCase(); - const timeTaken = Math.round((Date.now() - window._lastInteractionTime) / 1000); + // ── Patent Feature: Real-Time Submission & Cognitive Load Tracking ── + const timeTaken = Math.round((Date.now() - window._lastInteractionTime)); window._lastInteractionTime = Date.now(); - if (window._cogLoadActive !== false) { - fetch('/ai/api/cognitive/update/', { + try { + let csrftoken = getCookie('csrftoken') || document.querySelector('[name=csrfmiddlewaretoken]')?.value; + const res = await fetch('/quizzes/submit-answer/', { method: 'POST', - headers: { 
'Content-Type': 'application/json', 'X-CSRFToken': getCookie('csrftoken') || document.querySelector('[name=csrfmiddlewaretoken]')?.value }, - body: JSON.stringify({ is_correct: is_correct, time_taken_seconds: timeTaken, difficulty: DIFFICULTY }) - }).catch(e => console.log('CogLoad tracking error', e)); + headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrftoken }, + body: JSON.stringify({ + session_id: {{ session_id }}, + question_id: qId, + user_response: answer, + response_time_ms: timeTaken, + session_duration_minutes: (totalSeconds - seconds) / 60.0 + }) + }); + + const data = await res.json(); + + // Update Flow State UI (assuming there's an element we can add) + let headerBar = document.querySelector('.quiz-header-bar > .d-flex'); + let flowBadge = document.getElementById('flowStateBadge'); + if (!flowBadge) { + flowBadge = document.createElement('span'); + flowBadge.id = 'flowStateBadge'; + flowBadge.className = 'badge bg-info ms-2'; + headerBar.firstElementChild.appendChild(flowBadge); + } + + if (data.in_flow_zone) { + flowBadge.textContent = `Flow State: Optimal 🔥`; + flowBadge.className = 'badge bg-success ms-2'; + } else { + flowBadge.textContent = `Load: ${data.difficulty_adjustment.toUpperCase()}`; + flowBadge.className = 'badge bg-warning ms-2'; + } + + if (data.is_correct) { + document.getElementById(`qstatus-${qIndex}`).textContent = 'Correct ✓'; + document.getElementById(`qstatus-${qIndex}`).style.color = 'var(--success)'; + } else { + document.getElementById(`qstatus-${qIndex}`).textContent = 'Incorrect ✕'; + document.getElementById(`qstatus-${qIndex}`).style.color = 'var(--danger)'; + sel.style.borderColor = 'var(--danger)'; + sel.style.backgroundColor = 'rgba(220, 38, 38, 0.1)'; + + // Show AI feedback inline + const feedbackDiv = document.createElement('div'); + feedbackDiv.className = 'alert alert-danger mt-3 mb-0'; + feedbackDiv.style.fontSize = '0.9rem'; + const aiBadge = data.was_explained_by_llm ? 
'AI' : 'System'; + feedbackDiv.innerHTML = `${aiBadge} ${data.error_category}: ${data.explanation}`; + document.getElementById(`qcard-${qIndex}`).appendChild(feedbackDiv); + } + + } catch(e) { + console.log('Submission error', e); + document.getElementById(`qstatus-${qIndex}`).textContent = 'Answered ✓'; } } diff --git a/test_gemini.py b/test_gemini.py deleted file mode 100644 index 1807658..0000000 --- a/test_gemini.py +++ /dev/null @@ -1,3 +0,0 @@ -from ai_engine.services.gemini_service import generate_schedule - -print(generate_schedule("Created"))