vikvang · uppusaikiran · May 22, 2025
diff --git a/ai/gemini.py b/ai/gemini.py
@@ -23,7 +23,17 @@ def _execute_model_request(self, text):
 
             This image contains a multiple choice question. Using the latest accurate information from search results, tell me which answer is correct. Only tell me the correct answer letter (A, B, C, D, etc.), no explanation needed.
 
-            Question: {text}"""
+            Question: {text}
+
+            IMPORTANT OUTPUT FORMATTING INSTRUCTIONS:
+            - If the question has multiple choice options (A, B, C, D, etc.), respond with ONLY the letter (e.g., 'A' or 'B')
+            - If the question asks for a number, respond with ONLY the number (e.g., '4' not 'four' or '4 times')
+            - If the question asks for a time period, respond with the most concise standard form (e.g., 'Quarterly' for questions about reporting frequency)
+            - If the question asks for a percentage, respond with ONLY the number and % symbol (e.g., '15%')
+            - If the question asks for a dollar amount, respond with ONLY the number and $ symbol (e.g., '$100')
+            - Do not include periods, explanatory text, or elaboration
+            - Do not include phrases like 'The answer is' or 'The correct answer is'
+            - Respond with the most standardized, concise form possible"""
 
             # Send the request with Google Search grounding enabled
             response = self.client.models.generate_content(

diff --git a/ai/gpt4.py b/ai/gpt4.py
@@ -27,7 +27,7 @@ def _execute_model_request(self, text):
                 },
                 {
                     "role": "user",
-                    "content": f"This contains a multiple choice question. Tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}"
+                    "content": f"This contains a multiple choice question. Tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}\n\nIMPORTANT OUTPUT FORMATTING INSTRUCTIONS:\n- If the question has multiple choice options (A, B, C, D, etc.), respond with ONLY the letter (e.g., 'A' or 'B')\n- If the question asks for a number, respond with ONLY the number (e.g., '4' not 'four' or '4 times')\n- If the question asks for a time period, respond with the most concise standard form (e.g., 'Quarterly' for questions about reporting frequency)\n- If the question asks for a percentage, respond with ONLY the number and % symbol (e.g., '15%')\n- If the question asks for a dollar amount, respond with ONLY the number and $ symbol (e.g., '$100')\n- Do not include periods, explanatory text, or elaboration\n- Do not include phrases like 'The answer is' or 'The correct answer is'\n- Respond with the most standardized, concise form possible"
                 }
             ],
             "max_tokens": 100

diff --git a/ai/perplexity.py b/ai/perplexity.py
@@ -27,7 +27,7 @@ def _execute_model_request(self, text):
                 },
                 {
                     "role": "user",
-                    "content": f"This image contains a multiple choice question. Using the latest information tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}"
+                    "content": f"This image contains a multiple choice question. Using the latest information tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}\n\nIMPORTANT OUTPUT FORMATTING INSTRUCTIONS:\n- If the question has multiple choice options (A, B, C, D, etc.), respond with ONLY the letter (e.g., 'A' or 'B')\n- If the question asks for a number, respond with ONLY the number (e.g., '4' not 'four' or '4 times')\n- If the question asks for a time period, respond with the most concise standard form (e.g., 'Quarterly' for questions about reporting frequency)\n- If the question asks for a percentage, respond with ONLY the number and % symbol (e.g., '15%')\n- If the question asks for a dollar amount, respond with ONLY the number and $ symbol (e.g., '$100')\n- Do not include periods, explanatory text, or elaboration\n- Do not include phrases like 'The answer is' or 'The correct answer is'\n- Respond with the most standardized, concise form possible"
                 }
             ]
         }

diff --git a/core/app.py b/core/app.py
@@ -346,29 +346,36 @@ def get_model_result(model_name):
                             sonar_result = results["sonar"]["result"]
                             gemini_result = results["gemini"]["result"]
 
+                            # Normalize all answers for semantic comparison
+                            from core.utils import normalize_answer
+                            gpt4_normalized = normalize_answer(gpt4_result)
+                            sonar_pro_normalized = normalize_answer(sonar_pro_result)
+                            sonar_normalized = normalize_answer(sonar_result)
+                            gemini_normalized = normalize_answer(gemini_result)
+
                             print("\n" + "="*60)
-                            if gpt4_result == sonar_pro_result == sonar_result == gemini_result:
-                                print("All models agree on the answer!")
-                            elif gpt4_result == sonar_pro_result == sonar_result:
-                                print("GPT-4, Sonar Pro, and Sonar agree, but Gemini differs")
-                            elif gpt4_result == sonar_pro_result == gemini_result:
-                                print("GPT-4, Sonar Pro, and Gemini agree, but Sonar differs")
-                            elif gpt4_result == sonar_result == gemini_result:
-                                print("GPT-4, Sonar, and Gemini agree, but Sonar Pro differs")
-                            elif sonar_pro_result == sonar_result == gemini_result:
-                                print("Sonar Pro, Sonar, and Gemini agree, but GPT-4 differs")
-                            elif gpt4_result == sonar_pro_result:
-                                print("GPT-4 and Sonar Pro agree, but Sonar and Gemini differ")
-                            elif gpt4_result == sonar_result:
-                                print("GPT-4 and Sonar agree, but Sonar Pro and Gemini differ")
-                            elif gpt4_result == gemini_result:
-                                print("GPT-4 and Gemini agree, but Sonar Pro and Sonar differ")
-                            elif sonar_pro_result == sonar_result:
-                                print("Sonar Pro and Sonar agree, but GPT-4 and Gemini differ")
-                            elif sonar_pro_result == gemini_result:
-                                print("Sonar Pro and Gemini agree, but GPT-4 and Sonar differ")
-                            elif sonar_result == gemini_result:
-                                print("Sonar and Gemini agree, but GPT-4 and Sonar Pro differ")
+                            if gpt4_normalized == sonar_pro_normalized == sonar_normalized == gemini_normalized:
+                                print("✅ All models agree on the answer!")
+                            elif gpt4_normalized == sonar_pro_normalized == sonar_normalized:
+                                print("⚠️ GPT-4, Sonar Pro, and Sonar agree, but Gemini differs")
+                            elif gpt4_normalized == sonar_pro_normalized == gemini_normalized:
+                                print("⚠️ GPT-4, Sonar Pro, and Gemini agree, but Sonar differs")
+                            elif gpt4_normalized == sonar_normalized == gemini_normalized:
+                                print("⚠️ GPT-4, Sonar, and Gemini agree, but Sonar Pro differs")
+                            elif sonar_pro_normalized == sonar_normalized == gemini_normalized:
+                                print("⚠️ Sonar Pro, Sonar, and Gemini agree, but GPT-4 differs")
+                            elif gpt4_normalized == sonar_pro_normalized:
+                                print("⚠️ GPT-4 and Sonar Pro agree, but Sonar and Gemini differ")
+                            elif gpt4_normalized == sonar_normalized:
+                                print("⚠️ GPT-4 and Sonar agree, but Sonar Pro and Gemini differ")
+                            elif gpt4_normalized == gemini_normalized:
+                                print("⚠️ GPT-4 and Gemini agree, but Sonar Pro and Sonar differ")
+                            elif sonar_pro_normalized == sonar_normalized:
+                                print("⚠️ Sonar Pro and Sonar agree, but GPT-4 and Gemini differ")
+                            elif sonar_pro_normalized == gemini_normalized:
+                                print("⚠️ Sonar Pro and Gemini agree, but GPT-4 and Sonar differ")
+                            elif sonar_normalized == gemini_normalized:
+                                print("⚠️ Sonar and Gemini agree, but GPT-4 and Sonar Pro differ")
                             else:
                                 print("❌ All models give different answers")
                             print("="*60 + "\n")

diff --git a/core/utils.py b/core/utils.py
@@ -0,0 +1,150 @@
+import re
+
+# Phrases describing how often something happens, mapped to occurrences per
+# year. Matching is by substring and the first hit wins, so a longer phrase
+# must stay ahead of any phrase it contains ('semi-annually' vs 'annually').
+_FREQUENCY_MAPPINGS = {
+    'quarterly': '4',
+    'four times a year': '4',
+    '4 times a year': '4',
+    'four times per year': '4',
+    '4 times per year': '4',
+    'every quarter': '4',
+    'every 3 months': '4',
+    'semi-annually': '2',
+    'twice a year': '2',
+    '2 times a year': '2',
+    'twice per year': '2',
+    '2 times per year': '2',
+    'every 6 months': '2',
+    'annually': '1',
+    'once a year': '1',
+    '1 time a year': '1',
+    'yearly': '1',
+    'monthly': '12',
+    '12 times a year': '12',
+    'twelve times a year': '12',
+}
+
+# Spelled-out percentages, matched by substring with the first hit winning
+# ('twenty-five percent' must stay ahead of 'five percent').
+_PERCENTAGE_MAPPINGS = {
+    'fifty percent': '50%',
+    'twenty-five percent': '25%',
+    'ten percent': '10%',
+    'five percent': '5%',
+    'one percent': '1%',
+    'zero percent': '0%',
+}
+
+# Number words; only applied when the whole answer is exactly the word.
+_NUMBER_MAPPINGS = {
+    'zero': '0',
+    'one': '1',
+    'two': '2',
+    'three': '3',
+    'four': '4',
+    'five': '5',
+    'six': '6',
+    'seven': '7',
+    'eight': '8',
+    'nine': '9',
+    'ten': '10',
+    'eleven': '11',
+    'twelve': '12',
+    'thirteen': '13',
+    'fourteen': '14',
+    'fifteen': '15',
+    'sixteen': '16',
+    'seventeen': '17',
+    'eighteen': '18',
+    'nineteen': '19',
+    'twenty': '20',
+}
+
+# Lead-in phrases that carry no information; removed in this order.
+_LEAD_IN_PATTERNS = [
+    re.compile(r'^the answer is\s*'),
+    re.compile(r'^the correct answer is\s*'),
+    re.compile(r'^answer:\s*'),
+    re.compile(r'^correct answer:\s*'),
+    re.compile(r'^based on.*?the.*?answer is\s*'),
+    re.compile(r'^according to.*?the.*?answer is\s*'),
+]
+
+# Everything from the first sentence-ending period on. A period followed by a
+# digit is a decimal point ('3.5', '$1.50') and must survive.
+_TRAILING_SENTENCE_RE = re.compile(r'\.(?!\d).*$')
+
+# A standalone choice letter. A bare 'a' followed by another word is treated
+# as the article ('a bond'), not as choice A.
+_CHOICE_RE = re.compile(r'\b([b-e]|a(?!\s+\w))\b')
+
+# A number with optional '$' prefix, thousands separators, decimals and '%'.
+_NUMBER_RE = re.compile(r'(?<!\w)(\$?\d+(?:,\d{3})*(?:\.\d+)?%?)(?!\w)')
+
+
+def normalize_answer(answer):
+    """Reduce a model's answer to a canonical form for equality comparison.
+
+    Markdown emphasis, case, surrounding punctuation, lead-in phrases such as
+    'the answer is' and any trailing sentences are removed first. The rest is
+    then canonicalised:
+
+    - frequency phrases become occurrences per year ('quarterly' -> '4')
+    - spelled-out percentages and '<n> percent' become '<n>%'
+    - an answer that is exactly a number word becomes its digits
+    - a standalone choice letter a-e replaces the whole answer
+    - a number embedded in a multi-word answer replaces the whole answer
+
+    Args:
+        answer: Raw answer text; may be None or empty.
+
+    Returns:
+        The normalized lower-case answer, or '' when ``answer`` is falsy.
+    """
+    if not answer:
+        return ""
+
+    # Remove markdown formatting (**bold**, *italic*, _italic_)
+    normalized = re.sub(r'\*\*([^*]+)\*\*', r'\1', answer)
+    normalized = re.sub(r'\*([^*]+)\*', r'\1', normalized)
+    normalized = re.sub(r'_([^_]+)_', r'\1', normalized)
+
+    # Collapse whitespace, drop punctuation artifacts, ignore case
+    normalized = re.sub(r'\s+', ' ', normalized).strip()
+    normalized = normalized.strip('.,!?;:').lower()
+
+    # Strip lead-ins and trailing sentences BEFORE the mappings so that
+    # 'The answer is four' reaches the exact-match number lookup as 'four'.
+    for pattern in _LEAD_IN_PATTERNS:
+        normalized = pattern.sub('', normalized)
+    normalized = _TRAILING_SENTENCE_RE.sub('', normalized)
+    normalized = normalized.strip('.,!?;: ')
+
+    # Phrase mappings replace the whole answer with the standard form
+    for phrase, standardized in _FREQUENCY_MAPPINGS.items():
+        if phrase in normalized:
+            return standardized
+
+    for phrase, standardized in _PERCENTAGE_MAPPINGS.items():
+        if phrase in normalized:
+            return standardized
+
+    if normalized in _NUMBER_MAPPINGS:
+        return _NUMBER_MAPPINGS[normalized]
+
+    # '15 percent' -> '15%' so it compares equal to the symbol form
+    normalized = re.sub(r'(\d)\s*percent\b', r'\1%', normalized)
+
+    # A letter choice (A-E) stands for the whole answer
+    letter_match = _CHOICE_RE.search(normalized)
+    if letter_match:
+        return letter_match.group(1)
+
+    # Pull the number out of a longer phrase ('about 15% growth' -> '15%')
+    number_match = _NUMBER_RE.search(normalized)
+    if number_match and len(normalized.split()) > 1:
+        return number_match.group(1)
+
+    return normalized
diff --git a/ui/renderer.py b/ui/renderer.py
@@ -1,4 +1,5 @@
 import cv2
+from core.utils import normalize_answer
 
 class TextRenderer:
     """Handles text rendering with wrapping and formatting"""
@@ -103,7 +104,12 @@ def render_result_overlay(frame, question_text, results, is_processing):
                 sonar_pro_result = results["sonar_pro"]["result"]
                 sonar_result = results["sonar"]["result"]
 
-                if gpt4_result == sonar_pro_result == sonar_result:
+                # Normalize answers for comparison
+                gpt4_normalized = normalize_answer(gpt4_result)
+                sonar_pro_normalized = normalize_answer(sonar_pro_result)
+                sonar_normalized = normalize_answer(sonar_result)
+
+                if gpt4_normalized == sonar_pro_normalized == sonar_normalized:
                     cv2.putText(display_frame, "All models agree!", (10, y_pos), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                 else:
-Original file line number
+Diff line change
@@ Expand Up / @@ -27,7 +27,7 @@ def _execute_model_request(self, text): @@
                     },
                     {
                         "role": "user",
-                        "content": f"This image contains a multiple choice question. Using the latest information tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}"
+                        "content": f"This image contains a multiple choice question. Using the latest information tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}\n\nIMPORTANT OUTPUT FORMATTING INSTRUCTIONS:\n- If the question has multiple choice options (A, B, C, D, etc.), respond with ONLY the letter (e.g., 'A' or 'B')\n- If the question asks for a number, respond with ONLY the number (e.g., '4' not 'four' or '4 times')\n- If the question asks for a time period, respond with the most concise standard form (e.g., 'Quarterly' for questions about reporting frequency)\n- If the question asks for a percentage, respond with ONLY the number and % symbol (e.g., '15%')\n- If the question asks for a dollar amount, respond with ONLY the number and $ symbol (e.g., '$100')\n- Do not include periods, explanatory text, or elaboration\n- Do not include phrases like 'The answer is' or 'The correct answer is'\n- Respond with the most standardized, concise form possible"
                     }
                 ]
             }
@@ Expand Down @@