Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion ai/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,17 @@ def _execute_model_request(self, text):

This image contains a multiple choice question. Using the latest accurate information from search results, tell me which answer is correct. Only tell me the correct answer letter (A, B, C, D, etc.), no explanation needed.

Question: {text}"""
Question: {text}

IMPORTANT OUTPUT FORMATTING INSTRUCTIONS:
- If the question has multiple choice options (A, B, C, D, etc.), respond with ONLY the letter (e.g., 'A' or 'B')
- If the question asks for a number, respond with ONLY the number (e.g., '4' not 'four' or '4 times')
- If the question asks for a time period, respond with the most concise standard form (e.g., 'Quarterly' for questions about reporting frequency)
- If the question asks for a percentage, respond with ONLY the number and % symbol (e.g., '15%')
- If the question asks for a dollar amount, respond with ONLY the number and $ symbol (e.g., '$100')
- Do not include periods, explanatory text, or elaboration
- Do not include phrases like 'The answer is' or 'The correct answer is'
- Respond with the most standardized, concise form possible"""

# Send the request with Google Search grounding enabled
response = self.client.models.generate_content(
Expand Down
2 changes: 1 addition & 1 deletion ai/gpt4.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _execute_model_request(self, text):
},
{
"role": "user",
"content": f"This contains a multiple choice question. Tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}"
"content": f"This contains a multiple choice question. Tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}\n\nIMPORTANT OUTPUT FORMATTING INSTRUCTIONS:\n- If the question has multiple choice options (A, B, C, D, etc.), respond with ONLY the letter (e.g., 'A' or 'B')\n- If the question asks for a number, respond with ONLY the number (e.g., '4' not 'four' or '4 times')\n- If the question asks for a time period, respond with the most concise standard form (e.g., 'Quarterly' for questions about reporting frequency)\n- If the question asks for a percentage, respond with ONLY the number and % symbol (e.g., '15%')\n- If the question asks for a dollar amount, respond with ONLY the number and $ symbol (e.g., '$100')\n- Do not include periods, explanatory text, or elaboration\n- Do not include phrases like 'The answer is' or 'The correct answer is'\n- Respond with the most standardized, concise form possible"
}
],
"max_tokens": 100
Expand Down
2 changes: 1 addition & 1 deletion ai/perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _execute_model_request(self, text):
},
{
"role": "user",
"content": f"This image contains a multiple choice question. Using the latest information tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}"
"content": f"This image contains a multiple choice question. Using the latest information tell me which answer is correct. Only tell me the correct answer, no explanation needed.\n\n{text}\n\nIMPORTANT OUTPUT FORMATTING INSTRUCTIONS:\n- If the question has multiple choice options (A, B, C, D, etc.), respond with ONLY the letter (e.g., 'A' or 'B')\n- If the question asks for a number, respond with ONLY the number (e.g., '4' not 'four' or '4 times')\n- If the question asks for a time period, respond with the most concise standard form (e.g., 'Quarterly' for questions about reporting frequency)\n- If the question asks for a percentage, respond with ONLY the number and % symbol (e.g., '15%')\n- If the question asks for a dollar amount, respond with ONLY the number and $ symbol (e.g., '$100')\n- Do not include periods, explanatory text, or elaboration\n- Do not include phrases like 'The answer is' or 'The correct answer is'\n- Respond with the most standardized, concise form possible"
}
]
}
Expand Down
51 changes: 29 additions & 22 deletions core/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,29 +346,36 @@ def get_model_result(model_name):
sonar_result = results["sonar"]["result"]
gemini_result = results["gemini"]["result"]

# Normalize all answers for semantic comparison
from core.utils import normalize_answer
gpt4_normalized = normalize_answer(gpt4_result)
sonar_pro_normalized = normalize_answer(sonar_pro_result)
sonar_normalized = normalize_answer(sonar_result)
gemini_normalized = normalize_answer(gemini_result)

print("\n" + "="*60)
if gpt4_result == sonar_pro_result == sonar_result == gemini_result:
print("All models agree on the answer!")
elif gpt4_result == sonar_pro_result == sonar_result:
print("GPT-4, Sonar Pro, and Sonar agree, but Gemini differs")
elif gpt4_result == sonar_pro_result == gemini_result:
print("GPT-4, Sonar Pro, and Gemini agree, but Sonar differs")
elif gpt4_result == sonar_result == gemini_result:
print("GPT-4, Sonar, and Gemini agree, but Sonar Pro differs")
elif sonar_pro_result == sonar_result == gemini_result:
print("Sonar Pro, Sonar, and Gemini agree, but GPT-4 differs")
elif gpt4_result == sonar_pro_result:
print("GPT-4 and Sonar Pro agree, but Sonar and Gemini differ")
elif gpt4_result == sonar_result:
print("GPT-4 and Sonar agree, but Sonar Pro and Gemini differ")
elif gpt4_result == gemini_result:
print("GPT-4 and Gemini agree, but Sonar Pro and Sonar differ")
elif sonar_pro_result == sonar_result:
print("Sonar Pro and Sonar agree, but GPT-4 and Gemini differ")
elif sonar_pro_result == gemini_result:
print("Sonar Pro and Gemini agree, but GPT-4 and Sonar differ")
elif sonar_result == gemini_result:
print("Sonar and Gemini agree, but GPT-4 and Sonar Pro differ")
if gpt4_normalized == sonar_pro_normalized == sonar_normalized == gemini_normalized:
print("All models agree on the answer!")
elif gpt4_normalized == sonar_pro_normalized == sonar_normalized:
print("⚠️ GPT-4, Sonar Pro, and Sonar agree, but Gemini differs")
elif gpt4_normalized == sonar_pro_normalized == gemini_normalized:
print("⚠️ GPT-4, Sonar Pro, and Gemini agree, but Sonar differs")
elif gpt4_normalized == sonar_normalized == gemini_normalized:
print("⚠️ GPT-4, Sonar, and Gemini agree, but Sonar Pro differs")
elif sonar_pro_normalized == sonar_normalized == gemini_normalized:
print("⚠️ Sonar Pro, Sonar, and Gemini agree, but GPT-4 differs")
elif gpt4_normalized == sonar_pro_normalized:
print("⚠️ GPT-4 and Sonar Pro agree, but Sonar and Gemini differ")
elif gpt4_normalized == sonar_normalized:
print("⚠️ GPT-4 and Sonar agree, but Sonar Pro and Gemini differ")
elif gpt4_normalized == gemini_normalized:
print("⚠️ GPT-4 and Gemini agree, but Sonar Pro and Sonar differ")
elif sonar_pro_normalized == sonar_normalized:
print("⚠️ Sonar Pro and Sonar agree, but GPT-4 and Gemini differ")
elif sonar_pro_normalized == gemini_normalized:
print("⚠️ Sonar Pro and Gemini agree, but GPT-4 and Sonar differ")
elif sonar_normalized == gemini_normalized:
print("⚠️ Sonar and Gemini agree, but GPT-4 and Sonar Pro differ")
else:
print("❌ All models give different answers")
print("="*60 + "\n")
Expand Down
126 changes: 126 additions & 0 deletions core/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import re

# Characters treated as formatting residue at either end of an answer.
_EDGE_JUNK = '.,!?;: '

# Markdown emphasis wrappers whose inner text should be kept.
_MARKDOWN_PATTERNS = (
    re.compile(r'\*\*([^*]+)\*\*'),             # **bold**
    re.compile(r'\*([^*]+)\*'),                 # *italic*
    re.compile(r'(?<!\w)_([^_]+)_(?!\w)'),      # _italic_ (but not snake_case)
)

# Boilerplate that models put in front of the actual answer.
_LEAD_IN_PATTERNS = tuple(re.compile(p) for p in (
    r'^the answer is\s*',
    r'^the correct answer is\s*',
    r'^answer:\s*',
    r'^correct answer:\s*',
    r'^based on.*?the.*?answer is\s*',
    r'^according to.*?the.*?answer is\s*',
))

# A period ends the answer only when followed by whitespace or end of text,
# so decimal points such as "3.5" survive.
_SENTENCE_TAIL = re.compile(r'\.(?:\s.*)?$')

# Frequency wording -> occurrences per year.
_FREQUENCY_TERMS = {
    'quarterly': '4',
    'four times a year': '4',
    '4 times a year': '4',
    'four times per year': '4',
    '4 times per year': '4',
    'every quarter': '4',
    'every 3 months': '4',
    'semi-annually': '2',
    'twice a year': '2',
    '2 times a year': '2',
    'twice per year': '2',
    '2 times per year': '2',
    'every 6 months': '2',
    'annually': '1',
    'once a year': '1',
    '1 time a year': '1',
    'yearly': '1',
    'monthly': '12',
    '12 times a year': '12',
    'twelve times a year': '12',
}

# Spelled-out percentages -> numeric form.
_PERCENT_WORDS = {
    'fifty percent': '50%',
    'twenty-five percent': '25%',
    'ten percent': '10%',
    'five percent': '5%',
    'one percent': '1%',
    'zero percent': '0%',
}

# Spelled-out numbers -> digits (applied only when the word is the whole answer).
_NUMBER_WORDS = {
    'zero': '0',
    'one': '1',
    'two': '2',
    'three': '3',
    'four': '4',
    'five': '5',
    'six': '6',
    'seven': '7',
    'eight': '8',
    'nine': '9',
    'ten': '10',
    'eleven': '11',
    'twelve': '12',
    'thirteen': '13',
    'fourteen': '14',
    'fifteen': '15',
    'sixteen': '16',
    'seventeen': '17',
    'eighteen': '18',
    'nineteen': '19',
    'twenty': '20',
}


def _compile_phrase_table(table):
    """Pre-compile each phrase so it only matches as a whole phrase.

    The look-arounds reject neighbouring word characters and hyphens, so
    'annually' does not fire inside 'semi-annually' and 'five percent' does
    not fire inside 'fifty-five percent'.
    """
    return tuple(
        (re.compile(r'(?<![\w-])' + re.escape(phrase) + r'(?![\w-])'), canonical)
        for phrase, canonical in table.items()
    )


def _match_phrase(text, compiled_table):
    """Return the canonical value of the first table phrase found in text, else None."""
    for pattern, canonical in compiled_table:
        if pattern.search(text):
            return canonical
    return None


_FREQUENCY_PATTERNS = _compile_phrase_table(_FREQUENCY_TERMS)
_PERCENT_WORD_PATTERNS = _compile_phrase_table(_PERCENT_WORDS)

# A letter counts as a multiple-choice pick only in an unambiguous position:
# the whole answer ("b"), a leading label ("b)", "(b)", "b: x", "b - x"),
# after a keyword ("option b"), or parenthesised ("paris (b)").
# A bare "a" inside a sentence is the English article, not a choice.
_CHOICE_PATTERN = re.compile(
    r'^\(?([a-e])(?:\)|[:,](?=\s|$)|\s+[-\u2013\u2014]\s|$)'
    r'|\b(?:option|choice|letter)\s+\(?([a-e])\b'
    r'|\(([a-e])\)'
)

# "15 percent" / "15 per cent" -> "15%".
_PERCENT_SUFFIX = re.compile(r'(?<=\d)\s*(?:percent|per cent)\b')

# A numeric token, keeping an attached "$" prefix, thousands separators,
# decimals and a "%" suffix.
_NUMBER_PATTERN = re.compile(
    r'(?<![\w.,])\$?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?%?(?!\w|[.,]\d)'
)


def normalize_answer(answer):
    """Reduce a model's free-text answer to a canonical form for comparison.

    Two answers that mean the same thing (e.g. "**B**" and "The answer is B.",
    or "Quarterly" and "four times a year") normalize to the same string, so
    callers can compare the results with ``==``.

    Steps, in order:
      1. Strip markdown emphasis, collapse whitespace, trim edge punctuation,
         lowercase.
      2. Drop lead-in boilerplate ("the answer is ...") and anything after the
         first sentence-ending period. Decimal points are preserved.
      3. If the text identifies a multiple-choice letter (a-e), return it.
      4. Map known frequency phrases, spelled-out percentages and number
         words to their numeric form.
      5. For multi-word answers containing a number, return just the numeric
         token, keeping an attached "$" or "%".

    Args:
        answer: The raw answer. ``None`` yields ``""``; non-string values are
            converted with ``str()``.

    Returns:
        The normalized, lowercase answer string (possibly empty).
    """
    if answer is None:
        return ""

    text = str(answer)
    for pattern in _MARKDOWN_PATTERNS:
        text = pattern.sub(r'\1', text)

    text = re.sub(r'\s+', ' ', text).strip(_EDGE_JUNK).lower()

    # Boilerplate must go before any mapping, otherwise "the answer is four"
    # never reaches the exact-match number-word lookup.
    for pattern in _LEAD_IN_PATTERNS:
        text = pattern.sub('', text)
    text = _SENTENCE_TAIL.sub('', text).strip(_EDGE_JUNK)

    # A choice label is the canonical answer for multiple-choice questions,
    # so it takes precedence over whatever option text follows it.
    choice = _CHOICE_PATTERN.search(text)
    if choice:
        return next(group for group in choice.groups() if group)

    for compiled_table in (_FREQUENCY_PATTERNS, _PERCENT_WORD_PATTERNS):
        canonical = _match_phrase(text, compiled_table)
        if canonical is not None:
            return canonical

    if text in _NUMBER_WORDS:
        return _NUMBER_WORDS[text]

    text = _PERCENT_SUFFIX.sub('%', text)

    # Only reduce to the bare number when there is surrounding prose;
    # a single token such as "$100" or "15%" is already canonical.
    if len(text.split()) > 1:
        number = _NUMBER_PATTERN.search(text)
        if number:
            return number.group(0)

    return text
8 changes: 7 additions & 1 deletion ui/renderer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import cv2
from core.utils import normalize_answer

class TextRenderer:
"""Handles text rendering with wrapping and formatting"""
Expand Down Expand Up @@ -103,7 +104,12 @@ def render_result_overlay(frame, question_text, results, is_processing):
sonar_pro_result = results["sonar_pro"]["result"]
sonar_result = results["sonar"]["result"]

if gpt4_result == sonar_pro_result == sonar_result:
# Normalize answers for comparison
gpt4_normalized = normalize_answer(gpt4_result)
sonar_pro_normalized = normalize_answer(sonar_pro_result)
sonar_normalized = normalize_answer(sonar_result)

if gpt4_normalized == sonar_pro_normalized == sonar_normalized:
cv2.putText(display_frame, "All models agree!", (10, y_pos),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
else:
Expand Down