diff --git a/sdks/typescript/README.md b/sdks/typescript/README.md index 9eec627..51f8726 100644 --- a/sdks/typescript/README.md +++ b/sdks/typescript/README.md @@ -65,9 +65,7 @@ await evaluator.evaluate(text: string, grade: string) score: 'slightly complex' | 'moderately complex' | 'very complex' | 'exceedingly complex'; reasoning: string; metadata: { - promptVersion: string; model: string; - timestamp: Date; processingTimeMs: number; }; _internal: VocabularyComplexity; // Detailed analysis @@ -106,9 +104,7 @@ await evaluator.evaluate(text: string, grade: string) score: 'Slightly Complex' | 'Moderately Complex' | 'Very Complex' | 'Exceedingly Complex'; reasoning: string; metadata: { - promptVersion: string; model: string; - timestamp: Date; processingTimeMs: number; }; _internal: { @@ -195,9 +191,7 @@ await evaluator.evaluate(text: string) score: string; // e.g., 'K-1', '2-3', '4-5', '6-8', '9-10', '11-CCR' reasoning: string; metadata: { - promptVersion: string; model: string; - timestamp: Date; processingTimeMs: number; }; _internal: { diff --git a/sdks/typescript/src/evaluators/base.ts b/sdks/typescript/src/evaluators/base.ts index a910100..d4e48d6 100644 --- a/sdks/typescript/src/evaluators/base.ts +++ b/sdks/typescript/src/evaluators/base.ts @@ -158,7 +158,7 @@ export abstract class BaseEvaluator { // Initialize telemetry if enabled if (this.config.telemetry.enabled) { this.telemetryClient = new TelemetryClient({ - endpoint: 'https://api.learningcommons.org/v1/telemetry', + endpoint: 'https://api.learningcommons.org/evaluators-telemetry/v1/events', partnerKey: config.partnerKey, clientId: generateClientId(), enabled: true, diff --git a/sdks/typescript/src/evaluators/grade-level-appropriateness.ts b/sdks/typescript/src/evaluators/grade-level-appropriateness.ts index 3f43a61..9c5ad0a 100644 --- a/sdks/typescript/src/evaluators/grade-level-appropriateness.ts +++ b/sdks/typescript/src/evaluators/grade-level-appropriateness.ts @@ -2,10 +2,10 @@ import type { LLMProvider } from '../providers/index.js'; import { createProvider } from '../providers/index.js'; import { GradeLevelAppropriatenessSchema, - type GradeLevelAppropriateness, + type GradeLevelAppropriatenessInternal, } from '../schemas/grade-level-appropriateness.js'; import { getSystemPrompt, getUserPrompt } from '../prompts/grade-level-appropriateness/index.js'; -import type { EvaluationResult } from '../schemas/index.js'; +import type { EvaluationResult, GradeBand } from '../schemas/index.js'; import { BaseEvaluator, type BaseEvaluatorConfig } from './base.js'; import { ValidationError, wrapProviderError } from '../errors.js'; @@ -69,7 +69,7 @@ export class GradeLevelAppropriatenessEvaluator extends BaseEvaluator { * @throws {ValidationError} If text is empty or too short/long * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError) */ - async evaluate(text: string): Promise> { + async evaluate(text: string): Promise> { this.logger.info('Starting grade level appropriateness evaluation', { evaluator: 'grade-level-appropriateness', operation: 'evaluate', @@ -107,9 +107,7 @@ export class GradeLevelAppropriatenessEvaluator extends BaseEvaluator { score: response.data.grade, reasoning: response.data.reasoning, metadata: { - promptVersion: '1.2.0', model: 'google:gemini-2.5-pro', - timestamp: new Date(), processingTimeMs: latencyMs, }, _internal: response.data, @@ -186,7 +184,7 @@ export class GradeLevelAppropriatenessEvaluator extends BaseEvaluator { export async function evaluateGradeLevelAppropriateness( text: string, config: BaseEvaluatorConfig -): Promise> { +): Promise> { const evaluator = new GradeLevelAppropriatenessEvaluator(config); return evaluator.evaluate(text); } diff --git a/sdks/typescript/src/evaluators/index.ts b/sdks/typescript/src/evaluators/index.ts index 3a42ba2..7451765 100644 --- a/sdks/typescript/src/evaluators/index.ts +++ b/sdks/typescript/src/evaluators/index.ts @@ -23,6 +23,5 @@ export { export { TextComplexityEvaluator, evaluateTextComplexity, - type TextComplexityScore, - type TextComplexityInternal, + type TextComplexityResult, } from './text-complexity.js'; diff --git a/sdks/typescript/src/evaluators/sentence-structure.ts b/sdks/typescript/src/evaluators/sentence-structure.ts index c1b66d2..689ec87 100644 --- a/sdks/typescript/src/evaluators/sentence-structure.ts +++ b/sdks/typescript/src/evaluators/sentence-structure.ts @@ -6,6 +6,7 @@ import { type SentenceAnalysis, type SentenceFeatures, type ComplexityClassification, + type SentenceStructureInternal, } from '../schemas/sentence-structure.js'; import { calculateReadabilityMetrics, addEngineeredFeatures, featuresToJSON } from '../features/index.js'; import { @@ -14,39 +15,29 @@ import { getSystemPromptComplexity, getUserPromptComplexity, } from '../prompts/sentence-structure/index.js'; -import type { EvaluationResult, ComplexityLevel } from '../schemas/index.js'; +import type { EvaluationResult, TextComplexityLevel } from '../schemas/index.js'; import { BaseEvaluator, type BaseEvaluatorConfig } from './base.js'; import type { StageDetail } from '../telemetry/index.js'; import { ValidationError, wrapProviderError } from '../errors.js'; -/** - * Internal data structure for sentence structure evaluation - */ -interface SentenceStructureInternal { - sentenceAnalysis: SentenceAnalysis; - features: SentenceFeatures; - complexity: ComplexityClassification; -} - /** * Normalize complexity label to handle LLM output variations - * Ported from Python normalize_label function */ -function normalizeLabel(label: string | null | undefined): string | null { +function normalizeLabel(label: string | null | undefined): TextComplexityLevel | null { if (!label) { return null; } - const normalized = label.trim().toLowerCase(); - const mapping: Record = { - 'slightly complex': 'Slightly Complex', - 'moderately complex': 'Moderately Complex', - 'very complex': 'Very Complex', - 'exceedingly complex': 'Exceedingly Complex', - 'extremely complex': 'Exceedingly Complex', // Maps to Exceedingly Complex + const normalized = label.trim().toLowerCase().replace(/_/g, ' '); + const mapping: Record = { + 'slightly complex': 'Slightly complex', + 'moderately complex': 'Moderately complex', + 'very complex': 'Very complex', + 'exceedingly complex': 'Exceedingly complex', + 'extremely complex': 'Exceedingly complex', }; - return mapping[normalized] || null; // Return null if no mapping found + return mapping[normalized] ?? null; } /** @@ -57,11 +48,11 @@ function normalizeLabel(label: string | null | undefined): string | null { * 1. Analyze grammatical structure (sentence types, clauses, phrases, etc.) * 2. Classify complexity using features and grade-specific rubric * - * Based on SCASS Text Complexity rubric with 4 levels: - * - Slightly Complex - * - Moderately Complex - * - Very Complex - * - Exceedingly Complex + * Based on Qualitative Text Complexity rubric with 4 levels: + * - Slightly complex + * - Moderately complex + * - Very complex + * - Exceedingly complex * * @example * ```typescript @@ -70,7 +61,7 @@ function normalizeLabel(label: string | null | undefined): string | null { * }); * * const result = await evaluator.evaluate(text, "3"); - * console.log(result.score); // "Moderately Complex" + * console.log(result.score); // "Moderately complex" * console.log(result.reasoning); * ``` */ @@ -119,7 +110,7 @@ export class SentenceStructureEvaluator extends BaseEvaluator { async evaluate( text: string, grade: string - ): Promise> { + ): Promise> { this.logger.info('Starting sentence structure evaluation', { evaluator: 'sentence-structure', operation: 'evaluate', @@ -183,9 +174,7 @@ export class SentenceStructureEvaluator extends BaseEvaluator { score: complexityResponse.data.answer, reasoning: complexityResponse.data.reasoning, metadata: { - promptVersion: '1.2.0', model: 'openai:gpt-4o', - timestamp: new Date(), processingTimeMs: latencyMs, }, _internal: { @@ -338,7 +327,7 @@ export class SentenceStructureEvaluator extends BaseEvaluator { return { data: { ...response.data, - answer: normalizedAnswer as ComplexityLevel, + answer: normalizedAnswer, }, usage: response.usage, latencyMs: response.latencyMs, @@ -364,7 +353,7 @@ export async function evaluateSentenceStructure( text: string, grade: string, config: BaseEvaluatorConfig -): Promise> { +): Promise> { const evaluator = new SentenceStructureEvaluator(config); return evaluator.evaluate(text, grade); } diff --git a/sdks/typescript/src/evaluators/text-complexity.ts b/sdks/typescript/src/evaluators/text-complexity.ts index 5493a72..114ba03 100644 --- a/sdks/typescript/src/evaluators/text-complexity.ts +++ b/sdks/typescript/src/evaluators/text-complexity.ts @@ -1,27 +1,19 @@ import pLimit from 'p-limit'; import { VocabularyEvaluator } from './vocabulary.js'; import { SentenceStructureEvaluator } from './sentence-structure.js'; +import type { SentenceStructureInternal } from '../schemas/sentence-structure.js'; import type { BaseEvaluatorConfig } from './base.js'; import { BaseEvaluator } from './base.js'; -import type { EvaluationResult } from '../schemas/index.js'; +import type { EvaluationResult, TextComplexityLevel } from '../schemas/index.js'; +import type { VocabularyInternal } from '../schemas/vocabulary.js'; /** - * Internal data structure for text complexity evaluation - * Stores either successful evaluation results or errors from sub-evaluators + * Result map returned by TextComplexityEvaluator. + * Each key holds the full evaluation result from its sub-evaluator, or an error if it failed. */ -export interface TextComplexityInternal { - vocabulary: EvaluationResult | { error: Error }; - sentenceStructure: EvaluationResult | { error: Error }; -} - -/** - * Composite score for text complexity - */ -export interface TextComplexityScore { - /** Vocabulary complexity score */ - vocabulary: string; - /** Sentence structure complexity score */ - sentenceStructure: string; +export interface TextComplexityResult { + vocabulary: EvaluationResult | { error: Error }; + sentenceStructure: EvaluationResult | { error: Error }; } /** @@ -42,8 +34,9 @@ export interface TextComplexityScore { * }); * * const result = await evaluator.evaluate(text, "5"); - * console.log(result.score.vocabulary); - * console.log(result.score.sentenceStructure); + * if (!('error' in result.vocabulary)) { + * console.log(result.vocabulary.score); // "Moderately complex" + * } * ``` */ export class TextComplexityEvaluator extends BaseEvaluator { @@ -76,16 +69,16 @@ export class TextComplexityEvaluator extends BaseEvaluator { * Evaluate text complexity for a given text and grade level * * Runs vocabulary and sentence structure evaluations in parallel with concurrency control. + * If both sub-evaluators fail, throws an error. Otherwise returns a result map where + * failed sub-evaluators are represented as `{ error: Error }`. * * @param text - The text to evaluate * @param grade - The target grade level (3-12) - * @returns Evaluation result with composite complexity score - * @throws {Error} If text is empty or grade is invalid + * @returns Map of sub-evaluator results + * @throws {ValidationError} If text is empty or grade is invalid + * @throws {Error} If all sub-evaluators fail */ - async evaluate( - text: string, - grade: string - ): Promise> { + async evaluate(text: string, grade: string): Promise { this.logger.info('Starting text complexity evaluation', { evaluator: 'text-complexity', operation: 'evaluate', @@ -100,29 +93,23 @@ export class TextComplexityEvaluator extends BaseEvaluator { const startTime = Date.now(); // Run both evaluators in parallel with concurrency control - const [vocabResult, sentenceResult] = await Promise.all([ + const [vocabResult, sentenceResult]: [ + EvaluationResult | { error: Error }, + EvaluationResult | { error: Error }, + ] = await Promise.all([ this.limit(() => this.runSubEvaluator(this.vocabularyEvaluator, text, grade)), this.limit(() => this.runSubEvaluator(this.sentenceStructureEvaluator, text, grade)), ]); const latencyMs = Date.now() - startTime; - - // Build combined reasoning - const reasoning = this.buildCombinedReasoning(vocabResult, sentenceResult); - - // Check if any evaluations failed const vocabFailed = 'error' in vocabResult; const sentenceFailed = 'error' in sentenceResult; const hasFailures = vocabFailed || sentenceFailed; if (hasFailures) { const errors: string[] = []; - if (vocabFailed) { - errors.push(`Vocabulary evaluation failed: ${vocabResult.error.message}`); - } - if (sentenceFailed) { - errors.push(`Sentence structure evaluation failed: ${sentenceResult.error.message}`); - } + if (vocabFailed) errors.push(`Vocabulary: ${vocabResult.error.message}`); + if (sentenceFailed) errors.push(`Sentence structure: ${sentenceResult.error.message}`); this.logger.error('Text complexity evaluation completed with errors', { evaluator: 'text-complexity', @@ -132,32 +119,11 @@ export class TextComplexityEvaluator extends BaseEvaluator { processingTimeMs: latencyMs, }); - // If both failed, throw error if (vocabFailed && sentenceFailed) { - throw new Error( - `Text complexity evaluation failed: ${errors.join('; ')}` - ); + throw new Error(`Text complexity evaluation failed: ${errors.join('; ')}`); } } - const result = { - score: { - vocabulary: vocabFailed ? 'N/A' : vocabResult.score, - sentenceStructure: sentenceFailed ? 'N/A' : sentenceResult.score, - }, - reasoning, - metadata: { - promptVersion: '1.0', - model: 'composite:gemini-2.5-pro+gpt-4o', - timestamp: new Date(), - processingTimeMs: latencyMs, - }, - _internal: { - vocabulary: vocabResult, - sentenceStructure: sentenceResult, - }, - }; - // Send telemetry (fire-and-forget) this.sendTelemetry({ status: hasFailures ? 'error' : 'success', @@ -179,49 +145,23 @@ export class TextComplexityEvaluator extends BaseEvaluator { hasFailures, }); - return result; + return { vocabulary: vocabResult, sentenceStructure: sentenceResult }; } /** - * Run a sub-evaluator with error handling - * Returns the evaluation result or an error object + * Run a sub-evaluator with error handling. + * Returns the evaluation result or `{ error: Error }` if the evaluator throws. */ - private async runSubEvaluator( - evaluator: { evaluate(text: string, grade: string): Promise> }, + private async runSubEvaluator( + evaluator: { evaluate(text: string, grade: string): Promise> }, text: string, grade: string - ): Promise | { error: Error }> { + ): Promise | { error: Error }> { try { return await evaluator.evaluate(text, grade); } catch (error) { - return { - error: error instanceof Error ? error : new Error(String(error)), - }; - } - } - - /** - * Build combined reasoning from individual results - */ - private buildCombinedReasoning( - vocabResult: EvaluationResult | { error: Error }, - sentenceResult: EvaluationResult | { error: Error } - ): string { - const parts: string[] = []; - - if ('error' in vocabResult) { - parts.push(`Vocabulary Complexity: Evaluation failed - ${vocabResult.error.message}`); - } else { - parts.push(`Vocabulary Complexity (${vocabResult.score}):\n${vocabResult.reasoning}`); + return { error: error instanceof Error ? error : new Error(String(error)) }; } - - if ('error' in sentenceResult) { - parts.push(`Sentence Structure Complexity: Evaluation failed - ${sentenceResult.error.message}`); - } else { - parts.push(`Sentence Structure Complexity (${sentenceResult.score}):\n${sentenceResult.reasoning}`); - } - - return parts.join('\n\n'); } } @@ -244,7 +184,7 @@ export async function evaluateTextComplexity( text: string, grade: string, config: BaseEvaluatorConfig -): Promise> { +): Promise { const evaluator = new TextComplexityEvaluator(config); return evaluator.evaluate(text, grade); } diff --git a/sdks/typescript/src/evaluators/vocabulary.ts b/sdks/typescript/src/evaluators/vocabulary.ts index 9b145fa..f1733a7 100644 --- a/sdks/typescript/src/evaluators/vocabulary.ts +++ b/sdks/typescript/src/evaluators/vocabulary.ts @@ -2,7 +2,7 @@ import type { LLMProvider } from '../providers/index.js'; import { createProvider } from '../providers/index.js'; import { VocabularyComplexitySchema, - type VocabularyComplexity, + type VocabularyInternal, type BackgroundKnowledge, } from '../schemas/vocabulary.js'; import { calculateFleschKincaidGrade } from '../features/index.js'; @@ -11,7 +11,7 @@ import { getSystemPrompt, getUserPrompt, } from '../prompts/vocabulary/index.js'; -import type { EvaluationResult } from '../schemas/index.js'; +import type { EvaluationResult, TextComplexityLevel } from '../schemas/index.js'; import { BaseEvaluator, type BaseEvaluatorConfig } from './base.js'; import type { StageDetail } from '../telemetry/index.js'; import { ValidationError, wrapProviderError } from '../errors.js'; @@ -38,7 +38,7 @@ import { ValidationError, wrapProviderError } from '../errors.js'; * }); * * const result = await evaluator.evaluate(text, "3"); - * console.log(result.score); // "moderately complex" + * console.log(result.score); // "Moderately complex" * console.log(result.reasoning); * ``` */ @@ -97,7 +97,7 @@ export class VocabularyEvaluator extends BaseEvaluator { async evaluate( text: string, grade: string - ): Promise> { + ): Promise> { this.logger.info('Starting vocabulary evaluation', { evaluator: 'vocabulary', operation: 'evaluate', @@ -166,9 +166,7 @@ export class VocabularyEvaluator extends BaseEvaluator { score: complexityResponse.data.complexity_score, reasoning: complexityResponse.data.reasoning, metadata: { - promptVersion: '1.2.0', model: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`, - timestamp: new Date(), processingTimeMs: latencyMs, }, _internal: complexityResponse.data, @@ -281,7 +279,7 @@ export class VocabularyEvaluator extends BaseEvaluator { grade: string, backgroundKnowledge: string, fkLevel: number - ): Promise<{ data: VocabularyComplexity; usage: { inputTokens: number; outputTokens: number }; latencyMs: number }> { + ): Promise<{ data: VocabularyInternal; usage: { inputTokens: number; outputTokens: number }; latencyMs: number }> { const systemPrompt = getSystemPrompt(grade); const userPrompt = getUserPrompt(text, grade, backgroundKnowledge, fkLevel); @@ -326,7 +324,7 @@ export async function evaluateVocabulary( text: string, grade: string, config: BaseEvaluatorConfig -): Promise> { +): Promise> { const evaluator = new VocabularyEvaluator(config); return evaluator.evaluate(text, grade); } diff --git a/sdks/typescript/src/index.ts b/sdks/typescript/src/index.ts index 93da1d0..51f764c 100644 --- a/sdks/typescript/src/index.ts +++ b/sdks/typescript/src/index.ts @@ -2,12 +2,10 @@ export type { EvaluationResult, EvaluationMetadata, - BatchEvaluationResult, - BatchSummary, EvaluationError, } from './schemas/index.js'; -export { ComplexityLevel, GradeLevel, GradeBand } from './schemas/index.js'; +export { TextComplexityLevel, GradeBand } from './schemas/index.js'; // Error types export { @@ -40,6 +38,7 @@ export type { SentenceAnalysis, ComplexityClassification, SentenceFeatures, + SentenceStructureInternal, } from './schemas/sentence-structure.js'; export { @@ -48,17 +47,13 @@ export { } from './schemas/sentence-structure.js'; // Vocabulary exports -export type { - VocabularyComplexity, - VocabularyComplexityLevel, -} from './schemas/vocabulary.js'; +export type { VocabularyInternal } from './schemas/vocabulary.js'; // Grade Level Appropriateness exports -export type { GradeLevelAppropriateness } from './schemas/grade-level-appropriateness.js'; +export type { GradeLevelAppropriatenessInternal } from './schemas/grade-level-appropriateness.js'; export { GradeLevelAppropriatenessSchema } from './schemas/grade-level-appropriateness.js'; - export { VocabularyEvaluator, evaluateVocabulary, @@ -68,8 +63,7 @@ export { evaluateGradeLevelAppropriateness, TextComplexityEvaluator, evaluateTextComplexity, - type TextComplexityScore, - type TextComplexityInternal, + type TextComplexityResult, type BaseEvaluatorConfig, type TelemetryOptions, type EvaluatorMetadata, diff --git a/sdks/typescript/src/schemas/grade-level-appropriateness.ts b/sdks/typescript/src/schemas/grade-level-appropriateness.ts index e23e638..31af558 100644 --- a/sdks/typescript/src/schemas/grade-level-appropriateness.ts +++ b/sdks/typescript/src/schemas/grade-level-appropriateness.ts @@ -24,4 +24,4 @@ export const GradeLevelAppropriatenessSchema = z.object({ .describe('Scaffolding needed for the text to be appropriate for the alternative grade'), }); -export type GradeLevelAppropriateness = z.infer; +export type GradeLevelAppropriatenessInternal = z.infer; diff --git a/sdks/typescript/src/schemas/index.ts b/sdks/typescript/src/schemas/index.ts index f1b73d3..22784fe 100644 --- a/sdks/typescript/src/schemas/index.ts +++ b/sdks/typescript/src/schemas/index.ts @@ -1,15 +1,12 @@ export { - ComplexityLevel, - GradeLevel, + TextComplexityLevel, type EvaluationResult, type EvaluationMetadata, - type BatchEvaluationResult, - type BatchSummary, type EvaluationError, } from './outputs.js'; export { GradeBand, GradeLevelAppropriatenessSchema, - type GradeLevelAppropriateness, + type GradeLevelAppropriatenessInternal, } from './grade-level-appropriateness.js'; diff --git a/sdks/typescript/src/schemas/outputs.ts b/sdks/typescript/src/schemas/outputs.ts index 9ab807e..b0e4770 100644 --- a/sdks/typescript/src/schemas/outputs.ts +++ b/sdks/typescript/src/schemas/outputs.ts @@ -1,36 +1,23 @@ import { z } from 'zod'; /** - * Complexity levels for sentence structure evaluation + * Shared complexity levels used across all text complexity evaluators + * (Vocabulary, Sentence Structure, and any future sub-evaluators) */ -export const ComplexityLevel = z.enum([ - 'Slightly Complex', - 'Moderately Complex', - 'Very Complex', - 'Exceedingly Complex', +export const TextComplexityLevel = z.enum([ + 'Slightly complex', + 'Moderately complex', + 'Very complex', + 'Exceedingly complex', ]); -export type ComplexityLevel = z.infer; - -/** - * Grade levels for vocabulary evaluation - */ -export const GradeLevel = z.enum([ - 'Below Grade Level', - 'At Grade Level', - 'Above Grade Level', -]); - -export type GradeLevel = z.infer; +export type TextComplexityLevel = z.infer; /** * Metadata attached to all evaluation results */ export interface EvaluationMetadata { - evaluatorVersion?: string; - promptVersion: string; model: string; - timestamp: Date; processingTimeMs: number; } @@ -63,7 +50,6 @@ export interface EvaluationError { text: string; grade?: string; }; - timestamp: Date; } /** diff --git a/sdks/typescript/src/schemas/sentence-structure.ts b/sdks/typescript/src/schemas/sentence-structure.ts index 4f9522f..b912b68 100644 --- a/sdks/typescript/src/schemas/sentence-structure.ts +++ b/sdks/typescript/src/schemas/sentence-structure.ts @@ -1,5 +1,5 @@ import { z } from 'zod'; -import { ComplexityLevel } from './outputs.js'; +import { TextComplexityLevel } from './outputs.js'; /** * Stage 1: Detailed sentence analysis output (40+ metrics) @@ -69,11 +69,20 @@ export type SentenceAnalysis = z.infer; */ export const ComplexityClassificationSchema = z.object({ reasoning: z.string().describe('Detailed pedagogically appropriate reasoning'), - answer: ComplexityLevel, + answer: TextComplexityLevel, }); export type ComplexityClassification = z.infer; +/** + * Internal data structure for sentence structure evaluation + */ +export interface SentenceStructureInternal { + sentenceAnalysis: SentenceAnalysis; + features: SentenceFeatures; + complexity: ComplexityClassification; +} + /** * Engineered features computed from sentence analysis * These are calculated in TypeScript, not requested from LLM diff --git a/sdks/typescript/src/schemas/vocabulary.ts b/sdks/typescript/src/schemas/vocabulary.ts index f5b80d0..0badcad 100644 --- a/sdks/typescript/src/schemas/vocabulary.ts +++ b/sdks/typescript/src/schemas/vocabulary.ts @@ -1,33 +1,21 @@ import { z } from 'zod'; +import { TextComplexityLevel } from './outputs.js'; /** - * Vocabulary complexity levels matching Qual Text Complexity rubric (SAP) - */ -export const VocabularyComplexityLevel = z.enum([ - 'slightly complex', - 'moderately complex', - 'very complex', - 'exceedingly complex', -]); - -export type VocabularyComplexityLevel = z.infer; - -/** - * Vocabulary complexity evaluation output - * Ported from Python Output BaseModel + * Vocabulary evaluation output schema */ export const VocabularyComplexitySchema = z.object({ tier_2_words: z.string().describe('List of Tier 2 words (academic words)'), tier_3_words: z.string().describe('List of Tier 3 words (domain-specific)'), archaic_words: z.string().describe('List of Archaic words'), other_complex_words: z.string().describe('List of Other Complex words'), - complexity_score: VocabularyComplexityLevel.describe( + complexity_score: TextComplexityLevel.describe( 'The complexity of the text vocabulary' ), reasoning: z.string().describe('Detailed reasoning for the complexity rating'), }); -export type VocabularyComplexity = z.infer; +export type VocabularyInternal = z.infer; /** * Background knowledge assumption for a student at a given grade level diff --git a/sdks/typescript/tests/integration/sentence-structure.integration.test.ts b/sdks/typescript/tests/integration/sentence-structure.integration.test.ts index a9c9bd5..a1ccb8b 100644 --- a/sdks/typescript/tests/integration/sentence-structure.integration.test.ts +++ b/sdks/typescript/tests/integration/sentence-structure.integration.test.ts @@ -33,39 +33,39 @@ const TEST_CASES: BaseTestCase[] = [ // id: 'SS2', // grade: '2', // text: "The Roman Empire was a powerful empire that lasted for hundreds of years. It started as a small village in Italy and grew into a huge empire that controlled much of Europe, Asia, and Africa. The Roman Empire had many strong leaders like Julius Caesar and Augustus. These leaders helped the empire grow and become very powerful.\n \n\n The Roman Empire had a period of peace and prosperity called the Pax Romana. This time was good for the empire, but it didn't last forever. The empire started to have problems. The army became weaker, and the economy had problems. The empire was also attacked by groups of people called barbarians.\n \n\n The Roman Empire was divided into two parts: the Western Roman Empire and the Eastern Roman Empire. The Western Roman Empire eventually fell apart in 476 AD. The Eastern Roman Empire, also known as the Byzantine Empire, lasted for many more years. The Roman Empire left behind many things that we still use today, like the Roman alphabet and the calendar.", - // expected: 'moderately complex', - // acceptable: ['slightly complex', 'very complex'], + // expected: 'Moderately complex', + // acceptable: ['Slightly complex', 'Very complex'], // }, { id: 'SS3', grade: '3', text: "The hoisting gear consists of a double system of chains 13/16 in. in diameter placed side by side; each chain is anchored by an adjustable screw to the end of the jib, and, passing round the traveling carriage and down to the falling block, is taken along the jib over a sliding pulley which leads it on to the grooved barrel, 3 ft. 9 in. in diameter. In front of the barrel is placed an automatic winder which insures a proper coiling of the chain in the grooves. The motive power is derived from two cylinders 10 in. in diameter and 16 in. stroke, one being bolted to each side frame; these cylinders, which are provided with link motion and reversing gear, drive a steel crank shaft 2¾ in. in diameter; on this shaft is a steel sliding pinion which drives the barrel by a double purchase.", - expected: 'exceedingly complex', - acceptable: ['very complex'], + expected: 'Exceedingly complex', + acceptable: ['Very complex'], }, { id: 'SS4', grade: '4', text: "Before corals bleach, they do not show many other signs of feeling stressed. So, if we want to understand a coral's health, we have to study its cells. Inside cells we have a lot of information, including DNA, RNA, and proteins. These molecules can help us find clues about the communication between the coral and the algae. But also, these molecules can teach us how to know when corals are stressed.\nWhen an organism is stressed, every cell in its body will react. Everything will do its best to survive! In response to stress, the cell will use its DNA to make RNA, so that it can then make proteins that will fight off the stress. If an organism has been stressed before, it can respond to the stress faster and better. Think of it like visiting a city: the first time you visit, you will need a map to find your hotel. The more often you visit the city, the less you will need the map because you will remember, and you will get back to the hotel faster.", - expected: 'very complex', - acceptable: ['moderately complex', 'exceedingly complex'], + expected: 'Very complex', + acceptable: ['Moderately complex', 'Exceedingly complex'], }, { id: 'SS5', grade: '5', text: "Mesopotamia, located in present-day Iraq, is known as the 'Cradle of Civilization' because it was home to some of the earliest civilizations in the world. The region got its name from the ancient Greek words for 'land between the rivers,' referring to the Tigris and Euphrates rivers. These rivers provided water for the fertile land, making it perfect for farming. The regular flooding of the rivers made the land around them ideal for growing crops, which helped people settle down and form permanent villages. These villages eventually grew into cities, where people developed many of the characteristics of civilization, like organized government, complex buildings, and different social classes.\n \n\n The first civilizations in Mesopotamia were the Sumerians, who lived around 5,000 years ago. They invented the world's first written language, called cuneiform, which they used to keep track of things like food supplies and trade. They also developed a system of numbers, which helped them with math and measurement. The Sumerians built impressive cities like Ur, Eridu, and Uruk, which had populations of over 50,000 people. These cities were centers of learning and culture, and they helped spread knowledge and ideas throughout the region.\n \n\n Over time, other civilizations rose and fell in Mesopotamia, including the Akkadians, Babylonians, and Assyrians. Each civilization made its own contributions to the development of human society. The Babylonians are famous for their code of laws, which was one of the first written legal systems in the world. The Assyrians were known for their powerful military and their impressive palaces. Mesopotamia's history is full of amazing inventions and innovations that shaped the world we live in today.\n \n\n The development of civilization in Mesopotamia was not just about the fertile land and the rivers. Changes in climate and the environment also played a role. People had to become more organized and work together to survive. This led to the development of complex societies and governments. Mesopotamia's story is a reminder of how human ingenuity and adaptability can lead to amazing achievements.\n \n\n The 'Cradle of Civilization' is a term that refers to the regions where the earliest known human civilizations emerged. Mesopotamia is a prime example of this, as it was a place where people learned to live together, build cities, and develop new technologies that changed the course of human history. The innovations that came from Mesopotamia, like writing, mathematics, and agriculture, continue to influence our lives today. By studying ancient Mesopotamia, we can learn about the origins of our own civilization and the challenges and triumphs of early humans.", - expected: 'slightly complex', - acceptable: ['moderately complex'], + expected: 'Slightly complex', + acceptable: ['Moderately complex'], // TODO: Valiadate the test-case with additional data from Grade 5 - // expected: 'exceedingly complex', - // acceptable: ['very complex'], + // expected: 'Exceedingly complex', + // acceptable: ['Very complex'], }, { id: 'SS6', grade: '6', text: "Benjamin Franklin was a very important person in American history. He was born in Boston, Massachusetts in 1706. He was one of 17 children. Franklin did not go to school for very long. He learned to be a printer from his brother. Franklin was a very smart man. He invented many things, like bifocals, the Franklin stove, and the lightning rod. He also started the first public library in Philadelphia. Franklin was a writer, too. He wrote a book called *Poor Richard's Almanack*. It had many famous sayings, like \"Lost Time is never found again.\"\n\nFranklin was also a politician. He helped write the Declaration of Independence. He was a diplomat, too. He helped the United States get help from France during the Revolutionary War. He was a very busy man! Franklin was a scientist, a writer, a politician, and an inventor. He was a very important person in American history.\n\nFranklin was a very interesting person. He was a scientist who did experiments with electricity. He was a writer who wrote a book of sayings. He was a politician who helped the United States become independent. He was a diplomat who helped the United States get help from other countries. He was a very busy man!\n\nFranklin was a very smart man. He was a self-taught man who learned a lot on his own. He was a very creative man who invented many things. He was a very kind man who helped others. He was a very important man who helped shape the United States.\n\nFranklin was a very influential person. He was a leader who helped people. He was a thinker who came up with new ideas. He was a writer who shared his thoughts with others. He was a scientist who helped people understand the world. He was a very important person who helped make the United States what it is today.", - expected: 'slightly complex', - acceptable: ['moderately complex'], + expected: 'Slightly complex', + acceptable: ['Moderately complex'], }, ]; diff --git a/sdks/typescript/tests/integration/vocabulary.integration.test.ts b/sdks/typescript/tests/integration/vocabulary.integration.test.ts index 49b4662..4a60cac 100644 --- a/sdks/typescript/tests/integration/vocabulary.integration.test.ts +++ b/sdks/typescript/tests/integration/vocabulary.integration.test.ts @@ -34,50 +34,50 @@ const TEST_CASES: BaseTestCase[] = [ id: 'V3', grade: '3', text: 'Civil rights are rights that all people in a country have. The civil rights of a country apply to all the citizens within its borders. These rights are given by the laws of the country. Civil rights are sometimes thought to be the same as natural rights. In many countries civil rights include freedom of speech, freedom of the press, freedom of religion, and freedom of assembly. Civil rights also include the right to own property and the right to get fair and equal treatment from the government, from other citizens, and from private groups.', - expected: 'very complex', - acceptable: ['moderately complex', 'exceedingly complex'], + expected: 'Very complex', + acceptable: ['Moderately complex', 'Exceedingly complex'], }, { id: 'V4', grade: '4', text: 'Bluetooth is a protocol for wireless communication over short distances. It was developed in the 1990s, to reduce the number of cables. Devices such as mobile phones, laptops, PCs, printers, digital cameras and video game consoles can connect to each other, and exchange information. This is done using radio waves. It can be done securely. Bluetooth is only used for relatively short distances, like a few metres. There are different standards. Data rates vary. Currently, they are at 1-3 MBit per second.', - expected: 'exceedingly complex', - acceptable: ['very complex'], + expected: 'Exceedingly complex', + acceptable: ['Very complex'], }, { id: 'V5', grade: '5', text: `The scientific method is a way to learn about the world around us. It helps us figure out how things work. Scientists use the scientific method to test their ideas. They start by making observations and asking questions. Then, they make a guess, or a hypothesis, about what might be the answer. They use their hypothesis to make predictions about what will happen in an experiment. Scientists then test their predictions by doing experiments. If the results of the experiment match their predictions, then their hypothesis is supported. If the results don't match, then they need to change their hypothesis. Scientists repeat this process many times to make sure their hypothesis is correct. The scientific method is important because it helps us learn new things. It helps us understand the world around us. Scientists use the scientific method to make new discoveries and solve problems.`, - expected: 'slightly complex', - acceptable: ['moderately complex'], + expected: 'Slightly complex', + acceptable: ['Moderately complex'], }, { id: 'V6', grade: '6', text: `Chicago in 1871 was a city ready to burn. The city boasted having 59,500 buildings, many of them—such as the Courthouse and the Tribune Building—large and ornately decorated. The trouble was that about two-thirds of all these structures were made entirely of wood. Many of the remaining buildings (even the ones proclaimed to be 'fireproof') looked solid, but were actually jerrybuilt affairs; the stone or brick exteriors hid wooden frames and floors, all topped with highly flammable tar or shingle roofs. It was also a common practice to disguise wood as another kind of building material. The fancy exterior decorations on just about every building were carved from wood, then painted to look like stone or marble.`, - expected: 'very complex', - acceptable: ['moderately complex', 'exceedingly complex'], + expected: 'Very complex', + acceptable: ['Moderately complex', 'Exceedingly complex'], }, { id: 'V7', grade: '7', text: `The scientific method is a way of learning about the world around us. It's a process that helps us understand how things work and why they happen. It's not just for scientists; we all use the scientific method in our everyday lives, even if we don't realize it. The scientific method starts with an observation. We notice something interesting and want to know more about it. For example, you might notice that your plant is wilting. You might wonder why this is happening. Next, we form a hypothesis, which is a possible explanation for our observation. In our plant example, you might hypothesize that the plant is wilting because it needs more water. Then, we test our hypothesis by doing an experiment. We change something in our experiment to see if it affects the outcome. In our plant example, you could water the plant and see if it recovers. Based on the results of our experiment, we can either support or reject our hypothesis. If the plant recovers after being watered, then your hypothesis is supported. If the plant doesn't recover, then you need to come up with a new hypothesis. The scientific method is a powerful tool for learning and understanding the world around us. It's a process of asking questions, testing ideas, and drawing conclusions based on evidence. It's a way of thinking that helps us to be curious, to be critical, and to be open to new ideas.`, - expected: 'slightly complex', - acceptable: ['moderately complex'], + expected: 'Slightly complex', + acceptable: ['Moderately complex'], }, { id: 'V8', grade: '8', text: 'The American Revolution was a war for independence between the thirteen American colonies and Great Britain. The war started in 1775 and ended in 1783. The colonists wanted to be free from British rule. They wanted to make their own laws and govern themselves. The colonists were angry about new taxes that the British Parliament imposed on them. They felt that they were being taxed without having a say in how the money was spent. The colonists also felt that the British government was not treating them fairly. The war began with the Battles of Lexington and Concord in April 1775. The colonists, led by General George Washington, fought against the British army. The war was long and difficult, but the colonists eventually won. The colonists won the war because they had the support of the French. The French helped the colonists by providing them with soldiers, ships, and money. The colonists also had a strong leader in George Washington. He was a skilled military leader and he inspired the colonists to fight for their freedom. The American Revolution was a turning point in history. It showed that colonies could break free from their mother countries and become independent nations. The American Revolution also inspired other revolutions around the world.', - expected: 'slightly complex', - acceptable: ['moderately complex'], + expected: 'Slightly complex', + acceptable: ['Moderately complex'], }, { id: 'V9', grade: '9', text: `Mr. President: I would like to speak briefly and simply about a serious national condition. It is a national feeling of fear and frustration that could result in national suicide and the end of everything that we Americans hold dear. It is a condition that comes from the lack of effective leadership in either the Legislative Branch or the Executive Branch of our Government. That leadership is so lacking that serious and responsible proposals are being made that national advisory commissions be appointed to provide such critically needed leadership. I speak as briefly as possible because too much harm has already been done with irresponsible words of bitterness and selfish political opportunism. I speak as briefly as possible because the issue is too great to be obscured by eloquence. I speak simply and briefly in the hope that my words will be taken to heart. I speak as a Republican. I speak as a woman. I speak as a United States Senator. I speak as an American. The United States Senate has long enjoyed worldwide respect as the greatest deliberative body in the world. But recently that deliberative character has too often been debased to the level of a forum of hate and character assassination sheltered by the shield of congressional immunity. It is ironical that we Senators can in debate in the Senate directly or indirectly, by any form of words, impute to any American who is not a Senator any conduct or motive unworthy or unbecoming an American—and without that non-Senator American having any legal redress against us—yet if we say the same thing in the Senate about our colleagues we can be stopped on the grounds of being out of order. It is strange that we can verbally attack anyone else without restraint and with full protection and yet we hold ourselves above the same type of criticism here on the Senate Floor. Surely the United States Senate is big enough to take self-criticism and self-appraisal. Surely we should be able to take the same kind of character attacks that we "dish out" to outsiders. I think that it is high time for the United States Senate and its members to do some soul-searching—for us to weigh our consciences—on the manner in which we are performing our duty to the people of America—on the manner in which we are using or abusing our individual powers and privileges. I think that it is high time that we remembered that we have sworn to uphold and defend the Constitution. I think that it is high time that we remembered that the Constitution, as amended, speaks not only of the freedom of speech but also of trial by jury instead of trial by accusation. Whether it be a criminal prosecution in court or a character prosecution in the Senate, there is little practical distinction when the life of a person has been ruined. Those of us who shout the loudest about Americanism in making character assassinations are all too frequently those who, by our own words and acts, ignore some of the basic principles of Americanism: The right to criticize; The right to hold unpopular beliefs; The right to protest; The right of independent thought. The exercise of these rights should not cost one single American citizen his reputation or his right to a livelihood nor should he be in danger of losing his reputation or livelihood merely because he happens to know someone who holds unpopular beliefs. Who of us doesn't? Otherwise none of us could call our souls our own. Otherwise thought control would have set in. The American people are sick and tired of being afraid to speak their minds lest they be politically smeared as "Communists" or "Fascists" by their opponents. Freedom of speech is not what it used to be in America. It has been so abused by some that it is not exercised by others. The American people are sick and tired of seeing innocent people smeared and guilty people whitewashed. But there have been enough proved cases, such as the Amerasia case, the Hiss case, the Coplon case, the Gold case, to cause the nationwide distrust and strong suspicion that there may be something to the unproved, sensational accusations. I doubt if the Republican Party could—simply because I don't believe the American people will uphold any political party that puts political exploitation above national interest. Surely we Republicans aren't that desperate for victory. I don't want to see the Republican Party win that way. While it might be a fleeting victory for the Republican Party, it would be a more lasting defeat for the American people. Surely it would ultimately be suicide for the Republican Party and the two-party system that has protected our American liberties from the dictatorship of a one-party system. As members of the Minority Party, we do not have the primary authority to formulate the policy of our Government. But we do have the responsibility of rendering constructive criticism, of clarifying issues, of allaying fears by acting as responsible citizens. As a woman, I wonder how the mothers, wives, sisters, and daughters feel about the way in which members of their families have been politically mangled in the Senate debate—and I use the word "debate" advisedly. As a United States Senator, I am not proud of the way in which the Senate has been made a publicity platform for irresponsible sensationalism. I am not proud of the reckless abandon in which unproved charges have been hurled from the side of the aisle. I am not proud of the obviously staged, undignified countercharges that have been attempted in retaliation from the other side of the aisle. I don't like the way the Senate has been made a rendezvous for vilification, for selfish political gain at the sacrifice of individual reputations and national unity. I am not proud of the way we smear outsiders from the Floor of the Senate and hide behind the cloak of congressional immunity and still place ourselves beyond criticism on the Floor of the Senate. As an American, I am shocked at the way Republicans and Democrats alike are playing directly into the Communist design of "confuse, divide, and conquer." As an American, I don't want a Democratic Administration "whitewash" or "cover-up" any more than I want a Republican smear or witch hunt. As an American, I condemn a Republican "Fascist" just as much I condemn a Democratic "Communist." I condemn a Democrat "Fascist" just as much as I condemn a Republican "Communist." They are equally dangerous to you and me and to our country. As an American, I want to see our nation recapture the strength and unity it once had when we fought the enemy instead of ourselves. It is with these thoughts that I have drafted what I call a "Declaration of Conscience." I am gratified that Senator Tobey, Senator Aiken, Senator Morse, Senator Ives, Senator Thye, and Senator Hendrickson have concurred in that declaration and have authorized me to announce their concurrence.`, - expected: 'very complex', - acceptable: ['moderately complex', 'exceedingly complex'], + expected: 'Very complex', + acceptable: ['Moderately complex', 'Exceedingly complex'], }, ]; diff --git a/sdks/typescript/tests/unit/evaluators/grade-level-appropriateness.test.ts b/sdks/typescript/tests/unit/evaluators/grade-level-appropriateness.test.ts index 7e04d9b..915b9d7 100644 --- a/sdks/typescript/tests/unit/evaluators/grade-level-appropriateness.test.ts +++ b/sdks/typescript/tests/unit/evaluators/grade-level-appropriateness.test.ts @@ -165,15 +165,11 @@ describe('GradeLevelAppropriatenessEvaluator - Evaluation Flow', () => { expect(result._internal).toHaveProperty('reasoning'); // Verify metadata structure - expect(result.metadata).toHaveProperty('promptVersion'); expect(result.metadata).toHaveProperty('model'); - expect(result.metadata).toHaveProperty('timestamp'); expect(result.metadata).toHaveProperty('processingTimeMs'); // Verify metadata values - expect(result.metadata.promptVersion).toBe('1.2.0'); expect(result.metadata.model).toBe('google:gemini-2.5-pro'); - expect(result.metadata.timestamp).toBeInstanceOf(Date); expect(result.metadata.processingTimeMs).toBeGreaterThanOrEqual(0); // Verify _internal values diff --git a/sdks/typescript/tests/unit/evaluators/sentence-structure.test.ts b/sdks/typescript/tests/unit/evaluators/sentence-structure.test.ts index a4971e2..b3f99f3 100644 --- a/sdks/typescript/tests/unit/evaluators/sentence-structure.test.ts +++ b/sdks/typescript/tests/unit/evaluators/sentence-structure.test.ts @@ -128,7 +128,7 @@ describe('SentenceStructureEvaluator - Evaluation Flow', () => { // Mock complexity classification response vi.mocked(mockComplexityProvider.generateStructured).mockResolvedValue({ data: { - answer: 'Slightly Complex', + answer: 'Slightly complex', reasoning: 'The text uses simple sentence structures appropriate for third grade.', }, model: 'gpt-4o', @@ -143,7 +143,7 @@ describe('SentenceStructureEvaluator - Evaluation Flow', () => { const result = await evaluator.evaluate(testText, testGrade); // Verify result structure - expect(result.score).toBe('Slightly Complex'); + expect(result.score).toBe('Slightly complex'); expect(result.reasoning).toContain('simple sentence structures'); expect(result.metadata).toBeDefined(); expect(result.metadata.model).toBe('openai:gpt-4o'); @@ -217,7 +217,7 @@ describe('SentenceStructureEvaluator - Evaluation Flow', () => { vi.mocked(mockComplexityProvider.generateStructured).mockResolvedValue({ data: { - answer: 'Moderately Complex', + answer: 'Moderately complex', reasoning: 'Detailed reasoning here', }, model: 'gpt-4o', @@ -234,15 +234,11 @@ describe('SentenceStructureEvaluator - Evaluation Flow', () => { expect(result).toHaveProperty('_internal'); // Verify metadata structure - expect(result.metadata).toHaveProperty('promptVersion'); expect(result.metadata).toHaveProperty('model'); - expect(result.metadata).toHaveProperty('timestamp'); expect(result.metadata).toHaveProperty('processingTimeMs'); // Verify metadata values - expect(result.metadata.promptVersion).toBe('1.2.0'); expect(result.metadata.model).toBe('openai:gpt-4o'); - expect(result.metadata.timestamp).toBeInstanceOf(Date); expect(result.metadata.processingTimeMs).toBeGreaterThanOrEqual(0); }); }); diff --git a/sdks/typescript/tests/unit/evaluators/text-complexity.test.ts b/sdks/typescript/tests/unit/evaluators/text-complexity.test.ts index a3c48bf..1044611 100644 --- a/sdks/typescript/tests/unit/evaluators/text-complexity.test.ts +++ b/sdks/typescript/tests/unit/evaluators/text-complexity.test.ts @@ -16,7 +16,7 @@ vi.mock('../../../src/providers/index.js', () => ({ data: { complexity_score: 'moderately complex', reasoning: 'Test reasoning', - answer: 'Moderately Complex', + answer: 'Moderately complex', }, usage: { inputTokens: 100, outputTokens: 50 }, latencyMs: 100, @@ -106,24 +106,20 @@ describe('TextComplexityEvaluator', () => { // Mock the child evaluators' evaluate methods vocabSpy = vi.spyOn((evaluator as any).vocabularyEvaluator, 'evaluate').mockResolvedValue({ - score: 'moderately complex', + score: 'Moderately complex', reasoning: 'Vocabulary test reasoning', metadata: { - promptVersion: '1.0', model: 'gemini-2.5-pro + gpt-4o', - timestamp: new Date(), processingTimeMs: 100, }, _internal: {}, }); sentenceSpy = vi.spyOn((evaluator as any).sentenceStructureEvaluator, 'evaluate').mockResolvedValue({ - score: 'Moderately Complex', + score: 'Moderately complex', reasoning: 'Sentence structure test reasoning', metadata: { - promptVersion: '1.0', model: 'gpt-4o', - timestamp: new Date(), processingTimeMs: 100, }, _internal: {}, @@ -141,15 +137,10 @@ describe('TextComplexityEvaluator', () => { const result = await evaluator.evaluate(text, grade); expect(result).toBeDefined(); - expect(result.score).toBeDefined(); - expect(result.score.vocabulary).toBeDefined(); - expect(result.score.sentenceStructure).toBeDefined(); - expect(result.reasoning).toBeDefined(); - expect(result.metadata).toBeDefined(); - expect(result.metadata.model).toBe('composite:gemini-2.5-pro+gpt-4o'); - expect(result._internal).toBeDefined(); - expect(result._internal!.vocabulary).toBeDefined(); - expect(result._internal!.sentenceStructure).toBeDefined(); + expect(result.vocabulary).toBeDefined(); + expect(result.sentenceStructure).toBeDefined(); + expect('error' in result.vocabulary).toBe(false); + expect('error' in result.sentenceStructure).toBe(false); }); it('should validate text input', async () => { @@ -201,8 +192,8 @@ describe('TextComplexityEvaluator', () => { // Allow some overhead but should be significantly less than 200ms expect(duration).toBeLessThan(200); - expect('error' in result._internal!.vocabulary).toBe(false); - expect('error' in result._internal!.sentenceStructure).toBe(false); + expect('error' in result.vocabulary).toBe(false); + expect('error' in result.sentenceStructure).toBe(false); }); it('should handle partial failures gracefully', async () => { @@ -215,11 +206,9 @@ describe('TextComplexityEvaluator', () => { const result = await evaluator.evaluate(text, grade); expect(result).toBeDefined(); - expect('error' in result._internal!.vocabulary).toBe(true); - expect((result._internal!.vocabulary as { error: Error }).error).toBeDefined(); - expect('error' in result._internal!.sentenceStructure).toBe(false); - expect(result.score.vocabulary).toBe('N/A'); - expect(result.score.sentenceStructure).not.toBe('N/A'); + expect('error' in result.vocabulary).toBe(true); + expect((result.vocabulary as { error: Error }).error).toBeDefined(); + expect('error' in result.sentenceStructure).toBe(false); }); it('should throw when both evaluators fail', async () => { @@ -239,27 +228,23 @@ describe('TextComplexityEvaluator', () => { const text = 'The cat sat on the mat.'; const grade = '5'; - // Override vocabulary to return "moderately complex" + // Override vocabulary to return "Moderately complex" vocabSpy.mockResolvedValue({ - score: 'moderately complex', + score: 'Moderately complex', reasoning: 'Vocab reasoning', metadata: { - promptVersion: '1.0', model: 'gemini-2.5-pro', - timestamp: new Date(), processingTimeMs: 100, }, _internal: {}, }); - // Override sentence structure to return "Slightly Complex" + // Override sentence structure to return "Slightly complex" sentenceSpy.mockResolvedValue({ - score: 'Slightly Complex', + score: 'Slightly complex', reasoning: 'Sentence reasoning', metadata: { - promptVersion: '1.0', model: 'gpt-4o', - timestamp: new Date(), processingTimeMs: 100, }, _internal: {}, @@ -267,34 +252,36 @@ describe('TextComplexityEvaluator', () => { const result = await evaluator.evaluate(text, grade); - expect(result.score.vocabulary).toBe('moderately complex'); - expect(result.score.sentenceStructure).toBe('Slightly Complex'); + expect('error' in result.vocabulary).toBe(false); + expect('error' in result.sentenceStructure).toBe(false); + if (!('error' in result.vocabulary)) { + expect(result.vocabulary.score).toBe('Moderately complex'); + } + if (!('error' in result.sentenceStructure)) { + expect(result.sentenceStructure.score).toBe('Slightly complex'); + } }); - it('should build combined reasoning from both evaluators', async () => { + it('should preserve individual sub-evaluator reasoning', async () => { const text = 'The cat sat on the mat.'; const grade = '5'; // Override both evaluators with specific reasoning vocabSpy.mockResolvedValue({ - score: 'moderately complex', + score: 'Moderately complex', reasoning: 'This is the vocabulary reasoning.', metadata: { - promptVersion: '1.0', model: 'gemini-2.5-pro', - timestamp: new Date(), processingTimeMs: 100, }, _internal: {}, }); sentenceSpy.mockResolvedValue({ - score: 'Slightly Complex', + score: 'Slightly complex', reasoning: 'This is the sentence structure reasoning.', metadata: { - promptVersion: '1.0', model: 'gpt-4o', - timestamp: new Date(), processingTimeMs: 100, }, _internal: {}, @@ -302,10 +289,12 @@ describe('TextComplexityEvaluator', () => { const result = await evaluator.evaluate(text, grade); - expect(result.reasoning).toContain('Vocabulary Complexity'); - expect(result.reasoning).toContain('This is the vocabulary reasoning.'); - expect(result.reasoning).toContain('Sentence Structure Complexity'); - expect(result.reasoning).toContain('This is the sentence structure reasoning.'); + if (!('error' in result.vocabulary)) { + expect(result.vocabulary.reasoning).toBe('This is the vocabulary reasoning.'); + } + if (!('error' in result.sentenceStructure)) { + expect(result.sentenceStructure.reasoning).toBe('This is the sentence structure reasoning.'); + } }); }); diff --git a/sdks/typescript/tests/unit/evaluators/vocabulary.test.ts b/sdks/typescript/tests/unit/evaluators/vocabulary.test.ts index 9792acb..dee51e4 100644 --- a/sdks/typescript/tests/unit/evaluators/vocabulary.test.ts +++ b/sdks/typescript/tests/unit/evaluators/vocabulary.test.ts @@ -96,7 +96,7 @@ describe('VocabularyEvaluator - Evaluation Flow', () => { // Mock complexity evaluation response vi.mocked(mockComplexityProvider.generateStructured).mockResolvedValue({ data: { - complexity_score: 'moderately complex', + complexity_score: 'Moderately complex', reasoning: 'The text uses grade-appropriate vocabulary.', factors: ['Academic terminology', 'Clear structure'], }, @@ -112,7 +112,7 @@ describe('VocabularyEvaluator - Evaluation Flow', () => { const result = await evaluator.evaluate(testText, testGrade); // Verify result structure - expect(result.score).toBe('moderately complex'); + expect(result.score).toBe('Moderately complex'); expect(result.reasoning).toContain('grade-appropriate vocabulary'); expect(result.metadata).toBeDefined(); expect(result.metadata.model).toBe('openai:gpt-4o-2024-11-20 + openai:gpt-4.1-2025-04-14'); @@ -190,7 +190,7 @@ describe('VocabularyEvaluator - Evaluation Flow', () => { vi.mocked(mockComplexityProvider.generateStructured).mockResolvedValue({ data: { - complexity_score: 'moderately complex', + complexity_score: 'Moderately complex', reasoning: 'Detailed reasoning here', factors: ['Factor 1', 'Factor 2'], }, @@ -208,15 +208,11 @@ describe('VocabularyEvaluator - Evaluation Flow', () => { expect(result).toHaveProperty('_internal'); // Verify metadata structure - expect(result.metadata).toHaveProperty('promptVersion'); expect(result.metadata).toHaveProperty('model'); - expect(result.metadata).toHaveProperty('timestamp'); expect(result.metadata).toHaveProperty('processingTimeMs'); // Verify metadata values - expect(result.metadata.promptVersion).toBe('1.2.0'); expect(result.metadata.model).toBe('openai:gpt-4o-2024-11-20 + openai:gpt-4.1-2025-04-14'); - expect(result.metadata.timestamp).toBeInstanceOf(Date); expect(result.metadata.processingTimeMs).toBeGreaterThanOrEqual(0); // Mocked calls can be instant (0ms) }); @@ -228,7 +224,7 @@ describe('VocabularyEvaluator - Evaluation Flow', () => { }); const mockComplexityData = { - complexity_score: 'moderately complex', + complexity_score: 'Moderately complex', reasoning: 'Detailed reasoning', factors: ['Factor 1', 'Factor 2'], analysis: 'Deep analysis',