-
Notifications
You must be signed in to change notification settings - Fork 7
feat: Add TextComplexityEvaluator and metadata-driven evaluator system #15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,7 @@ import { | |
| type TelemetryMetadata, | ||
| type TokenUsage, | ||
| } from '../telemetry/index.js'; | ||
| import { ValidationError } from '../errors.js'; | ||
| import { ConfigurationError, ValidationError } from '../errors.js'; | ||
| import { createLogger, LogLevel, type Logger } from '../logger.js'; | ||
|
|
||
| /** | ||
|
|
@@ -80,6 +80,25 @@ export interface BaseEvaluatorConfig { | |
| logLevel?: LogLevel; | ||
| } | ||
|
|
||
| /** | ||
| * Evaluator metadata interface | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the reviewer - Making configuration and validation shared code across evals |
||
| * Each evaluator must provide this metadata as static properties | ||
| */ | ||
| export interface EvaluatorMetadata { | ||
| /** Unique identifier for the evaluator (e.g., 'vocabulary', 'sentence-structure') */ | ||
| readonly id: string; | ||
| /** Human-readable name (e.g., 'Vocabulary', 'Sentence Structure') */ | ||
| readonly name: string; | ||
| /** Brief description of what the evaluator does */ | ||
| readonly description: string; | ||
| /** Supported grade levels (e.g., ['3', '4', '5', ...]) */ | ||
| readonly supportedGrades: readonly string[]; | ||
|
Comment on lines
+94
to
+95
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the shape new evals are taking, it looks like grade isn't a core attribute. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the previous playground, we ended up setting up an input fields framework. Each eval would define what type of inputs it used, e.g.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll address this in a different PR |
||
| /** Whether this evaluator requires a Google API key */ | ||
| readonly requiresGoogleKey: boolean; | ||
| /** Whether this evaluator requires an OpenAI API key */ | ||
| readonly requiresOpenAIKey: boolean; | ||
| } | ||
|
Comment on lines
+85
to
+100
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
|
|
||
| /** | ||
| * Abstract base class for all evaluators | ||
| * | ||
|
|
@@ -88,6 +107,9 @@ export interface BaseEvaluatorConfig { | |
| * - Text validation | ||
| * - Grade validation (with overridable default) | ||
| * - Metadata creation | ||
| * | ||
| * Concrete evaluators must implement: | ||
| * - static metadata: Provide evaluator metadata (see EvaluatorMetadata interface) | ||
| */ | ||
| export abstract class BaseEvaluator { | ||
| protected telemetryClient?: TelemetryClient; | ||
|
|
@@ -96,9 +118,34 @@ export abstract class BaseEvaluator { | |
| telemetry: Required<TelemetryOptions>; | ||
| }; | ||
|
|
||
| /** | ||
| * Static metadata for the evaluator | ||
| * | ||
| * Concrete evaluators MUST define this property. | ||
| * | ||
| * @example | ||
| * ```typescript | ||
| * class MyEvaluator extends BaseEvaluator { | ||
| * static readonly metadata = { | ||
| * id: 'my-evaluator', | ||
| * name: 'My Evaluator', | ||
| * description: 'Does something useful', | ||
| * supportedGrades: ['3', '4', '5'], | ||
| * requiresGoogleKey: true, | ||
| * requiresOpenAIKey: false, | ||
| * }; | ||
| * } | ||
| * ``` | ||
| */ | ||
| static readonly metadata: EvaluatorMetadata; | ||
|
|
||
| constructor(config: BaseEvaluatorConfig) { | ||
| // Initialize logger | ||
| this.logger = createLogger(config.logger, config.logLevel ?? LogLevel.WARN); | ||
|
|
||
| // Validate required API keys based on metadata | ||
| this.validateApiKeys(config); | ||
|
|
||
| // Normalize telemetry config | ||
| const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry); | ||
|
|
||
|
|
@@ -120,6 +167,38 @@ export abstract class BaseEvaluator { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Get metadata for this evaluator instance | ||
| * @throws {ConfigurationError} If the subclass has not defined static metadata | ||
| */ | ||
| protected get metadata(): EvaluatorMetadata { | ||
| const meta = (this.constructor as typeof BaseEvaluator).metadata; | ||
| if (!meta) { | ||
| throw new ConfigurationError( | ||
| `${this.constructor.name} must define a static readonly metadata block.` | ||
| ); | ||
| } | ||
| return meta; | ||
| } | ||
|
|
||
| /** | ||
| * Validate that required API keys are provided based on metadata | ||
| * @throws {ConfigurationError} If required API keys are missing | ||
| */ | ||
| private validateApiKeys(config: BaseEvaluatorConfig): void { | ||
| if (this.metadata.requiresGoogleKey && !config.googleApiKey) { | ||
| throw new ConfigurationError( | ||
| `Google API key is required for ${this.metadata.name} evaluator. Pass googleApiKey in config.` | ||
| ); | ||
| } | ||
|
|
||
| if (this.metadata.requiresOpenAIKey && !config.openaiApiKey) { | ||
| throw new ConfigurationError( | ||
| `OpenAI API key is required for ${this.metadata.name} evaluator. Pass openaiApiKey in config.` | ||
| ); | ||
| } | ||
| } | ||
|
Comment on lines
+188
to
+200
|
||
|
|
||
| /** | ||
| * Normalize telemetry config to standard format | ||
| */ | ||
|
|
@@ -149,10 +228,12 @@ export abstract class BaseEvaluator { | |
| } | ||
|
|
||
| /** | ||
| * Get the evaluator type identifier (e.g., "vocabulary", "sentence-structure") | ||
| * Must be implemented by concrete evaluators | ||
| * Get the evaluator type identifier from metadata | ||
| * @returns The evaluator type ID (e.g., "vocabulary", "sentence-structure") | ||
| */ | ||
| protected abstract getEvaluatorType(): string; | ||
| protected getEvaluatorType(): string { | ||
| return this.metadata.id; | ||
| } | ||
|
|
||
| /** | ||
| * Validate text meets requirements | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Different, but usable, shape than we currently have.
We need to get alignment from data, eng, design on how we use the eval outputs.