diff --git a/IMPLEMENTATION_ISSUE_623.md b/IMPLEMENTATION_ISSUE_623.md new file mode 100644 index 00000000..77fbb45b --- /dev/null +++ b/IMPLEMENTATION_ISSUE_623.md @@ -0,0 +1,441 @@ +# Issue #623: Cohort & Retention Analysis API + +## Overview + +This implementation adds a comprehensive cohort and retention analysis API to the Trivela platform, +enabling campaign operators to answer questions like "of users who registered in week N, how many +claimed by week N+k?" + +## Features Implemented + +### 1. **Database Schema** (Migration 011) + +- `user_activities` table: Tracks all user events (registered, claimed, active) +- `cohort_stats` table: Precomputed cohort statistics for performance +- `retention_data` table: Precomputed retention curves + +### 2. **Data Access Layer** + +- **Repository** (`sqliteCohortRepository.js`): Complete data access for cohort analysis + - Record user activities + - Save/retrieve cohort statistics + - Save/retrieve retention data + - Support for cache invalidation + +### 3. **Business Logic Service** + +- **Service** (`cohortService.js`): + - Compute cohorts by registration period + - Calculate retention curves with offset tracking + - Support for multiple granularities (day, week, month) + - Support for multiple metrics (claimed, active) + - Deterministic and testable outputs + - Caching with recomputation support + +### 4. 
**REST API Endpoints** + +All endpoints under `/api/v1/campaigns/:campaignId/cohorts` (requires API key): + +#### Cohort Analysis + +- `GET /campaigns/:campaignId/cohorts` - Get full cohort analysis with retention curves + - Query params: `granularity` (day/week/month), `metric` (claimed/active), `recompute` (bool) +- `GET /campaigns/:campaignId/cohorts/:cohortPeriod/retention` - Get retention curve for specific + cohort + - Query params: `granularity`, `metric` + +#### Recomputation + +- `POST /campaigns/:campaignId/cohorts/recompute` - Force recomputation of cohort data + - Query params: `granularity`, `metric` + +#### Activity Recording + +- `POST /campaigns/:campaignId/activities` - Record user activity (for testing/manual entry) + - Body: `{ userAddress, activityType, occurredAt?, metadata? }` + +### 5. **Validation & Testing** + +- Zod schemas for request/response validation +- Comprehensive unit tests with deterministic fixtures +- Hand-computed expected values for verification +- Tests cover all granularities and metric types + +## Technical Design + +### Cohort Definition + +A **cohort** is a group of users who registered in the same time period (day, week, or month). +Cohorts are identified by period strings: + +- Day: `2024-01-15` +- Week: `2024-W03` (ISO week number) +- Month: `2024-01` + +### Retention Calculation + +**Retention** measures how many users from a cohort performed an activity at a given offset from +their registration: + +- Offset 0: Same period as registration +- Offset 1: One period later +- Offset 2: Two periods later +- etc. 
+ +**Retention Rate** = (Users who performed activity at offset) / (Cohort size) × 100% + +### Period Handling + +- **UTC timezone**: All timestamps are normalized to UTC +- **Week numbering**: ISO 8601 week-date system (week 1 contains first Thursday) +- **Period boundaries**: Inclusive start, exclusive end + +### Deterministic Assignment + +The algorithm assigns users to cohorts based on their registration timestamp: + +```javascript +registrationDate → getPeriodString(date, granularity) → cohortPeriod +``` + +Activities are matched to cohorts, and offset is calculated: + +```javascript +cohortPeriod + activityPeriod + granularity → offset +``` + +### Caching Strategy + +- **Precomputation**: Cohort stats and retention data are computed once and cached +- **Recomputation**: Can be triggered manually or when `recompute=true` +- **Cache invalidation**: `clearCache()` removes all cached data for a campaign + +## Usage Examples + +### Example 1: Weekly Cohort Analysis for Claims + +```bash +# Get weekly cohorts with claim retention +curl "http://localhost:3001/api/v1/campaigns/1/cohorts?granularity=week&metric=claimed" \ + -H "X-API-Key: your-api-key" +``` + +**Response:** + +```json +{ + "campaignId": "1", + "granularity": "week", + "metricType": "claimed", + "cohorts": [ + { + "cohortPeriod": "2024-W01", + "cohortSize": 150, + "periodStart": "2024-01-01T00:00:00.000Z", + "periodEnd": "2024-01-08T00:00:00.000Z", + "retention": [ + { "offset": 0, "userCount": 100, "retentionRate": 66.67 }, + { "offset": 1, "userCount": 75, "retentionRate": 50.0 }, + { "offset": 2, "userCount": 45, "retentionRate": 30.0 } + ] + }, + { + "cohortPeriod": "2024-W02", + "cohortSize": 200, + "periodStart": "2024-01-08T00:00:00.000Z", + "periodEnd": "2024-01-15T00:00:00.000Z", + "retention": [ + { "offset": 0, "userCount": 140, "retentionRate": 70.0 }, + { "offset": 1, "userCount": 100, "retentionRate": 50.0 } + ] + } + ] +} +``` + +### Example 2: Daily Cohort Analysis for Active Users + 
+```bash +# Get daily cohorts with active user retention +curl "http://localhost:3001/api/v1/campaigns/1/cohorts?granularity=day&metric=active" \ + -H "X-API-Key: your-api-key" +``` + +### Example 3: Get Specific Cohort Retention Curve + +```bash +# Get retention curve for week 1 +curl "http://localhost:3001/api/v1/campaigns/1/cohorts/2024-W01/retention?granularity=week&metric=claimed" \ + -H "X-API-Key: your-api-key" +``` + +**Response:** + +```json +{ + "cohortPeriod": "2024-W01", + "cohortSize": 150, + "retention": [ + { "offset": 0, "userCount": 100, "retentionRate": 66.67 }, + { "offset": 1, "userCount": 75, "retentionRate": 50.0 }, + { "offset": 2, "userCount": 45, "retentionRate": 30.0 }, + { "offset": 3, "userCount": 30, "retentionRate": 20.0 } + ] +} +``` + +### Example 4: Force Recomputation + +```bash +# Recompute cohort data (after reconciliation or data updates) +curl -X POST "http://localhost:3001/api/v1/campaigns/1/cohorts/recompute?granularity=week&metric=claimed" \ + -H "X-API-Key: your-api-key" +``` + +### Example 5: Record User Activities + +```bash +# Record user registration +curl -X POST "http://localhost:3001/api/v1/campaigns/1/activities" \ + -H "X-API-Key: your-api-key" \ + -H "Content-Type: application/json" \ + -d '{ + "userAddress": "GABC...XYZ", + "activityType": "registered", + "occurredAt": "2024-01-15T10:30:00Z" + }' + +# Record user claim +curl -X POST "http://localhost:3001/api/v1/campaigns/1/activities" \ + -H "X-API-Key: your-api-key" \ + -H "Content-Type: application/json" \ + -d '{ + "userAddress": "GABC...XYZ", + "activityType": "claimed", + "occurredAt": "2024-01-20T14:15:00Z" + }' +``` + +## Database Schema + +### user_activities + +| Column | Type | Description | +| ------------- | ------- | --------------------------------- | +| id | INTEGER | Primary key | +| campaign_id | INTEGER | Foreign key to campaigns | +| user_address | TEXT | User identifier (wallet address) | +| activity_type | TEXT | 'registered', 'claimed', 
'active' | +| occurred_at | TEXT | ISO 8601 timestamp (UTC) | +| ledger | INTEGER | Optional: on-chain ledger number | +| tx_hash | TEXT | Optional: transaction hash | +| metadata | TEXT | JSON blob for additional context | +| created_at | TEXT | Record creation timestamp | + +**Indexes:** + +- `campaign_id` +- `campaign_id, user_address` +- `campaign_id, activity_type` +- `campaign_id, occurred_at` +- `campaign_id, user_address, activity_type` + +### cohort_stats + +| Column | Type | Description | +| ------------- | ------- | ------------------------------------ | +| id | INTEGER | Primary key | +| campaign_id | INTEGER | Foreign key to campaigns | +| cohort_period | TEXT | Period identifier (e.g., '2024-W01') | +| cohort_size | INTEGER | Number of users in cohort | +| granularity | TEXT | 'day', 'week', 'month' | +| period_start | TEXT | ISO 8601 timestamp (period start) | +| period_end | TEXT | ISO 8601 timestamp (period end) | +| computed_at | TEXT | When this was computed | + +**Unique constraint:** `(campaign_id, cohort_period, granularity)` + +### retention_data + +| Column | Type | Description | +| ------------- | ------- | -------------------------------------- | +| id | INTEGER | Primary key | +| campaign_id | INTEGER | Foreign key to campaigns | +| cohort_period | TEXT | Period identifier | +| offset_period | INTEGER | Offset from cohort (0, 1, 2, ...) 
| +| metric_type | TEXT | 'claimed', 'active' | +| user_count | INTEGER | Number of users who performed activity | +| granularity | TEXT | 'day', 'week', 'month' | +| computed_at | TEXT | When this was computed | + +**Unique constraint:** `(campaign_id, cohort_period, offset_period, metric_type, granularity)` + +## Integration Points + +### With Event Indexer + +The cohort system can be integrated with the existing event indexer (`eventIndexer.js`) to +automatically record user activities from on-chain events: + +- `credit` events → record as "registered" +- `claim` events → record as "claimed" +- Contract interactions → record as "active" + +### With Dashboard UI + +The retention data is structured for easy visualization: + +- Cohort table view (rows = cohorts, columns = offset periods) +- Retention curves (line charts showing decay over time) +- Comparative cohort analysis + +## Edge Cases Handled + +### 1. Timezone/Period Boundaries (UTC) + +- All timestamps normalized to UTC +- Period boundaries use UTC midnight +- ISO 8601 week numbering (first Thursday rule) + +### 2. Small Cohorts + +- System reports actual counts, not suppressed +- Frontend can flag low-n cohorts (e.g., < 30 users) +- Retention rates always calculated, even for small cohorts + +### 3. Re-computation After Reconciliation + +- `recompute` flag clears cache and recomputes from raw data +- Idempotent: safe to run multiple times +- Preserves historical activity data + +### 4. Users Without Registration + +- System requires registration activity first +- Activities before registration are ignored (shouldn't happen in normal flow) +- Missing cohort assignment results in activity being skipped + +### 5. 
Multiple Activities + +- Same user can have multiple activities in different periods +- Each activity is attributed to the offset period in which it occurred +- Within a single offset a user is counted at most once (distinct users per offset), however many activities they have in that period + +## Performance Considerations + +### Caching Strategy + +- First query computes and caches all cohorts + retention +- Subsequent queries read from cache (fast) +- Recomputation only when explicitly requested or data changes + +### Query Optimization + +- Indexed queries on `campaign_id`, `occurred_at`, `activity_type` +- Precomputed aggregations avoid expensive GROUP BY on reads +- Retention data denormalized for fast lookup + +### Scalability + +- Computation time: O(N) where N = number of activities +- Storage: O(C × P) where C = cohorts, P = max offset periods +- Typical dataset: 1000 cohorts × 52 weeks = 52K rows (small) + +## Testing Strategy + +### Deterministic Fixture Tests + +Tests use hand-computed expected values: + +```javascript +// Week 1 (2024-W01): 3 users register +// Week 2 (2024-W02): 2 users register +// Various claims at different offsets +// Expected: Week 1 cohort size = 3, offset 0 retention = 66.67%, etc. +``` + +### Coverage + +- ✅ All granularities (day, week, month) +- ✅ All metric types (claimed, active) +- ✅ Specific cohort queries +- ✅ Recomputation and cache clearing +- ✅ Empty cohort handling +- ✅ Error cases (non-existent cohorts) + +### Test Results + +All 8 cohort service tests passing with deterministic, hand-verified outputs. 
+ +## Files Changed/Created + +### New Files + +- `backend/src/db/migrations/011_cohort_retention_tables.js` - Database schema +- `backend/src/dal/sqliteCohortRepository.js` - Data access layer +- `backend/src/services/cohortService.js` - Business logic +- `backend/src/routes/cohorts.js` - API routes +- `backend/src/services/cohortService.test.js` - Unit tests +- `IMPLEMENTATION_ISSUE_623.md` - This documentation + +### Modified Files + +- `backend/src/dal/index.js` - Integrated cohort repository +- `backend/src/index.js` - Registered cohort service and routes + +## Acceptance Criteria + +✅ **A known fixture yields the expected cohort/retention curves** + +- Implemented deterministic test with hand-computed values +- Week 1 cohort: 3 users, retention verified at offsets 0, 1, 2 +- Week 2 cohort: 2 users, retention verified at offset 0 +- All retention rates match expected percentages + +## Future Enhancements + +1. **Automated activity recording**: Integrate with event indexer for automatic tracking +2. **Cohort comparison**: API endpoint to compare retention curves between cohorts +3. **Survival analysis**: Kaplan-Meier curves for long-term retention +4. **Predictive retention**: ML models to forecast future retention +5. **Segment-based cohorts**: Group by user attributes (country, device, referral source) +6. **Export functionality**: CSV/JSON export of cohort data +7. **Real-time updates**: WebSocket notifications when new cohort data is available + +## Security Considerations + +- All endpoints require API key authentication +- Rate limiting applies to all cohort endpoints +- Campaign IDs are checked for existence (unknown campaigns return 404); access control itself relies on the API key +- SQL injection protected via parameterized queries +- User addresses can be hashed for privacy + +## Deployment Notes + +### Database Migration + +Run migration before deploying: + +```bash +npm run db:migrate +``` + +### Environment Variables + +No new environment variables required. 
Uses existing: + +- `DB_PATH` - Database file location +- `RATE_LIMIT_*` - Rate limiting configuration + +### Backward Compatibility + +- New endpoints only, no breaking changes +- Existing APIs unchanged +- Migration is additive (no data loss) + +--- + +**Issue**: #623 +**Status**: ✅ Complete +**Author**: Williams-1604 +**Date**: 2026-06-18 diff --git a/backend/src/dal/index.js b/backend/src/dal/index.js index 639730bf..ff1d4692 100644 --- a/backend/src/dal/index.js +++ b/backend/src/dal/index.js @@ -13,6 +13,7 @@ import { assertApiKeyRepository } from './apiKeyRepository.js'; import { createSqliteApiKeyRepository } from './sqliteApiKeyRepository.js'; import { createSqliteFailedJobRepository } from './sqliteFailedJobRepository.js'; import { createSqliteVariantRepository } from './sqliteVariantRepository.js'; +import { createSqliteCohortRepository } from './sqliteCohortRepository.js'; import { createPool, isPostgresUrl } from './pg/pgClient.js'; import { createSqliteAllowlistRepository } from './sqliteAllowlistRepository.js'; @@ -79,6 +80,7 @@ export async function createDal({ webhooks: webhookRepository ?? new WebhookRepository(db), referrals: createSqliteReferralRepository({ db }), variants: createSqliteVariantRepository({ db }), + cohorts: createSqliteCohortRepository({ db }), apiKeys: assertApiKeyRepository(apiKeyRepository ?? createSqliteApiKeyRepository({ db })), failedJobs: failedJobRepository ?? createSqliteFailedJobRepository({ db }), allowlists: allowlistRepository ?? 
// @ts-check

/**
 * Repository for cohort and retention data access.
 *
 * Every method prepares its statement at call time, so constructing the repository
 * does not touch the database.
 *
 * @param {{db: any}} params - `db` must expose `prepare(sql)` returning a statement
 *   object with `run`, `get` and `all` methods.
 */
export function createSqliteCohortRepository({ db }) {
  /**
   * Record a user activity event.
   *
   * @param {object} params
   * @param {number} params.campaignId
   * @param {string} params.userAddress
   * @param {string} params.activityType - 'registered', 'claimed', 'active'
   * @param {string} params.occurredAt - ISO 8601 timestamp
   * @param {number} [params.ledger]
   * @param {string} [params.txHash]
   * @param {object} [params.metadata] - serialised to JSON; missing/null is stored as `{}`
   * @returns {any} whatever the statement's `run` returns
   */
  function recordActivity({
    campaignId,
    userAddress,
    activityType,
    occurredAt,
    ledger,
    txHash,
    metadata = {},
  }) {
    const stmt = db.prepare(`
      INSERT INTO user_activities
        (campaign_id, user_address, activity_type, occurred_at, ledger, tx_hash, metadata)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `);

    return stmt.run(
      Number(campaignId),
      userAddress,
      activityType,
      occurredAt,
      // `??` rather than `||`: a ledger value of 0 must be stored, not turned into NULL.
      ledger ?? null,
      txHash || null,
      // The default parameter only covers `undefined`; also map an explicit null to `{}`.
      JSON.stringify(metadata ?? {}),
    );
  }

  /**
   * Get the cached cohort size for a specific period.
   *
   * @param {number} campaignId
   * @param {string} cohortPeriod - e.g., '2024-W01', '2024-01', '2024-01-01'
   * @param {string} granularity - 'day', 'week', 'month'
   * @returns {number} the stored size, or 0 when no row exists
   */
  function getCohortSize(campaignId, cohortPeriod, granularity) {
    const stmt = db.prepare(`
      SELECT cohort_size
      FROM cohort_stats
      WHERE campaign_id = ? AND cohort_period = ? AND granularity = ?
    `);

    const row = stmt.get(Number(campaignId), cohortPeriod, granularity);
    return row?.cohort_size ?? 0;
  }

  /**
   * Save precomputed cohort statistics (upsert keyed on campaign, period and granularity).
   *
   * @param {object} params
   * @param {number} params.campaignId
   * @param {string} params.cohortPeriod
   * @param {number} params.cohortSize
   * @param {string} params.granularity
   * @param {string} params.periodStart
   * @param {string} params.periodEnd
   */
  function saveCohortStats({
    campaignId,
    cohortPeriod,
    cohortSize,
    granularity,
    periodStart,
    periodEnd,
  }) {
    const stmt = db.prepare(`
      INSERT INTO cohort_stats
        (campaign_id, cohort_period, cohort_size, granularity, period_start, period_end)
      VALUES (?, ?, ?, ?, ?, ?)
      ON CONFLICT(campaign_id, cohort_period, granularity)
      DO UPDATE SET
        cohort_size = excluded.cohort_size,
        period_start = excluded.period_start,
        period_end = excluded.period_end,
        computed_at = datetime('now')
    `);

    return stmt.run(
      Number(campaignId),
      cohortPeriod,
      cohortSize,
      granularity,
      periodStart,
      periodEnd,
    );
  }

  /**
   * Save precomputed retention data (upsert keyed on campaign, period, offset,
   * metric and granularity).
   *
   * @param {object} params
   * @param {number} params.campaignId
   * @param {string} params.cohortPeriod
   * @param {number} params.offsetPeriod
   * @param {string} params.metricType - 'claimed', 'active'
   * @param {number} params.userCount
   * @param {string} params.granularity
   */
  function saveRetentionData({
    campaignId,
    cohortPeriod,
    offsetPeriod,
    metricType,
    userCount,
    granularity,
  }) {
    const stmt = db.prepare(`
      INSERT INTO retention_data
        (campaign_id, cohort_period, offset_period, metric_type, user_count, granularity)
      VALUES (?, ?, ?, ?, ?, ?)
      ON CONFLICT(campaign_id, cohort_period, offset_period, metric_type, granularity)
      DO UPDATE SET
        user_count = excluded.user_count,
        computed_at = datetime('now')
    `);

    return stmt.run(
      Number(campaignId),
      cohortPeriod,
      offsetPeriod,
      metricType,
      userCount,
      granularity,
    );
  }

  /**
   * Get retention data for a cohort, ordered by ascending offset.
   *
   * `retentionRate` is the raw ratio `user_count / cohort_size` (a fraction, not a
   * percentage) and is 0 when the cohort size is 0. Rows only appear for cohorts that
   * also have a `cohort_stats` entry, because the query inner-joins the two tables.
   *
   * @param {number} campaignId
   * @param {string} cohortPeriod
   * @param {string} metricType
   * @param {string} granularity
   * @returns {Array<{offsetPeriod: number, userCount: number, cohortSize: number, retentionRate: number}>}
   */
  function getRetentionData(campaignId, cohortPeriod, metricType, granularity) {
    const stmt = db.prepare(`
      SELECT
        r.offset_period as offsetPeriod,
        r.user_count as userCount,
        c.cohort_size as cohortSize,
        CASE
          WHEN c.cohort_size > 0
          THEN CAST(r.user_count AS REAL) / c.cohort_size
          ELSE 0
        END as retentionRate
      FROM retention_data r
      JOIN cohort_stats c
        ON r.campaign_id = c.campaign_id
        AND r.cohort_period = c.cohort_period
        AND r.granularity = c.granularity
      WHERE r.campaign_id = ?
        AND r.cohort_period = ?
        AND r.metric_type = ?
        AND r.granularity = ?
      ORDER BY r.offset_period ASC
    `);

    return stmt.all(Number(campaignId), cohortPeriod, metricType, granularity);
  }

  /**
   * Get all cached cohort periods for a campaign, ordered by period start.
   *
   * @param {number} campaignId
   * @param {string} granularity
   * @returns {Array<{cohortPeriod: string, cohortSize: number, periodStart: string, periodEnd: string}>}
   */
  function getCohorts(campaignId, granularity) {
    const stmt = db.prepare(`
      SELECT
        cohort_period as cohortPeriod,
        cohort_size as cohortSize,
        period_start as periodStart,
        period_end as periodEnd
      FROM cohort_stats
      WHERE campaign_id = ? AND granularity = ?
      ORDER BY period_start ASC
    `);

    return stmt.all(Number(campaignId), granularity);
  }

  /**
   * Get raw user activities for analysis, oldest first.
   *
   * The date window is half-open: `startDate` is inclusive, `endDate` exclusive. Both
   * are compared against the stored `occurred_at` text as given.
   *
   * @param {object} params
   * @param {number} params.campaignId
   * @param {string} [params.startDate]
   * @param {string} [params.endDate]
   * @param {string} [params.activityType]
   * @returns {Array<{userAddress: string, activityType: string, occurredAt: string}>}
   */
  function getUserActivities({ campaignId, startDate, endDate, activityType }) {
    let sql = `
      SELECT
        user_address as userAddress,
        activity_type as activityType,
        occurred_at as occurredAt
      FROM user_activities
      WHERE campaign_id = ?
    `;

    /** @type {Array<any>} */
    const params = [Number(campaignId)];

    // Optional filters are appended as placeholders only; values are always bound.
    if (startDate) {
      sql += ` AND occurred_at >= ?`;
      params.push(startDate);
    }

    if (endDate) {
      sql += ` AND occurred_at < ?`;
      params.push(endDate);
    }

    if (activityType) {
      sql += ` AND activity_type = ?`;
      params.push(activityType);
    }

    sql += ` ORDER BY occurred_at ASC`;

    return db.prepare(sql).all(...params);
  }

  /**
   * Clear cached cohort and retention data for a campaign so it can be recomputed.
   * Raw `user_activities` rows are left untouched.
   *
   * NOTE(review): the two deletes are issued separately, not inside a transaction.
   *
   * @param {number} campaignId
   */
  function clearCache(campaignId) {
    const id = Number(campaignId);
    db.prepare(`DELETE FROM cohort_stats WHERE campaign_id = ?`).run(id);
    db.prepare(`DELETE FROM retention_data WHERE campaign_id = ?`).run(id);
  }

  return {
    recordActivity,
    getCohortSize,
    saveCohortStats,
    saveRetentionData,
    getRetentionData,
    getCohorts,
    getUserActivities,
    clearCache,
  };
}
export const version = 11;
export const description = 'Add event tables for cohort and retention analysis';

/**
 * Apply migration 011: create the raw activity log (`user_activities`) and the two
 * precomputed cache tables (`cohort_stats`, `retention_data`) with their indexes.
 * Every statement uses IF NOT EXISTS, so re-running the migration is harmless.
 *
 * @param {{ exec(sql: string): unknown }} db
 */
export function up(db) {
  // Create tables for tracking user events for cohort analysis
  db.exec(`
    -- User registration/activity tracking
    CREATE TABLE IF NOT EXISTS user_activities (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      campaign_id INTEGER NOT NULL,
      user_address TEXT NOT NULL,
      activity_type TEXT NOT NULL, -- 'registered', 'claimed', 'active'
      occurred_at TEXT NOT NULL, -- ISO 8601 timestamp
      ledger INTEGER,
      tx_hash TEXT,
      metadata TEXT, -- JSON for additional data
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      FOREIGN KEY (campaign_id) REFERENCES campaigns(id) ON DELETE CASCADE
    );

    CREATE INDEX IF NOT EXISTS idx_user_activities_campaign
      ON user_activities(campaign_id);
    CREATE INDEX IF NOT EXISTS idx_user_activities_user
      ON user_activities(campaign_id, user_address);
    CREATE INDEX IF NOT EXISTS idx_user_activities_type
      ON user_activities(campaign_id, activity_type);
    CREATE INDEX IF NOT EXISTS idx_user_activities_occurred
      ON user_activities(campaign_id, occurred_at);
    CREATE INDEX IF NOT EXISTS idx_user_activities_user_type
      ON user_activities(campaign_id, user_address, activity_type);

    -- Precomputed cohort statistics for performance
    CREATE TABLE IF NOT EXISTS cohort_stats (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      campaign_id INTEGER NOT NULL,
      cohort_period TEXT NOT NULL, -- e.g., '2024-W01', '2024-01', '2024-01-01'
      cohort_size INTEGER NOT NULL DEFAULT 0,
      granularity TEXT NOT NULL, -- 'day', 'week', 'month'
      period_start TEXT NOT NULL, -- ISO 8601 timestamp
      period_end TEXT NOT NULL, -- ISO 8601 timestamp
      computed_at TEXT NOT NULL DEFAULT (datetime('now')),
      UNIQUE(campaign_id, cohort_period, granularity),
      FOREIGN KEY (campaign_id) REFERENCES campaigns(id) ON DELETE CASCADE
    );

    CREATE INDEX IF NOT EXISTS idx_cohort_stats_campaign
      ON cohort_stats(campaign_id);
    CREATE INDEX IF NOT EXISTS idx_cohort_stats_period
      ON cohort_stats(campaign_id, cohort_period);

    -- Precomputed retention data
    CREATE TABLE IF NOT EXISTS retention_data (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      campaign_id INTEGER NOT NULL,
      cohort_period TEXT NOT NULL,
      offset_period INTEGER NOT NULL, -- 0, 1, 2, 3... (weeks/days/months after cohort)
      metric_type TEXT NOT NULL, -- 'claimed', 'active'
      user_count INTEGER NOT NULL DEFAULT 0,
      granularity TEXT NOT NULL, -- 'day', 'week', 'month'
      computed_at TEXT NOT NULL DEFAULT (datetime('now')),
      UNIQUE(campaign_id, cohort_period, offset_period, metric_type, granularity),
      FOREIGN KEY (campaign_id) REFERENCES campaigns(id) ON DELETE CASCADE
    );

    CREATE INDEX IF NOT EXISTS idx_retention_data_campaign
      ON retention_data(campaign_id);
    CREATE INDEX IF NOT EXISTS idx_retention_data_cohort
      ON retention_data(campaign_id, cohort_period);
    CREATE INDEX IF NOT EXISTS idx_retention_data_metric
      ON retention_data(campaign_id, metric_type);
  `);
}

/**
 * Revert migration 011: drop the three tables created by `up` together with their
 * indexes, in reverse creation order.
 *
 * @param {{ exec(sql: string): unknown }} db
 */
export function down(db) {
  db.exec(`
    DROP INDEX IF EXISTS idx_retention_data_metric;
    DROP INDEX IF EXISTS idx_retention_data_cohort;
    DROP INDEX IF EXISTS idx_retention_data_campaign;
    DROP TABLE IF EXISTS retention_data;

    DROP INDEX IF EXISTS idx_cohort_stats_period;
    DROP INDEX IF EXISTS idx_cohort_stats_campaign;
    DROP TABLE IF EXISTS cohort_stats;

    DROP INDEX IF EXISTS idx_user_activities_user_type;
    DROP INDEX IF EXISTS idx_user_activities_occurred;
    DROP INDEX IF EXISTS idx_user_activities_type;
    DROP INDEX IF EXISTS idx_user_activities_user;
    DROP INDEX IF EXISTS idx_user_activities_campaign;
    DROP TABLE IF EXISTS user_activities;
  `);
}
/**
 * Build the Express router exposing the cohort and retention analysis API.
 *
 * Routes (relative to wherever the router is mounted):
 * - `GET  /:campaignId/cohorts`                         full cohort analysis
 * - `GET  /:campaignId/cohorts/:cohortPeriod/retention` one cohort's curve
 * - `POST /:campaignId/cohorts/recompute`               force recomputation
 * - `POST /:campaignId/activities`                      record one activity
 *
 * @param {object} params
 * @param {ReturnType<typeof import('../services/cohortService.js').createCohortService>} params.cohortService
 * @param {{ getCampaignById: (id: number) => unknown }} params.campaignRepo
 * @returns {import('express').Router}
 */
export function createCohortRoutes({ cohortService, campaignRepo }) {
  const router = express.Router();

  // Validation schemas
  const granularitySchema = z.enum(['day', 'week', 'month']);
  const metricTypeSchema = z.enum(['claimed', 'active']);

  const cohortAnalysisQuerySchema = z.object({
    granularity: granularitySchema.optional().default('week'),
    metric: metricTypeSchema.optional().default('claimed'),
    // Query-string values are strings; only the literal 'true' enables it.
    recompute: z
      .string()
      .optional()
      .transform((val) => val === 'true'),
  });

  const retentionCurveQuerySchema = z.object({
    granularity: granularitySchema.optional().default('week'),
    metric: metricTypeSchema.optional().default('claimed'),
  });

  const recordActivitySchema = z.object({
    userAddress: z.string().min(1),
    activityType: z.enum(['registered', 'claimed', 'active']),
    // Reject timestamps JavaScript cannot parse: a stored bad value would
    // later be bucketed as an Invalid Date by the cohort computation.
    // An empty string is still accepted (the service treats it as "now").
    occurredAt: z
      .string()
      .refine((value) => value === '' || !Number.isNaN(Date.parse(value)), {
        message: 'occurredAt must be a parseable date-time string',
      })
      .optional(),
    metadata: z.record(z.unknown()).optional(),
  });

  // One recorder per activity type, so an unknown type can never fall
  // through and leave the result undefined.
  const activityRecorders = {
    registered: (...args) => cohortService.recordRegistration(...args),
    claimed: (...args) => cohortService.recordClaim(...args),
    active: (...args) => cohortService.recordActive(...args),
  };

  /**
   * Resolve `:campaignId` to an existing campaign's numeric id.
   * Sends the 404 response itself and returns `null` when the id is not an
   * integer or no such campaign exists, so callers only need to bail out.
   */
  function resolveCampaignId(req, res) {
    const campaignId = Number(req.params.campaignId);
    // Never hand NaN or fractional ids to the repository.
    const campaign = Number.isSafeInteger(campaignId)
      ? campaignRepo.getCampaignById(campaignId)
      : null;

    if (!campaign) {
      res.status(404).json({ error: 'Campaign not found' });
      return null;
    }
    return campaignId;
  }

  /**
   * Map Zod validation failures to a 400 response; forward anything else
   * to the Express error pipeline.
   */
  function handleRouteError(err, res, next) {
    if (err instanceof z.ZodError) {
      res.status(400).json({
        error: 'Validation error',
        details: err.issues,
      });
      return;
    }
    next(err);
  }

  /**
   * GET /api/v1/campaigns/:campaignId/cohorts
   * Get cohort analysis with retention curves
   */
  router.get('/:campaignId/cohorts', async (req, res, next) => {
    try {
      const campaignId = resolveCampaignId(req, res);
      if (campaignId === null) return;

      const query = cohortAnalysisQuerySchema.parse(req.query);

      const analysis = await cohortService.getCohortAnalysis(
        campaignId,
        query.granularity,
        query.metric,
        { recompute: query.recompute },
      );

      res.json({
        campaignId,
        granularity: query.granularity,
        metricType: query.metric,
        cohorts: analysis,
      });
    } catch (err) {
      handleRouteError(err, res, next);
    }
  });

  /**
   * GET /api/v1/campaigns/:campaignId/cohorts/:cohortPeriod/retention
   * Get retention curve for a specific cohort
   */
  router.get('/:campaignId/cohorts/:cohortPeriod/retention', async (req, res, next) => {
    try {
      const campaignId = resolveCampaignId(req, res);
      if (campaignId === null) return;

      const { cohortPeriod } = req.params;
      const query = retentionCurveQuerySchema.parse(req.query);

      const curve = await cohortService.getRetentionCurve(
        campaignId,
        cohortPeriod,
        query.granularity,
        query.metric,
      );

      res.json(curve);
    } catch (err) {
      // The service signals an unknown cohort through its error message.
      const isUnknownCohort =
        !(err instanceof z.ZodError) &&
        typeof err?.message === 'string' &&
        err.message.includes('Cohort not found');
      if (isUnknownCohort) {
        res.status(404).json({ error: err.message });
        return;
      }
      handleRouteError(err, res, next);
    }
  });

  /**
   * POST /api/v1/campaigns/:campaignId/cohorts/recompute
   * Force recomputation of cohort and retention data
   */
  router.post('/:campaignId/cohorts/recompute', async (req, res, next) => {
    try {
      const campaignId = resolveCampaignId(req, res);
      if (campaignId === null) return;

      const query = cohortAnalysisQuerySchema.parse(req.query);

      await cohortService.computeCohorts(campaignId, query.granularity, query.metric);

      res.json({
        success: true,
        message: 'Cohort data recomputed successfully',
        campaignId,
        granularity: query.granularity,
        metricType: query.metric,
      });
    } catch (err) {
      handleRouteError(err, res, next);
    }
  });

  /**
   * POST /api/v1/campaigns/:campaignId/activities
   * Record a user activity (for testing and manual data entry)
   */
  router.post('/:campaignId/activities', async (req, res, next) => {
    try {
      const campaignId = resolveCampaignId(req, res);
      if (campaignId === null) return;

      const data = recordActivitySchema.parse(req.body);

      const record = activityRecorders[data.activityType];
      const result = record(campaignId, data.userAddress, data.occurredAt, data.metadata);

      res.status(201).json({
        success: true,
        message: `Activity '${data.activityType}' recorded for user ${data.userAddress}`,
        activityId: result.lastInsertRowid,
      });
    } catch (err) {
      handleRouteError(err, res, next);
    }
  });

  return router;
}
/**
 * Cohort and retention analysis service.
 *
 * Groups users into cohorts by the period (day, ISO week or month, all in
 * UTC) of their earliest `registered` activity, then counts, for every
 * cohort, how many distinct users performed the chosen metric activity at
 * each whole-period offset from the cohort period.
 *
 * The repository is expected to provide `getUserActivities`,
 * `saveCohortStats`, `saveRetentionData`, `getCohorts`, `getCohortSize`,
 * `getRetentionData`, `clearCache` and `recordActivity`.
 *
 * @param {object} params
 * @param {any} params.cohortRepo - cohort data-access object
 */
export function createCohortService({ cohortRepo }) {
  const MS_PER_DAY = 86_400_000;
  const MS_PER_WEEK = 7 * MS_PER_DAY;

  /**
   * ISO-8601 week number together with the ISO week-numbering year.
   *
   * The week-numbering year differs from the calendar year around New Year
   * (e.g. 2024-12-30 belongs to week 1 of 2025), so both values must be
   * derived from the Thursday of the date's week.
   *
   * @param {Date} date
   * @returns {{isoYear: number, week: number}}
   */
  function getIsoWeek(date) {
    const thursday = new Date(
      Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()),
    );
    const isoWeekday = thursday.getUTCDay() || 7; // Monday = 1 ... Sunday = 7
    thursday.setUTCDate(thursday.getUTCDate() + 4 - isoWeekday);

    const isoYear = thursday.getUTCFullYear();
    const yearStartMs = Date.UTC(isoYear, 0, 1);
    const week = Math.ceil(((thursday.getTime() - yearStartMs) / MS_PER_DAY + 1) / 7);
    return { isoYear, week };
  }

  /**
   * Get period string based on granularity
   * @param {Date} date
   * @param {string} granularity - 'day', 'week', 'month'
   * @returns {string} `YYYY-MM-DD`, `YYYY-Www` (ISO week-year) or `YYYY-MM`
   * @throws {Error} when the granularity is not supported
   */
  function getPeriodString(date, granularity) {
    const year = date.getUTCFullYear();
    const month = String(date.getUTCMonth() + 1).padStart(2, '0');
    const day = String(date.getUTCDate()).padStart(2, '0');

    switch (granularity) {
      case 'day':
        return `${year}-${month}-${day}`;
      case 'week': {
        // Use the ISO week-year, not the calendar year, so the label and
        // the week number always belong together.
        const { isoYear, week } = getIsoWeek(date);
        return `${isoYear}-W${String(week).padStart(2, '0')}`;
      }
      case 'month':
        return `${year}-${month}`;
      default:
        throw new Error(`Invalid granularity: ${granularity}`);
    }
  }

  /**
   * Get period start (inclusive) and end (exclusive) dates in UTC
   * @param {string} periodString
   * @param {string} granularity
   * @returns {{start: Date, end: Date}}
   * @throws {Error} when the granularity is not supported
   */
  function getPeriodBounds(periodString, granularity) {
    if (granularity === 'day') {
      const [year, month, day] = periodString.split('-').map(Number);
      return {
        start: new Date(Date.UTC(year, month - 1, day)),
        end: new Date(Date.UTC(year, month - 1, day + 1)),
      };
    }

    if (granularity === 'week') {
      const [isoYear, week] = periodString.split('-W').map(Number);

      // ISO week 1 is the week that contains January 4th; anchoring on
      // January 1st is off by a week whenever Jan 1 falls on Fri-Sun.
      const jan4Ms = Date.UTC(isoYear, 0, 4);
      const jan4Weekday = new Date(jan4Ms).getUTCDay() || 7;
      const week1MondayMs = jan4Ms - (jan4Weekday - 1) * MS_PER_DAY;
      const startMs = week1MondayMs + (week - 1) * MS_PER_WEEK;

      return { start: new Date(startMs), end: new Date(startMs + MS_PER_WEEK) };
    }

    if (granularity === 'month') {
      const [year, month] = periodString.split('-').map(Number);
      return {
        start: new Date(Date.UTC(year, month - 1, 1)),
        end: new Date(Date.UTC(year, month, 1)),
      };
    }

    throw new Error(`Invalid granularity: ${granularity}`);
  }

  /**
   * Number of whole periods from the cohort period to the activity period
   * (negative when the activity period precedes the cohort period).
   *
   * @param {string} cohortPeriod
   * @param {string} activityPeriod
   * @param {string} granularity
   * @returns {number}
   * @throws {Error} when the granularity is not supported
   */
  function calculateOffset(cohortPeriod, activityPeriod, granularity) {
    if (granularity === 'month') {
      const [fromYear, fromMonth] = cohortPeriod.split('-').map(Number);
      const [toYear, toMonth] = activityPeriod.split('-').map(Number);
      return (toYear - fromYear) * 12 + (toMonth - fromMonth);
    }

    if (granularity === 'day' || granularity === 'week') {
      // Measure between period starts instead of assuming 52 weeks per
      // year; ISO years can have 53 weeks. Everything is UTC, so the
      // division is exact.
      const periodMs = granularity === 'day' ? MS_PER_DAY : MS_PER_WEEK;
      const fromMs = getPeriodBounds(cohortPeriod, granularity).start.getTime();
      const toMs = getPeriodBounds(activityPeriod, granularity).start.getTime();
      return Math.round((toMs - fromMs) / periodMs);
    }

    throw new Error(`Invalid granularity: ${granularity}`);
  }

  /**
   * Assign every user to exactly one cohort: the period of their earliest
   * registration. Registrations with unparseable timestamps are ignored
   * because they cannot be placed in any period.
   *
   * @param {Array<{userAddress: string, occurredAt: string}>} registrations
   * @param {string} granularity
   * @returns {{cohorts: Map<string, Set<string>>, userCohortMap: Map<string, string>}}
   */
  function assignUsersToCohorts(registrations, granularity) {
    /** @type {Map<string, number>} */
    const earliestRegistration = new Map();
    for (const registration of registrations) {
      const registeredMs = new Date(registration.occurredAt).getTime();
      if (Number.isNaN(registeredMs)) continue;

      const known = earliestRegistration.get(registration.userAddress);
      if (known === undefined || registeredMs < known) {
        earliestRegistration.set(registration.userAddress, registeredMs);
      }
    }

    /** @type {Map<string, Set<string>>} */
    const cohorts = new Map();
    /** @type {Map<string, string>} */
    const userCohortMap = new Map();
    for (const [userAddress, registeredMs] of earliestRegistration) {
      const period = getPeriodString(new Date(registeredMs), granularity);
      userCohortMap.set(userAddress, period);

      let members = cohorts.get(period);
      if (!members) {
        members = new Set();
        cohorts.set(period, members);
      }
      members.add(userAddress);
    }

    return { cohorts, userCohortMap };
  }

  /**
   * Convert a stored retention row into the API shape, expressing the
   * repository's fractional rate as a percentage with two decimals.
   *
   * @param {{offsetPeriod: number, userCount: number, retentionRate: number}} row
   * @returns {{offset: number, userCount: number, retentionRate: number}}
   */
  function toRetentionPoint(row) {
    return {
      offset: row.offsetPeriod,
      userCount: row.userCount,
      retentionRate: Number((row.retentionRate * 100).toFixed(2)),
    };
  }

  /**
   * Compute cohorts and retention curves from raw activity data and
   * persist them through the repository.
   *
   * @param {number} campaignId
   * @param {string} granularity - 'day', 'week', 'month'
   * @param {string} metricType - 'claimed', 'active'
   * @returns {Promise<void>}
   */
  async function computeCohorts(campaignId, granularity = 'week', metricType = 'claimed') {
    const registrations = cohortRepo.getUserActivities({
      campaignId,
      activityType: 'registered',
    });
    const activities = cohortRepo.getUserActivities({
      campaignId,
      activityType: metricType,
    });

    const { cohorts, userCohortMap } = assignUsersToCohorts(registrations, granularity);

    // Save cohort stats
    for (const [cohortPeriod, users] of cohorts) {
      const bounds = getPeriodBounds(cohortPeriod, granularity);
      cohortRepo.saveCohortStats({
        campaignId,
        cohortPeriod,
        cohortSize: users.size,
        granularity,
        periodStart: bounds.start.toISOString(),
        periodEnd: bounds.end.toISOString(),
      });
    }

    // cohort period -> offset -> distinct users active at that offset
    /** @type {Map<string, Map<number, Set<string>>>} */
    const retentionByOffset = new Map();

    for (const activity of activities) {
      const userCohort = userCohortMap.get(activity.userAddress);
      if (userCohort === undefined) continue; // activity by an unregistered user

      const activityMs = new Date(activity.occurredAt).getTime();
      if (Number.isNaN(activityMs)) continue; // cannot be placed in a period

      const activityPeriod = getPeriodString(new Date(activityMs), granularity);
      const offset = calculateOffset(userCohort, activityPeriod, granularity);
      if (offset < 0) continue; // activity in a period before registration

      let cohortOffsets = retentionByOffset.get(userCohort);
      if (!cohortOffsets) {
        cohortOffsets = new Map();
        retentionByOffset.set(userCohort, cohortOffsets);
      }

      let activeUsers = cohortOffsets.get(offset);
      if (!activeUsers) {
        activeUsers = new Set();
        cohortOffsets.set(offset, activeUsers);
      }
      activeUsers.add(activity.userAddress);
    }

    // Save retention data
    for (const [cohortPeriod, offsetMap] of retentionByOffset) {
      for (const [offset, users] of offsetMap) {
        cohortRepo.saveRetentionData({
          campaignId,
          cohortPeriod,
          offsetPeriod: offset,
          metricType,
          userCount: users.size,
          granularity,
        });
      }
    }
  }

  /**
   * Get cohort analysis with retention curves
   *
   * Cached results are reused unless `recompute` is set. Cohort rows are
   * shared by all metrics while retention rows are per metric, so data is
   * also computed when nothing is cached yet for the requested metric.
   *
   * @param {number} campaignId
   * @param {string} granularity - 'day', 'week', 'month'
   * @param {string} metricType - 'claimed', 'active'
   * @param {object} [options]
   * @param {boolean} [options.recompute=false] - Force recomputation
   * @returns {Promise<Array<object>>} cached cohort rows, each extended with
   *   `retention: Array<{offset, userCount, retentionRate}>`
   */
  async function getCohortAnalysis(
    campaignId,
    granularity = 'week',
    metricType = 'claimed',
    options = {},
  ) {
    const { recompute = false } = options;

    const loadRetention = (cohortPeriod) =>
      cohortRepo.getRetentionData(campaignId, cohortPeriod, metricType, granularity);

    const cachedCohorts = cohortRepo.getCohorts(campaignId, granularity);

    let mustCompute = recompute || cachedCohorts.length === 0;
    if (!mustCompute) {
      // Cohorts may have been cached while serving a different metric (or
      // the cache was cleared and rebuilt for one); without this check the
      // requested metric would report empty curves indefinitely.
      mustCompute = !cachedCohorts.some(
        (cohort) => loadRetention(cohort.cohortPeriod).length > 0,
      );
    }

    if (mustCompute) {
      if (recompute) {
        cohortRepo.clearCache(campaignId);
      }
      await computeCohorts(campaignId, granularity, metricType);
    }

    return cohortRepo.getCohorts(campaignId, granularity).map((cohort) => ({
      ...cohort,
      retention: loadRetention(cohort.cohortPeriod).map(toRetentionPoint),
    }));
  }

  /**
   * Get retention curve for a specific cohort from already computed data.
   *
   * @param {number} campaignId
   * @param {string} cohortPeriod
   * @param {string} granularity
   * @param {string} metricType
   * @returns {Promise<{cohortPeriod: string, cohortSize: number, retention: Array<{offset: number, userCount: number, retentionRate: number}>}>}
   * @throws {Error} `Cohort not found: <period>` when the cohort has size 0
   */
  async function getRetentionCurve(
    campaignId,
    cohortPeriod,
    granularity = 'week',
    metricType = 'claimed',
  ) {
    const cohortSize = cohortRepo.getCohortSize(campaignId, cohortPeriod, granularity);

    if (cohortSize === 0) {
      throw new Error(`Cohort not found: ${cohortPeriod}`);
    }

    const retention = cohortRepo.getRetentionData(
      campaignId,
      cohortPeriod,
      metricType,
      granularity,
    );

    return {
      cohortPeriod,
      cohortSize,
      retention: retention.map(toRetentionPoint),
    };
  }

  /**
   * Shared implementation of the three `record*` entry points.
   *
   * @param {string} activityType - 'registered', 'claimed' or 'active'
   * @param {number} campaignId
   * @param {string} userAddress
   * @param {string} [occurredAt] - timestamp, defaults to now when empty
   * @param {object} [metadata]
   * @throws {RangeError} when `occurredAt` is given but cannot be parsed
   */
  function recordActivityOfType(activityType, campaignId, userAddress, occurredAt, metadata) {
    // Refuse timestamps that could never be bucketed into a period rather
    // than storing a row that is unusable for analysis.
    if (occurredAt && Number.isNaN(new Date(occurredAt).getTime())) {
      throw new RangeError(`Invalid occurredAt timestamp: ${occurredAt}`);
    }

    return cohortRepo.recordActivity({
      campaignId,
      userAddress,
      activityType,
      occurredAt: occurredAt || new Date().toISOString(),
      metadata,
    });
  }

  /**
   * Record user registration
   * @param {number} campaignId
   * @param {string} userAddress
   * @param {string} [occurredAt] - ISO 8601 timestamp, defaults to now
   * @param {object} [metadata]
   * @throws {RangeError} when `occurredAt` cannot be parsed
   */
  function recordRegistration(campaignId, userAddress, occurredAt, metadata = {}) {
    return recordActivityOfType('registered', campaignId, userAddress, occurredAt, metadata);
  }

  /**
   * Record user claim activity
   * @param {number} campaignId
   * @param {string} userAddress
   * @param {string} [occurredAt] - ISO 8601 timestamp, defaults to now
   * @param {object} [metadata]
   * @throws {RangeError} when `occurredAt` cannot be parsed
   */
  function recordClaim(campaignId, userAddress, occurredAt, metadata = {}) {
    return recordActivityOfType('claimed', campaignId, userAddress, occurredAt, metadata);
  }

  /**
   * Record user active status
   * @param {number} campaignId
   * @param {string} userAddress
   * @param {string} [occurredAt] - ISO 8601 timestamp, defaults to now
   * @param {object} [metadata]
   * @throws {RangeError} when `occurredAt` cannot be parsed
   */
  function recordActive(campaignId, userAddress, occurredAt, metadata = {}) {
    return recordActivityOfType('active', campaignId, userAddress, occurredAt, metadata);
  }

  return {
    getCohortAnalysis,
    getRetentionCurve,
    computeCohorts,
    recordRegistration,
    recordClaim,
    recordActive,
  };
}
/**
 * Create an in-memory database with all migrations applied and a single
 * campaign (id 1), plus a repository and service bound to it.
 */
async function makeTestCohort() {
  const db = new Database(':memory:');
  await runMigrations(db);

  // Create test campaign
  db.prepare(
    `INSERT INTO campaigns (id, name, slug, created_at, updated_at) VALUES (?, ?, ?, ?, ?)`,
  ).run(1, 'Test Campaign', 'test-campaign', '2024-01-01T00:00:00Z', '2024-01-01T00:00:00Z');

  const cohortRepo = createSqliteCohortRepository({ db });
  const cohortService = createCohortService({ cohortRepo });

  return { db, cohortRepo, cohortService };
}

test('cohortService - deterministic fixture test with hand-computed values', async () => {
  const { cohortService } = await makeTestCohort();

  // Week 1 (2024-W01): 3 users register
  cohortService.recordRegistration(1, 'USER_A', '2024-01-01T10:00:00Z');
  cohortService.recordRegistration(1, 'USER_B', '2024-01-03T14:00:00Z');
  cohortService.recordRegistration(1, 'USER_C', '2024-01-05T18:00:00Z');

  // Week 2 (2024-W02): 2 users register
  cohortService.recordRegistration(1, 'USER_D', '2024-01-08T09:00:00Z');
  cohortService.recordRegistration(1, 'USER_E', '2024-01-10T16:00:00Z');

  // USER_A claims in week 0 (same week as registration)
  cohortService.recordClaim(1, 'USER_A', '2024-01-02T12:00:00Z');

  // USER_B claims in week 1 (1 week after registration)
  cohortService.recordClaim(1, 'USER_B', '2024-01-09T10:00:00Z');

  // USER_C claims in week 0
  cohortService.recordClaim(1, 'USER_C', '2024-01-06T08:00:00Z');

  // USER_D claims in week 0
  cohortService.recordClaim(1, 'USER_D', '2024-01-08T15:00:00Z');

  // USER_A claims again in week 2 (2 weeks after registration)
  cohortService.recordClaim(1, 'USER_A', '2024-01-15T11:00:00Z');

  // Compute cohorts
  const analysis = await cohortService.getCohortAnalysis(1, 'week', 'claimed');

  assert.equal(analysis.length, 2, 'Should have 2 cohorts');

  // Week 1 cohort (2024-W01)
  const cohort1 = analysis.find((c) => c.cohortPeriod === '2024-W01');
  assert.ok(cohort1, 'Week 1 cohort should exist');
  assert.equal(cohort1.cohortSize, 3, 'Week 1 cohort should have 3 users');

  // Week 1 retention:
  // - Offset 0 (same week): USER_A, USER_C claimed = 2 users (66.67%)
  // - Offset 1 (1 week later): USER_B claimed = 1 user (33.33%)
  // - Offset 2 (2 weeks later): USER_A claimed = 1 user (33.33%)
  const week1Retention = cohort1.retention;
  // The fixture yields exactly the three offsets listed above.
  assert.equal(week1Retention.length, 3, 'Week 1 should have exactly 3 retention offsets');

  const week1Offset0 = week1Retention.find((r) => r.offset === 0);
  assert.ok(week1Offset0, 'Week 1 offset 0 should exist');
  assert.equal(week1Offset0.userCount, 2, 'Week 1 offset 0 should have 2 users');
  assert.equal(week1Offset0.retentionRate, 66.67, 'Week 1 offset 0 retention should be 66.67%');

  const week1Offset1 = week1Retention.find((r) => r.offset === 1);
  assert.ok(week1Offset1, 'Week 1 offset 1 should exist');
  assert.equal(week1Offset1.userCount, 1, 'Week 1 offset 1 should have 1 user');
  assert.equal(week1Offset1.retentionRate, 33.33, 'Week 1 offset 1 retention should be 33.33%');

  const week1Offset2 = week1Retention.find((r) => r.offset === 2);
  assert.ok(week1Offset2, 'Week 1 offset 2 should exist');
  assert.equal(week1Offset2.userCount, 1, 'Week 1 offset 2 should have 1 user');
  assert.equal(week1Offset2.retentionRate, 33.33, 'Week 1 offset 2 retention should be 33.33%');

  // Week 2 cohort (2024-W02)
  const cohort2 = analysis.find((c) => c.cohortPeriod === '2024-W02');
  assert.ok(cohort2, 'Week 2 cohort should exist');
  assert.equal(cohort2.cohortSize, 2, 'Week 2 cohort should have 2 users');

  // Week 2 retention:
  // - Offset 0: USER_D claimed = 1 user (50%)
  const week2Retention = cohort2.retention;
  const week2Offset0 = week2Retention.find((r) => r.offset === 0);
  assert.ok(week2Offset0, 'Week 2 offset 0 should exist');
  assert.equal(week2Offset0.userCount, 1, 'Week 2 offset 0 should have 1 user');
  assert.equal(week2Offset0.retentionRate, 50, 'Week 2 offset 0 retention should be 50%');
});

test('cohortService - day granularity', async () => {
  const { cohortService } = await makeTestCohort();

  // Day 1: 2 users register
  cohortService.recordRegistration(1, 'USER_A', '2024-01-01T10:00:00Z');
  cohortService.recordRegistration(1, 'USER_B', '2024-01-01T14:00:00Z');

  // Day 2: 1 user registers
  cohortService.recordRegistration(1, 'USER_C', '2024-01-02T10:00:00Z');

  // USER_A claims on day 0 and day 1
  cohortService.recordClaim(1, 'USER_A', '2024-01-01T12:00:00Z');
  cohortService.recordClaim(1, 'USER_A', '2024-01-02T12:00:00Z');

  // USER_B claims on day 1
  cohortService.recordClaim(1, 'USER_B', '2024-01-02T15:00:00Z');

  const analysis = await cohortService.getCohortAnalysis(1, 'day', 'claimed');

  assert.equal(analysis.length, 2, 'Should have 2 daily cohorts');

  const day1Cohort = analysis.find((c) => c.cohortPeriod === '2024-01-01');
  assert.ok(day1Cohort, 'Day 1 cohort should exist');
  assert.equal(day1Cohort.cohortSize, 2, 'Day 1 cohort should have 2 users');

  const day1Offset0 = day1Cohort.retention.find((r) => r.offset === 0);
  assert.equal(day1Offset0?.userCount, 1, 'Day 1 offset 0 should have 1 user');

  const day1Offset1 = day1Cohort.retention.find((r) => r.offset === 1);
  assert.equal(day1Offset1?.userCount, 2, 'Day 1 offset 1 should have 2 users');
});

test('cohortService - month granularity', async () => {
  const { cohortService } = await makeTestCohort();

  // January: 2 users
  cohortService.recordRegistration(1, 'USER_A', '2024-01-15T10:00:00Z');
  cohortService.recordRegistration(1, 'USER_B', '2024-01-20T14:00:00Z');

  // February: 1 user
  cohortService.recordRegistration(1, 'USER_C', '2024-02-05T10:00:00Z');

  // USER_A claims in January (month 0) and February (month 1)
  cohortService.recordClaim(1, 'USER_A', '2024-01-16T12:00:00Z');
  cohortService.recordClaim(1, 'USER_A', '2024-02-10T12:00:00Z');

  // USER_B claims in February (month 1)
  cohortService.recordClaim(1, 'USER_B', '2024-02-12T15:00:00Z');

  const analysis = await cohortService.getCohortAnalysis(1, 'month', 'claimed');

  assert.equal(analysis.length, 2, 'Should have 2 monthly cohorts');

  const janCohort = analysis.find((c) => c.cohortPeriod === '2024-01');
  assert.ok(janCohort, 'January cohort should exist');
  assert.equal(janCohort.cohortSize, 2, 'January cohort should have 2 users');

  const janOffset0 = janCohort.retention.find((r) => r.offset === 0);
  assert.equal(janOffset0?.userCount, 1, 'January offset 0 should have 1 user');

  const janOffset1 = janCohort.retention.find((r) => r.offset === 1);
  assert.equal(janOffset1?.userCount, 2, 'January offset 1 should have 2 users');
  assert.equal(janOffset1?.retentionRate, 100, 'January offset 1 retention should be 100%');
});

test('cohortService - active metric type', async () => {
  const { cohortService } = await makeTestCohort();

  cohortService.recordRegistration(1, 'USER_A', '2024-01-01T10:00:00Z');
  cohortService.recordRegistration(1, 'USER_B', '2024-01-01T14:00:00Z');

  // USER_A is active in week 0
  cohortService.recordActive(1, 'USER_A', '2024-01-02T12:00:00Z');

  // Both users are active in week 1
  cohortService.recordActive(1, 'USER_A', '2024-01-08T12:00:00Z');
  cohortService.recordActive(1, 'USER_B', '2024-01-09T15:00:00Z');

  const analysis = await cohortService.getCohortAnalysis(1, 'week', 'active');

  const cohort = analysis[0];
  assert.equal(cohort.cohortSize, 2, 'Cohort should have 2 users');

  const offset0 = cohort.retention.find((r) => r.offset === 0);
  assert.equal(offset0?.userCount, 1, 'Week 0 should have 1 active user');

  const offset1 = cohort.retention.find((r) => r.offset === 1);
  assert.equal(offset1?.userCount, 2, 'Week 1 should have 2 active users');
  assert.equal(offset1?.retentionRate, 100, 'Week 1 retention should be 100%');
});

test('cohortService - getRetentionCurve for specific cohort', async () => {
  const { cohortService } = await makeTestCohort();

  cohortService.recordRegistration(1, 'USER_A', '2024-01-01T10:00:00Z');
  cohortService.recordRegistration(1, 'USER_B', '2024-01-01T14:00:00Z');

  cohortService.recordClaim(1, 'USER_A', '2024-01-02T12:00:00Z');
  cohortService.recordClaim(1, 'USER_B', '2024-01-09T15:00:00Z');

  // Force computation first
  await cohortService.computeCohorts(1, 'week', 'claimed');

  const curve = await cohortService.getRetentionCurve(1, '2024-W01', 'week', 'claimed');

  assert.equal(curve.cohortPeriod, '2024-W01');
  assert.equal(curve.cohortSize, 2);
  assert.equal(curve.retention.length, 2);
});

test('cohortService - recompute clears cache', async () => {
  const { cohortService } = await makeTestCohort();

  cohortService.recordRegistration(1, 'USER_A', '2024-01-01T10:00:00Z');
  cohortService.recordClaim(1, 'USER_A', '2024-01-02T12:00:00Z');

  // First computation
  await cohortService.getCohortAnalysis(1, 'week', 'claimed');

  // Add more data
  cohortService.recordRegistration(1, 'USER_B', '2024-01-03T10:00:00Z');
  cohortService.recordClaim(1, 'USER_B', '2024-01-04T12:00:00Z');

  // Recompute
  const analysis = await cohortService.getCohortAnalysis(1, 'week', 'claimed', { recompute: true });

  const cohort = analysis[0];
  assert.equal(cohort.cohortSize, 2, 'Cohort should now have 2 users after recompute');
});

test('cohortService - empty cohort handling', async () => {
  const { cohortService } = await makeTestCohort();

  const analysis = await cohortService.getCohortAnalysis(1, 'week', 'claimed');

  assert.equal(analysis.length, 0, 'Should have no cohorts when no data exists');
});

test('cohortService - throws error for non-existent cohort', async () => {
  const { cohortService } = await makeTestCohort();

  await assert.rejects(
    async () => {
      await cohortService.getRetentionCurve(1, '2024-W01', 'week', 'claimed');
    },
    /Cohort not found/,
    'Should throw error for non-existent cohort',
  );
});