Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/agent/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import {
updateAgentStatus,
recordRebalanceCheck,
recordRebalanceTriggered,
recordDbOperation
recordDbOperation,
recordBackgroundJob,
recordExternalServiceError
} from '../utils/metrics';

let isRunning = false;
Expand Down Expand Up @@ -134,6 +136,7 @@ async function rebalanceCheckJob(): Promise<void> {
// Record Prometheus metrics
recordRebalanceCheck('success');
recordDbOperation('rebalance_check', duration / 1000);
recordBackgroundJob('rebalance_check', 'success', duration / 1000);

logger.info(`${jobName} completed`, {
duration,
Expand All @@ -155,6 +158,7 @@ async function rebalanceCheckJob(): Promise<void> {
// Record Prometheus metrics
recordRebalanceCheck('failed');
recordDbOperation('rebalance_check', duration / 1000);
recordBackgroundJob('rebalance_check', 'failed', duration / 1000);

await logAgentAction('ANALYZE', 'FAILED', {
input: { correlationId },
Expand Down Expand Up @@ -198,6 +202,7 @@ async function snapshotJob(): Promise<void> {
const duration = Date.now() - startTime;
// Record Prometheus metrics
recordDbOperation('snapshot_job', duration / 1000);
recordBackgroundJob('snapshot', 'success', duration / 1000);
logger.info(`${jobName} scheduled`, { duration });
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
Expand All @@ -208,6 +213,7 @@ async function snapshotJob(): Promise<void> {
});
// Record Prometheus metrics
recordDbOperation('snapshot_job', duration / 1000);
recordBackgroundJob('snapshot', 'failed', duration / 1000);
}
});
}
Expand Down Expand Up @@ -254,10 +260,13 @@ export async function startAgentLoop(): Promise<void> {
try {
logger.info('Daily protocol scan started', { correlationId });
updateAgentHeartbeat();
const scanStart = Date.now();
const protocols = await scanAllProtocols();
const scanDuration = (Date.now() - scanStart) / 1000;
await logAgentAction('SCAN', 'SUCCESS', {
input: { correlationId, protocolsScanned: protocols.length },
});
recordBackgroundJob('protocol_scan', 'success', scanDuration);
logger.info('Daily protocol scan complete', {
correlationId,
protocolsScanned: protocols.length,
Expand All @@ -267,6 +276,7 @@ export async function startAgentLoop(): Promise<void> {
correlationId,
error: error instanceof Error ? error.message : 'Unknown error',
});
recordBackgroundJob('protocol_scan', 'failed', 0);
await logAgentAction('SCAN', 'FAILED', {
input: { correlationId },
error: error instanceof Error ? error.message : 'Unknown error',
Expand Down
6 changes: 6 additions & 0 deletions src/jobs/sessionCleanup.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
import db from '../db';
import { logger } from '../utils/logger';
import { config } from '../config/env';
import { recordBackgroundJob } from '../utils/metrics';

/**
 * Purge every session row whose `expiresAt` lies before the current time.
 *
 * The run is timed from entry and reported through `recordBackgroundJob`
 * under the job name `session_cleanup`, with status `success` or `failed`.
 * Anything thrown inside the `try` block is logged and recorded as a failed
 * run instead of being rethrown. Calling this repeatedly is safe (idempotent).
 */
export async function cleanupExpiredSessions(): Promise<void> {
  const startedAt = Date.now();
  // Seconds elapsed since this run began.
  const elapsedSeconds = (): number => (Date.now() - startedAt) / 1000;

  try {
    const cutoff = new Date();
    const { count } = await db.session.deleteMany({
      where: { expiresAt: { lt: cutoff } },
    });
    // Capture the duration right after the delete, before any logging.
    const tookSeconds = elapsedSeconds();

    // Stay quiet when nothing was removed.
    if (count > 0) {
      logger.info(`[SessionCleanup] Removed ${count} expired session(s)`);
    }
    recordBackgroundJob('session_cleanup', 'success', tookSeconds);
  } catch (error) {
    const tookSeconds = elapsedSeconds();
    logger.error('[SessionCleanup] Failed to clean up sessions:', error);
    recordBackgroundJob('session_cleanup', 'failed', tookSeconds);
  }
}

Expand Down
6 changes: 6 additions & 0 deletions src/middleware/logger.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
import { Request, Response, NextFunction } from 'express'
import { logger } from '../utils/logger'
import { recordHttpRequest } from '../utils/metrics'

export function requestLogger(req: Request, res: Response, next: NextFunction) {
const start = Date.now()

res.on('finish', () => {
const duration = Date.now() - start
const durationSeconds = duration / 1000

logger.info(`${req.method} ${req.path}`, {
correlationId: req.correlationId,
status: res.statusCode,
duration: `${duration}ms`,
ip: req.ip,
})

const route = req.route?.path || req.path
recordHttpRequest(req.method, route, res.statusCode, durationSeconds)
})

next()
Expand Down
55 changes: 55 additions & 0 deletions src/utils/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ const register = new client.Registry()
// Add default metrics (CPU, memory, etc.)
client.collectDefaultMetrics({ register })

// ── Global default label: env ────────────────────────────────────────────────
// Attach an `env` label at the registry level so it does not have to be added
// to each Counter/Histogram definition individually. The value is taken from
// NODE_ENV and falls back to 'development' when NODE_ENV is unset or empty.
register.setDefaultLabels({
env: process.env.NODE_ENV || 'development',
})

// ── Event Processing Metrics ─────────────────────────────────────────────────────

export const eventsProcessedTotal = new client.Counter({
Expand Down Expand Up @@ -175,6 +182,32 @@ export const analyticsRequestDuration = new client.Histogram({
registers: [register],
})

// ── Background Job Metrics ──────────────────────────────────────────────────

/**
 * Counter `background_jobs_total`: number of background job executions,
 * labelled by `job` and `status`. Incremented by `recordBackgroundJob`.
 */
export const backgroundJobsTotal = new client.Counter({
name: 'background_jobs_total',
help: 'Total number of background job executions',
labelNames: ['job', 'status'] as const,
registers: [register],
})

/**
 * Histogram `background_job_duration_seconds`: how long background job
 * executions take, in seconds, labelled by `job` only (not by status).
 * Bucket upper bounds run from 0.1 s to 60 s.
 */
export const backgroundJobDuration = new client.Histogram({
name: 'background_job_duration_seconds',
help: 'Duration of background job executions in seconds',
labelNames: ['job'] as const,
buckets: [0.1, 0.5, 1, 2, 5, 10, 30, 60],
registers: [register],
})

// ── External Service Error Metrics ──────────────────────────────────────────

/**
 * Counter `external_service_errors_total`: number of external service errors,
 * labelled by `service` and `error_type`. Incremented by
 * `recordExternalServiceError`.
 */
export const externalServiceErrorsTotal = new client.Counter({
name: 'external_service_errors_total',
help: 'Total number of external service errors',
labelNames: ['service', 'error_type'] as const,
registers: [register],
})

// ── Helper Functions ─────────────────────────────────────────────────────────────

/**
Expand Down Expand Up @@ -284,6 +317,28 @@ export function recordAnalyticsRequest(
analyticsRequestDuration.observe({ endpoint }, durationSeconds)
}

/**
 * Report one background job run to the job metrics.
 *
 * Increments `backgroundJobsTotal` for the `job`/`status` pair, then observes
 * `durationSeconds` on `backgroundJobDuration` for the same `job`.
 *
 * @param job - Job name used as the `job` label value.
 * @param status - Outcome of the run, used as the `status` label value.
 * @param durationSeconds - How long the run took, in seconds.
 */
export function recordBackgroundJob(
  job: string,
  status: 'success' | 'failed',
  durationSeconds: number
): void {
  const counterLabels = { job, status }
  backgroundJobsTotal.inc(counterLabels)

  // The histogram is keyed by job only; status is not part of its label set.
  const histogramLabels = { job }
  backgroundJobDuration.observe(histogramLabels, durationSeconds)
}

/**
 * Count one error from an external service.
 *
 * Increments `externalServiceErrorsTotal` with `service` and `errorType`
 * (emitted under the `error_type` label).
 *
 * @param service - Value for the `service` label.
 * @param errorType - Value for the `error_type` label.
 */
export function recordExternalServiceError(
  service: string,
  errorType: string
): void {
  const labels = { service, error_type: errorType }
  externalServiceErrorsTotal.inc(labels)
}

/**
* Get metrics for Prometheus scraping
*/
Expand Down
Loading