Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "hawk.workers",
"private": true,
"version": "0.1.1",
"version": "0.1.2",
"description": "Hawk workers",
"repository": "git@github.com:codex-team/hawk.workers.git",
"license": "BUSL-1.1",
Expand Down
2 changes: 1 addition & 1 deletion workers/grouper/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "hawk-worker-grouper",
"version": "0.0.1",
"version": "0.0.2",
"description": "Accepts processed errors from language-workers and saves them to the DB, grouping similar ones together.",
"main": "src/index.ts",
"repository": "https://github.com/codex-team/hawk.workers/tree/master/workers/grouper",
Expand Down
108 changes: 100 additions & 8 deletions workers/grouper/src/data-filter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import type { EventAddons, EventData } from '@hawk.so/types';
import { unsafeFields } from '../../../lib/utils/unsafeFields';

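/**
* Maximum number of keys kept in the path that is tracked during object traversal.
* Nesting deeper than this is still visited, but the path stops growing,
* which prevents excessive memory allocations for deeply nested objects
*/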
const MAX_TRAVERSAL_DEPTH = 20;

/**
* Recursively iterate through object and call function on each key
*
Expand All @@ -18,7 +23,12 @@ function forAll(obj: Record<string, unknown>, callback: (path: string[], key: st
if (!(typeof value === 'object' && !Array.isArray(value))) {
callback(path, key, current);
} else {
visit(value, [...path, key]);
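/**
* Stop extending the path once it holds MAX_TRAVERSAL_DEPTH keys to avoid allocating
* ever-longer path arrays for deeply nested objects (reduces GC pressure and memory usage).
* Deeper values are still visited; they just reuse the truncated path
*/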
const newPath = path.length < MAX_TRAVERSAL_DEPTH ? path.concat(key) : path;
visit(value, newPath);
}
}
};
Expand All @@ -36,24 +46,88 @@ export default class DataFilter {
private filteredValuePlaceholder = '[filtered]';

/**
* Possibly sensitive keys
* Possibly sensitive keys (lowercase; keys are compared via key.toLowerCase())
*/
private possiblySensitiveDataKeys = new Set([
'pan',
'secret',
'credentials',
'card[number]',
'password',
/**
* Authorization and sessions
*/
'auth',
'authorization',
'access_token',
'accesstoken',
'token',
'jwt',
'session',
'sessionid',
'session_id',
/**
* API keys and secure tokens
*/
'api_key',
'apikey',
'x-api-key',
'x-auth-token',
'bearer',
'client_secret',
'secret',
'credentials',
/**
* Passwords
*/
'password',
'passwd',
'mysql_pwd',
'oldpassword',
'old-password',
'old_password',
'newpassword',
'new-password',
'new_password',
/**
* Encryption keys
*/
'private_key',
'ssh_key',
/**
* Payments data
*/
'card',
'cardnumber',
'card[number]',
'creditcard',
'credit_card',
'pan',
'pin',
'security_code',
'stripetoken',
'cloudpayments_public_id',
'cloudpayments_secret',
/**
* Config and connections
*/
'dsn',
/**
* Personal data
*/
'ssn',
]);

/**
* Bank card PAN Regex
*/
private bankCardRegex = /^(?:4[0-9]{12}(?:[0-9]{3})?|[25][1-7][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})$/g;

/**
* MongoDB ObjectId Regex (24 hexadecimal characters)
*/
private objectIdRegex = /^[0-9a-fA-F]{24}$/;

/**
* UUID Regex - matches UUIDs with all dashes (8-4-4-4-12 format) or no dashes (32 hex chars)
*/
private uuidRegex = /^(?:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}|[0-9a-fA-F]{32})$/;

/**
* Accept event and process 'addons' and 'context' fields.
* It mutates the original object
Expand Down Expand Up @@ -96,12 +170,30 @@ export default class DataFilter {
return value;
}

/**
* Check if value matches MongoDB ObjectId pattern (24 hex chars)
* ObjectIds should not be filtered
*/
if (this.objectIdRegex.test(value)) {
return value;
}

/**
* Check if value matches UUID pattern (with or without dashes)
* UUIDs should not be filtered
*/
if (this.uuidRegex.test(value)) {
return value;
}

/**
* Remove all non-digit chars
*/
const clean = value.replace(/\D/g, '');

// Reset last index to 0
/**
* Reset last index to 0
*/
this.bankCardRegex.lastIndex = 0;
if (!this.bankCardRegex.test(clean)) {
return value;
Expand Down
42 changes: 40 additions & 2 deletions workers/grouper/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,29 @@ import type { RepetitionDBScheme } from '../types/repetition';
import { DatabaseReadWriteError, DiffCalculationError, ValidationError } from '../../../lib/workerErrors';
import { decodeUnsafeFields, encodeUnsafeFields } from '../../../lib/utils/unsafeFields';
import { MS_IN_SEC } from '../../../lib/utils/consts';
import TimeMs from '../../../lib/utils/time';
import DataFilter from './data-filter';
import RedisHelper from './redisHelper';
import { computeDelta } from './utils/repetitionDiff';
import { rightTrim } from '../../../lib/utils/string';
import { hasValue } from '../../../lib/utils/hasValue';

/**
* eslint does not count decorators as a variable usage
*/
/* eslint-disable-next-line no-unused-vars */
import { memoize } from '../../../lib/memoize';

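/**
* Time-to-live passed as the `ttl` option of the memoize decorator.
* eslint does not count decorators as a variable usage, hence the disable directive below
*/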
/* eslint-disable-next-line no-unused-vars */
const MEMOIZATION_TTL = Number(process.env.MEMOIZATION_TTL ?? 0);
const MEMOIZATION_TTL = 600_000;

/**
* Cache cleanup interval in minutes
*/
const CACHE_CLEANUP_INTERVAL_MINUTES = 5;

/**
* Error code of MongoDB key duplication error
Expand Down Expand Up @@ -72,6 +82,11 @@ export default class GrouperWorker extends Worker {
*/
private redis = new RedisHelper();

/**
* Interval for periodic cache cleanup to prevent memory leaks from unbounded cache growth
*/
private cacheCleanupInterval: NodeJS.Timeout | null = null;

/**
* Start consuming messages
*/
Expand All @@ -85,13 +100,30 @@ export default class GrouperWorker extends Worker {

await this.redis.initialize();
console.log('redis initialized');

/**
* Start periodic cache cleanup to prevent memory leaks from unbounded cache growth
* Calls clearCache() every CACHE_CLEANUP_INTERVAL_MINUTES minutes
*/
this.cacheCleanupInterval = setInterval(() => {
this.clearCache();
}, CACHE_CLEANUP_INTERVAL_MINUTES * TimeMs.MINUTE);

await super.start();
}

/**
* Finish everything
*/
public async finish(): Promise<void> {
/**
* Clear cache cleanup interval to prevent resource leaks
*/
if (this.cacheCleanupInterval) {
clearInterval(this.cacheCleanupInterval);
this.cacheCleanupInterval = null;
}

await super.finish();
this.prepareCache();
await this.eventsDb.close();
Expand Down Expand Up @@ -237,6 +269,12 @@ export default class GrouperWorker extends Worker {
} as RepetitionDBScheme;

repetitionId = await this.saveRepetition(task.projectId, newRepetition);

/**
* Clear the large event payload references to allow garbage collection
* This prevents memory leaks from retaining full event objects after delta is computed
*/
delta = undefined;
}

/**
Expand Down Expand Up @@ -334,7 +372,7 @@ export default class GrouperWorker extends Worker {
* @param projectId - where to find
* @param title - title of the event to find similar one
*/
@memoize({ max: 200, ttl: MEMOIZATION_TTL, strategy: 'hash', skipCache: [undefined] })
@memoize({ max: 50, ttl: MEMOIZATION_TTL, strategy: 'hash', skipCache: [undefined] })
private async findSimilarEvent(projectId: string, title: string): Promise<GroupedEventDBScheme | undefined> {
/**
* If no match by Levenshtein, try matching by patterns
Expand Down
Loading
Loading