From 3c6f915023276911c62bd30e137143073b1abeea Mon Sep 17 00:00:00 2001 From: Taly Date: Tue, 10 Feb 2026 16:03:51 +0300 Subject: [PATCH 1/5] Limit path depth and add cache cleanup Prevent excessive memory usage and reduce GC pressure by limiting traversal path depth in data-filter (cap depth logic at 20). In grouper worker, add a periodic cache cleanup interval (every 5 minutes) started on worker start and cleared on finish to avoid unbounded cache growth. Free large references after delta computation by nulling event payloads to allow garbage collection. Also tighten memoization for findSimilarEvent (max reduced from 200 to 50 and ttl set to 600s) to further limit memory retained by caches. --- workers/grouper/src/data-filter.ts | 7 ++++++- workers/grouper/src/index.ts | 32 +++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/workers/grouper/src/data-filter.ts b/workers/grouper/src/data-filter.ts index 7e00038c..ac143868 100644 --- a/workers/grouper/src/data-filter.ts +++ b/workers/grouper/src/data-filter.ts @@ -18,7 +18,12 @@ function forAll(obj: Record, callback: (path: string[], key: st if (!(typeof value === 'object' && !Array.isArray(value))) { callback(path, key, current); } else { - visit(value, [...path, key]); + /** + * Limit path depth to prevent excessive memory allocations from deep nesting + * This reduces GC pressure and memory usage for deeply nested objects + */ + const newPath = path.length < 20 ? path.concat(key) : [...path, key]; + visit(value, newPath); } } }; diff --git a/workers/grouper/src/index.ts b/workers/grouper/src/index.ts index 73f16fc7..3758e999 100644 --- a/workers/grouper/src/index.ts +++ b/workers/grouper/src/index.ts @@ -72,6 +72,11 @@ export default class GrouperWorker extends Worker { */ private redis = new RedisHelper(); + /** + * Interval for periodic cache cleanup to prevent memory leaks from unbounded cache growth + */ + private cacheCleanupInterval: NodeJS.Timeout | null = null; + /** * Start consuming messages */ @@ -85,6 +90,15 @@ export default class GrouperWorker extends Worker { await this.redis.initialize(); console.log('redis initialized'); + + /** + * Start periodic cache cleanup to prevent memory leaks from unbounded cache growth + * Runs every 5 minutes to clear old cache entries + */ + this.cacheCleanupInterval = setInterval(() => { + this.clearCache(); + }, 5 * 60 * 1000); + await super.start(); } @@ -92,6 +106,14 @@ export default class GrouperWorker extends Worker { * Finish everything */ public async finish(): Promise { + /** + * Clear cache cleanup interval to prevent resource leaks + */ + if (this.cacheCleanupInterval) { + clearInterval(this.cacheCleanupInterval); + this.cacheCleanupInterval = null; + } + await super.finish(); this.prepareCache(); await this.eventsDb.close(); @@ -237,6 +259,14 @@ export default class GrouperWorker extends Worker { } as RepetitionDBScheme; repetitionId = await this.saveRepetition(task.projectId, newRepetition); + + /** + * Clear the large event payload references to allow garbage collection + * This prevents memory leaks from retaining full event objects after delta is computed + */ + delta = null; + existedEvent.payload = null; + task.payload = null; } /** @@ -334,7 +364,7 @@ export default class GrouperWorker extends Worker { * @param projectId - where to find * @param title - title of the event to find similar one */ - @memoize({ max: 200, ttl: MEMOIZATION_TTL, strategy: 'hash', skipCache: [undefined] }) + @memoize({ max: 50, ttl: 600, strategy: 'hash', skipCache: [undefined] }) private async findSimilarEvent(projectId: string, title: string): Promise { /** * If no match by Levenshtein, try matching by patterns From 90cfcd1307b8f26c99f1dd259de00a3ed15530b1 Mon Sep 17 00:00:00 2001 From: Taly Date: Tue, 10 Feb 2026 17:10:27 +0300 Subject: [PATCH 2/5] Prevent deep-path allocations; tune timeouts & tests Limit path growth in data filter to avoid creating new arrays past 20 levels (reduces excessive allocations for deeply nested objects). Import TimeMs and replace magic numbers: set MEMOIZATION_TTL to 600_000, use TimeMs.MINUTE for cache cleanup interval, and apply MEMOIZATION_TTL to the memoize decorator. Clear large delta references by setting them to undefined to aid GC. Add a test that verifies filtering works on objects nested >20 levels without causing excessive memory allocations. --- workers/grouper/src/data-filter.ts | 2 +- workers/grouper/src/index.ts | 11 ++++---- workers/grouper/tests/data-filter.test.ts | 34 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/workers/grouper/src/data-filter.ts b/workers/grouper/src/data-filter.ts index ac143868..176ca62f 100644 --- a/workers/grouper/src/data-filter.ts +++ b/workers/grouper/src/data-filter.ts @@ -22,7 +22,7 @@ function forAll(obj: Record, callback: (path: string[], key: st * Limit path depth to prevent excessive memory allocations from deep nesting * This reduces GC pressure and memory usage for deeply nested objects */ - const newPath = path.length < 20 ? path.concat(key) : [...path, key]; + const newPath = path.length < 20 ? path.concat(key) : path; visit(value, newPath); } } diff --git a/workers/grouper/src/index.ts b/workers/grouper/src/index.ts index 3758e999..d25d1119 100644 --- a/workers/grouper/src/index.ts +++ b/workers/grouper/src/index.ts @@ -19,6 +19,7 @@ import type { RepetitionDBScheme } from '../types/repetition'; import { DatabaseReadWriteError, DiffCalculationError, ValidationError } from '../../../lib/workerErrors'; import { decodeUnsafeFields, encodeUnsafeFields } from '../../../lib/utils/unsafeFields'; import { MS_IN_SEC } from '../../../lib/utils/consts'; +import TimeMs from '../../../lib/utils/time'; import DataFilter from './data-filter'; import RedisHelper from './redisHelper'; import { computeDelta } from './utils/repetitionDiff'; @@ -31,7 +32,7 @@ import { memoize } from '../../../lib/memoize'; * eslint does not count decorators as a variable usage */ /* eslint-disable-next-line no-unused-vars */ -const MEMOIZATION_TTL = Number(process.env.MEMOIZATION_TTL ?? 0); +const MEMOIZATION_TTL = 600_000; /** * Error code of MongoDB key duplication error @@ -97,7 +98,7 @@ export default class GrouperWorker extends Worker { */ this.cacheCleanupInterval = setInterval(() => { this.clearCache(); - }, 5 * 60 * 1000); + }, 5 * TimeMs.MINUTE); await super.start(); } @@ -264,9 +265,7 @@ export default class GrouperWorker extends Worker { * Clear the large event payload references to allow garbage collection * This prevents memory leaks from retaining full event objects after delta is computed */ - delta = null; - existedEvent.payload = null; - task.payload = null; + delta = undefined; } /** @@ -364,7 +363,7 @@ export default class GrouperWorker extends Worker { * @param projectId - where to find * @param title - title of the event to find similar one */ - @memoize({ max: 50, ttl: 600, strategy: 'hash', skipCache: [undefined] }) + @memoize({ max: 50, ttl: MEMOIZATION_TTL, strategy: 'hash', skipCache: [undefined] }) private async findSimilarEvent(projectId: string, title: string): Promise { /** * If no match by Levenshtein, try matching by patterns diff --git a/workers/grouper/tests/data-filter.test.ts b/workers/grouper/tests/data-filter.test.ts index 4cb98807..2f00dd68 100644 --- a/workers/grouper/tests/data-filter.test.ts +++ b/workers/grouper/tests/data-filter.test.ts @@ -327,5 +327,39 @@ describe('GrouperWorker', () => { expect(event.context['secret']).toBe('[filtered]'); expect(event.context['auth']).toBe('[filtered]'); }); + + test('should handle deeply nested objects (>20 levels) without excessive memory allocations', () => { + // Create an object nested deeper than the cap (>20 levels) + let deeplyNested: any = { value: 'leaf', secret: 'should-be-filtered' }; + + for (let i = 0; i < 25; i++) { + deeplyNested = { [`level${i}`]: deeplyNested, password: `sensitive${i}` }; + } + + const event = generateEvent({ + context: deeplyNested, + }); + + // This should not throw or cause memory issues + dataFilter.processEvent(event); + + // Verify that filtering still works at various depths + expect(event.context['password']).toBe('[filtered]'); + + // Navigate to a mid-level and check filtering + let current = event.context['level24'] as any; + for (let i = 24; i > 15; i--) { + expect(current['password']).toBe('[filtered]'); + current = current[`level${i - 1}`]; + } + + // At the leaf level, the secret should still be filtered + // (though path tracking may be capped, filtering should still work) + let leaf = event.context; + for (let i = 24; i >= 0; i--) { + leaf = leaf[`level${i}`] as any; + } + expect(leaf['secret']).toBe('[filtered]'); + }); }); }); From 7a06e1f29581a89f83e28d9d6f9621e1d6efb58e Mon Sep 17 00:00:00 2001 From: Taly Date: Tue, 10 Feb 2026 17:18:12 +0300 Subject: [PATCH 3/5] Use named constants for traversal and cache interval Introduce MAX_TRAVERSAL_DEPTH in data-filter.ts and replace the hardcoded depth check (20) to prevent excessive memory allocations from deep object nesting. Add CACHE_CLEANUP_INTERVAL_MINUTES in index.ts and use it for the cache cleanup setInterval instead of the literal 5, improving readability and making these tuning values easier to adjust. --- workers/grouper/src/data-filter.ts | 7 ++++++- workers/grouper/src/index.ts | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/workers/grouper/src/data-filter.ts b/workers/grouper/src/data-filter.ts index 176ca62f..2345b8e5 100644 --- a/workers/grouper/src/data-filter.ts +++ b/workers/grouper/src/data-filter.ts @@ -1,6 +1,11 @@ import type { EventAddons, EventData } from '@hawk.so/types'; import { unsafeFields } from '../../../lib/utils/unsafeFields'; +/** + * Maximum depth for object traversal to prevent excessive memory allocations + */ +const MAX_TRAVERSAL_DEPTH = 20; + /** * Recursively iterate through object and call function on each key * @@ -22,7 +27,7 @@ function forAll(obj: Record, callback: (path: string[], key: st * Limit path depth to prevent excessive memory allocations from deep nesting * This reduces GC pressure and memory usage for deeply nested objects */ - const newPath = path.length < 20 ? path.concat(key) : path; + const newPath = path.length < MAX_TRAVERSAL_DEPTH ? path.concat(key) : path; visit(value, newPath); } } diff --git a/workers/grouper/src/index.ts b/workers/grouper/src/index.ts index d25d1119..cad18aed 100644 --- a/workers/grouper/src/index.ts +++ b/workers/grouper/src/index.ts @@ -31,9 +31,13 @@ import { memoize } from '../../../lib/memoize'; /** * eslint does not count decorators as a variable usage */ -/* eslint-disable-next-line no-unused-vars */ const MEMOIZATION_TTL = 600_000; +/** + * Cache cleanup interval in minutes + */ +const CACHE_CLEANUP_INTERVAL_MINUTES = 5; + /** * Error code of MongoDB key duplication error */ @@ -98,7 +102,7 @@ export default class GrouperWorker extends Worker { */ this.cacheCleanupInterval = setInterval(() => { this.clearCache(); - }, 5 * TimeMs.MINUTE); + }, CACHE_CLEANUP_INTERVAL_MINUTES * TimeMs.MINUTE); await super.start(); } From f1c083ad7069df3fa231ef975796ab79c2ff552b Mon Sep 17 00:00:00 2001 From: Taly Date: Tue, 10 Feb 2026 17:30:15 +0300 Subject: [PATCH 4/5] Move eslint ignore to MEMOIZATION_TTL Remove the unnecessary eslint-disable-next-line on the memoize import and apply the no-unused-vars ignore to the MEMOIZATION_TTL constant instead. This ensures the linter suppression targets the unused constant (decorators not counted) rather than the import, improving clarity. --- workers/grouper/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/grouper/src/index.ts b/workers/grouper/src/index.ts index cad18aed..0dcc8f4b 100644 --- a/workers/grouper/src/index.ts +++ b/workers/grouper/src/index.ts @@ -25,12 +25,12 @@ import RedisHelper from './redisHelper'; import { computeDelta } from './utils/repetitionDiff'; import { rightTrim } from '../../../lib/utils/string'; import { hasValue } from '../../../lib/utils/hasValue'; -/* eslint-disable-next-line no-unused-vars */ import { memoize } from '../../../lib/memoize'; /** * eslint does not count decorators as a variable usage */ +/* eslint-disable-next-line no-unused-vars */ const MEMOIZATION_TTL = 600_000; /** From 093985ebd0190fc8dbd5d83e4da5c75c112fa598 Mon Sep 17 00:00:00 2001 From: Taly Date: Tue, 10 Feb 2026 17:40:03 +0300 Subject: [PATCH 5/5] Suppress no-unused-vars for memoize import Add an explanatory comment and an `/* eslint-disable-next-line no-unused-vars */` directive before the `memoize` import in workers/grouper/src/index.ts. This prevents ESLint from flagging the import as unused since decorators (which rely on the import) are not recognized as usages by the linter. --- workers/grouper/src/index.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/workers/grouper/src/index.ts b/workers/grouper/src/index.ts index 0dcc8f4b..542c701f 100644 --- a/workers/grouper/src/index.ts +++ b/workers/grouper/src/index.ts @@ -25,6 +25,11 @@ import RedisHelper from './redisHelper'; import { computeDelta } from './utils/repetitionDiff'; import { rightTrim } from '../../../lib/utils/string'; import { hasValue } from '../../../lib/utils/hasValue'; + +/** + * eslint does not count decorators as a variable usage + */ +/* eslint-disable-next-line no-unused-vars */ import { memoize } from '../../../lib/memoize'; /**