From ce815e5f29d6271c4fbffb6ed53eca603221463e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Thu, 12 Jan 2023 11:26:59 +0100 Subject: [PATCH] feat: report app names only if below a threshold (#2737) ## About the changes Introduce a snapshot version of instanceStats inside instance-stats-service to provide a cached state of the statistics without compromising the DB. ### Important notes Some rule-of-thumb applied in the PR that can be changed: 1. The snapshot refresh time 2. The threshold to report appName with the metrics ## Discussion points 1. The snapshot could be limited to just the information needed (things like `hasOIDC` don't change until there's a restart), to optimize the memory usage 3. metrics.ts (used to expose Prometheus metrics) has a [refresh interval of 2hs](https://github.com/Unleash/unleash/blob/2d16730cc24ca9d62bd7eaff451853e58cb83482/src/lib/metrics.ts#L189-L195), but with this implementation, we could remove that background task and rely on the snapshot 4. We could additionally update the snapshot every time someone queries the DB to fetch stats (`getStats()` method), but it may increase complexity without a significant benefit Co-authored-by: Mateusz Kwasniewski Co-authored-by: Simon Hornby --- src/lib/app.ts | 8 +++- src/lib/middleware/response-time-metrics.ts | 11 ++++- src/lib/services/index.ts | 5 ++ .../services/instance-stats-service.test.ts | 47 +++++++++++++++++++ src/lib/services/instance-stats-service.ts | 34 +++++++++++++- src/lib/services/scheduler-service.test.ts | 22 +++++++-- src/lib/services/scheduler-service.ts | 10 +++- 7 files changed, 126 insertions(+), 11 deletions(-) create mode 100644 src/lib/services/instance-stats-service.test.ts diff --git a/src/lib/app.ts b/src/lib/app.ts index 1e0226d03b..44f24bca3a 100644 --- a/src/lib/app.ts +++ b/src/lib/app.ts @@ -45,7 +45,13 @@ export default async function getApp( app.set('port', config.server.port); app.locals.baseUriPath = baseUriPath; if (config.server.serverMetrics && config.eventBus) { - app.use(responseTimeMetrics(config.eventBus, config.flagResolver)); + app.use( + responseTimeMetrics( + config.eventBus, + config.flagResolver, + services.instanceStatsService, + ), + ); } app.use(requestLogger(config)); diff --git a/src/lib/middleware/response-time-metrics.ts b/src/lib/middleware/response-time-metrics.ts index 16ea0f7481..3cce701f2a 100644 --- a/src/lib/middleware/response-time-metrics.ts +++ b/src/lib/middleware/response-time-metrics.ts @@ -2,21 +2,28 @@ import * as responseTime from 'response-time'; import EventEmitter from 'events'; import { REQUEST_TIME } from '../metric-events'; import { IFlagResolver } from '../types/experimental'; +import { InstanceStatsService } from 'lib/services'; // eslint-disable-next-line @typescript-eslint/naming-convention const _responseTime = responseTime.default; +const appNameReportingThreshold = 100; + export function responseTimeMetrics( eventBus: EventEmitter, flagResolver: IFlagResolver, + instanceStatsService: Pick, ): any { return _responseTime((req, res, time) => { const { statusCode } = res; - const pathname = req.route ? req.baseUrl + req.route.path : '(hidden)'; let appName; - if (flagResolver.isEnabled('responseTimeWithAppName')) { + if ( + flagResolver.isEnabled('responseTimeWithAppName') && + (instanceStatsService.getAppCountSnapshot('7d') || + appNameReportingThreshold) < appNameReportingThreshold + ) { appName = req.headers['unleash-appname'] ?? req.query.appName; } diff --git a/src/lib/services/index.ts b/src/lib/services/index.ts index 30012da735..0f4d2950f9 100644 --- a/src/lib/services/index.ts +++ b/src/lib/services/index.ts @@ -152,6 +152,11 @@ export const createServices = ( minutesToMilliseconds(3), ); + schedulerService.schedule( + instanceStatsService.refreshStatsSnapshot.bind(instanceStatsService), + minutesToMilliseconds(5), + ); + return { accessService, addonService, diff --git a/src/lib/services/instance-stats-service.test.ts b/src/lib/services/instance-stats-service.test.ts new file mode 100644 index 0000000000..2f24ffa886 --- /dev/null +++ b/src/lib/services/instance-stats-service.test.ts @@ -0,0 +1,47 @@ +import { createTestConfig } from '../../test/config/test-config'; +import { InstanceStatsService } from './instance-stats-service'; +import createStores from '../../test/fixtures/store'; +import VersionService from './version-service'; + +let instanceStatsService: InstanceStatsService; +let versionService: VersionService; + +beforeEach(() => { + const config = createTestConfig(); + const stores = createStores(); + versionService = new VersionService(stores, config); + instanceStatsService = new InstanceStatsService( + stores, + config, + versionService, + ); + + jest.spyOn(instanceStatsService, 'refreshStatsSnapshot'); + jest.spyOn(instanceStatsService, 'getStats'); + + // validate initial state without calls to these methods + expect(instanceStatsService.refreshStatsSnapshot).toBeCalledTimes(0); + expect(instanceStatsService.getStats).toBeCalledTimes(0); +}); + +test('get snapshot should not call getStats', async () => { + await instanceStatsService.refreshStatsSnapshot(); + expect(instanceStatsService.getStats).toBeCalledTimes(1); + + // subsequent calls to getStatsSnapshot don't call getStats + for (let i = 0; i < 3; i++) { + const stats = instanceStatsService.getStatsSnapshot(); + expect(stats.clientApps).toStrictEqual([ + { range: 'allTime', count: 0 }, + { range: '30d', count: 0 }, + { range: '7d', count: 0 }, + ]); + } + // after querying the stats snapshot no call to getStats should be issued + expect(instanceStatsService.getStats).toBeCalledTimes(1); +}); + +test('before the snapshot is refreshed we can still get the appCount', async () => { + expect(instanceStatsService.refreshStatsSnapshot).toBeCalledTimes(0); + expect(instanceStatsService.getAppCountSnapshot('7d')).toBeUndefined(); +}); diff --git a/src/lib/services/instance-stats-service.ts b/src/lib/services/instance-stats-service.ts index 8aadace9cc..b900557432 100644 --- a/src/lib/services/instance-stats-service.ts +++ b/src/lib/services/instance-stats-service.ts @@ -66,6 +66,10 @@ export class InstanceStatsService { private clientInstanceStore: IClientInstanceStore; + private snapshot?: InstanceStats; + + private appCount?: Partial<{ [key in TimeRange]: number }>; + constructor( { featureToggleStore, @@ -111,7 +115,23 @@ export class InstanceStatsService { this.logger = getLogger('services/stats-service.js'); } - async getToggleCount(): Promise { + async refreshStatsSnapshot(): Promise { + try { + this.snapshot = await this.getStats(); + const appCountReplacement = {}; + this.snapshot.clientApps?.forEach((appCount) => { + appCountReplacement[appCount.range] = appCount.count; + }); + this.appCount = appCountReplacement; + } catch (error) { + this.logger.warn( + 'Unable to retrieve statistics. This will be retried', + error, + ); + } + } + + getToggleCount(): Promise { return this.featureToggleStore.count({ archived: false, }); @@ -133,9 +153,11 @@ export class InstanceStatsService { return settings?.enabled || false; } + /** + * use getStatsSnapshot for low latency, sacrificing data-freshness + */ async getStats(): Promise { const versionInfo = this.versionService.getVersionInfo(); - const [ featureToggles, users, @@ -184,6 +206,10 @@ export class InstanceStatsService { }; } + getStatsSnapshot(): InstanceStats | undefined { + return this.snapshot; + } + async getLabeledAppCounts(): Promise< { range: TimeRange; count: number }[] > { @@ -207,6 +233,10 @@ export class InstanceStatsService { ]; } + getAppCountSnapshot(range: TimeRange): number | undefined { + return this.appCount?.[range]; + } + async getSignedStats(): Promise { const instanceStats = await this.getStats(); diff --git a/src/lib/services/scheduler-service.test.ts b/src/lib/services/scheduler-service.test.ts index 35401b52d4..ec38ac36ee 100644 --- a/src/lib/services/scheduler-service.test.ts +++ b/src/lib/services/scheduler-service.test.ts @@ -23,6 +23,16 @@ const getLogger = () => { return logger; }; +test('Schedules job immediately', async () => { + const schedulerService = new SchedulerService(getLogger()); + const job = jest.fn(); + + schedulerService.schedule(job, 10); + + expect(job).toBeCalledTimes(1); + schedulerService.stop(); +}); + test('Can schedule a single regular job', async () => { const schedulerService = new SchedulerService(getLogger()); const job = jest.fn(); @@ -30,7 +40,7 @@ test('Can schedule a single regular job', async () => { schedulerService.schedule(job, 10); await ms(15); - expect(job).toBeCalledTimes(1); + expect(job).toBeCalledTimes(2); schedulerService.stop(); }); @@ -43,8 +53,8 @@ test('Can schedule multiple jobs at the same interval', async () => { schedulerService.schedule(anotherJob, 10); await ms(15); - expect(job).toBeCalledTimes(1); - expect(anotherJob).toBeCalledTimes(1); + expect(job).toBeCalledTimes(2); + expect(anotherJob).toBeCalledTimes(2); schedulerService.stop(); }); @@ -57,8 +67,8 @@ test('Can schedule multiple jobs at the different intervals', async () => { schedulerService.schedule(anotherJob, 20); await ms(25); - expect(job).toBeCalledTimes(2); - expect(anotherJob).toBeCalledTimes(1); + expect(job).toBeCalledTimes(3); + expect(anotherJob).toBeCalledTimes(2); schedulerService.stop(); }); @@ -75,6 +85,7 @@ test('Can handle crash of a async job', async () => { schedulerService.stop(); expect(logger.getRecords()).toEqual([ ['scheduled job failed', 'async reason'], + ['scheduled job failed', 'async reason'], ]); }); @@ -91,5 +102,6 @@ test('Can handle crash of a sync job', async () => { schedulerService.stop(); expect(logger.getRecords()).toEqual([ ['scheduled job failed', new Error('sync reason')], + ['scheduled job failed', new Error('sync reason')], ]); }); diff --git a/src/lib/services/scheduler-service.ts b/src/lib/services/scheduler-service.ts index 7c0246b4e7..64fe37a8a9 100644 --- a/src/lib/services/scheduler-service.ts +++ b/src/lib/services/scheduler-service.ts @@ -9,7 +9,10 @@ export default class SchedulerService { this.logger = getLogger('/services/scheduler-service.ts'); } - schedule(scheduledFunction: () => void, timeMs: number): void { + async schedule( + scheduledFunction: () => void, + timeMs: number, + ): Promise { this.intervalIds.push( setInterval(async () => { try { @@ -19,6 +22,11 @@ export default class SchedulerService { } }, timeMs).unref(), ); + try { + await scheduledFunction(); + } catch (e) { + this.logger.error('scheduled job failed', e); + } } stop(): void {