mirror of
https://github.com/Unleash/unleash.git
synced 2025-02-09 00:18:00 +01:00
chore: add a class to handle aggregation queries (#8446)
## About the changes

We have many aggregation queries that run on a schedule: f63496d47f/src/lib/metrics.ts (L714-L719)
These staticCounters usually perform db query aggregations that traverse tables, and we run all of them in parallel: f63496d47f/src/lib/metrics.ts (L410-L412)
This can add strain to the db. This PR suggests a way of handling these queries in a more structured way, allowing us to run them sequentially (therefore spreading the load): f02fe87835/src/lib/metrics-gauge.ts (L38-L40)
As an additional benefit, we get both the gauge definition and the queries in a single place: f02fe87835/src/lib/metrics.ts (L131-L141)
This PR only tackles 1 metric, and it only focuses on gauges to gather initial feedback. The plan is to migrate these metrics and eventually incorporate more types (e.g. counters) --------- Co-authored-by: Nuno Góis <github@nunogois.com>
This commit is contained in:
parent
88f396f6b6
commit
a9f9be1efa
@ -4,11 +4,18 @@ import createStores from '../../../test/fixtures/store';
|
||||
import VersionService from '../../services/version-service';
|
||||
import { createFakeGetActiveUsers } from './getActiveUsers';
|
||||
import { createFakeGetProductionChanges } from './getProductionChanges';
|
||||
|
||||
import { registerPrometheusMetrics } from '../../metrics';
|
||||
import { register } from 'prom-client';
|
||||
import type { IClientInstanceStore } from '../../types';
|
||||
let instanceStatsService: InstanceStatsService;
|
||||
let versionService: VersionService;
|
||||
|
||||
let clientInstanceStore: IClientInstanceStore;
|
||||
let updateMetrics: () => Promise<void>;
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
|
||||
register.clear();
|
||||
|
||||
const config = createTestConfig();
|
||||
const stores = createStores();
|
||||
versionService = new VersionService(
|
||||
@ -17,6 +24,7 @@ beforeEach(() => {
|
||||
createFakeGetActiveUsers(),
|
||||
createFakeGetProductionChanges(),
|
||||
);
|
||||
clientInstanceStore = stores.clientInstanceStore;
|
||||
instanceStatsService = new InstanceStatsService(
|
||||
stores,
|
||||
config,
|
||||
@ -25,23 +33,28 @@ beforeEach(() => {
|
||||
createFakeGetProductionChanges(),
|
||||
);
|
||||
|
||||
jest.spyOn(instanceStatsService, 'refreshAppCountSnapshot');
|
||||
jest.spyOn(instanceStatsService, 'getLabeledAppCounts');
|
||||
const { collectDbMetrics } = registerPrometheusMetrics(
|
||||
config,
|
||||
stores,
|
||||
undefined as unknown as string,
|
||||
config.eventBus,
|
||||
instanceStatsService,
|
||||
);
|
||||
updateMetrics = collectDbMetrics;
|
||||
|
||||
jest.spyOn(clientInstanceStore, 'getDistinctApplicationsCount');
|
||||
jest.spyOn(instanceStatsService, 'getStats');
|
||||
|
||||
// validate initial state without calls to these methods
|
||||
expect(instanceStatsService.refreshAppCountSnapshot).toHaveBeenCalledTimes(
|
||||
0,
|
||||
);
|
||||
expect(instanceStatsService.getStats).toHaveBeenCalledTimes(0);
|
||||
});
|
||||
|
||||
test('get snapshot should not call getStats', async () => {
|
||||
await instanceStatsService.refreshAppCountSnapshot();
|
||||
expect(instanceStatsService.getLabeledAppCounts).toHaveBeenCalledTimes(1);
|
||||
await updateMetrics();
|
||||
expect(
|
||||
clientInstanceStore.getDistinctApplicationsCount,
|
||||
).toHaveBeenCalledTimes(3);
|
||||
expect(instanceStatsService.getStats).toHaveBeenCalledTimes(0);
|
||||
|
||||
// subsequent calls to getStatsSnapshot don't call getStats
|
||||
for (let i = 0; i < 3; i++) {
|
||||
const { clientApps } = await instanceStatsService.getStats();
|
||||
expect(clientApps).toStrictEqual([
|
||||
@ -51,12 +64,11 @@ test('get snapshot should not call getStats', async () => {
|
||||
]);
|
||||
}
|
||||
// after querying the stats snapshot no call to getStats should be issued
|
||||
expect(instanceStatsService.getLabeledAppCounts).toHaveBeenCalledTimes(1);
|
||||
expect(
|
||||
clientInstanceStore.getDistinctApplicationsCount,
|
||||
).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
test('before the snapshot is refreshed we can still get the appCount', async () => {
|
||||
expect(instanceStatsService.refreshAppCountSnapshot).toHaveBeenCalledTimes(
|
||||
0,
|
||||
);
|
||||
expect(instanceStatsService.getAppCountSnapshot('7d')).toBeUndefined();
|
||||
});
|
||||
|
@ -109,9 +109,9 @@ export class InstanceStatsService {
|
||||
|
||||
private appCount?: Partial<{ [key in TimeRange]: number }>;
|
||||
|
||||
private getActiveUsers: GetActiveUsers;
|
||||
getActiveUsers: GetActiveUsers;
|
||||
|
||||
private getProductionChanges: GetProductionChanges;
|
||||
getProductionChanges: GetProductionChanges;
|
||||
|
||||
private featureStrategiesReadModel: IFeatureStrategiesReadModel;
|
||||
|
||||
@ -180,25 +180,6 @@ export class InstanceStatsService {
|
||||
this.featureStrategiesReadModel = featureStrategiesReadModel;
|
||||
}
|
||||
|
||||
async refreshAppCountSnapshot(): Promise<
|
||||
Partial<{ [key in TimeRange]: number }>
|
||||
> {
|
||||
try {
|
||||
this.appCount = await this.getLabeledAppCounts();
|
||||
return this.appCount;
|
||||
} catch (error) {
|
||||
this.logger.warn(
|
||||
'Unable to retrieve statistics. This will be retried',
|
||||
error,
|
||||
);
|
||||
return {
|
||||
'7d': 0,
|
||||
'30d': 0,
|
||||
allTime: 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resolves to the per-mode project counts returned by
 * `this.projectStore.getProjectModeCounts()`; no extra processing is applied.
 */
getProjectModeCount(): Promise<ProjectModeCount[]> {
|
||||
return this.projectStore.getProjectModeCounts();
|
||||
}
|
||||
@ -231,9 +212,6 @@ export class InstanceStatsService {
|
||||
return settings?.enabled || false;
|
||||
}
|
||||
|
||||
/**
|
||||
* use getStatsSnapshot for low latency, sacrificing data-freshness
|
||||
*/
|
||||
async getStats(): Promise<InstanceStats> {
|
||||
const versionInfo = await this.versionService.getVersionInfo();
|
||||
const [
|
||||
@ -265,22 +243,22 @@ export class InstanceStatsService {
|
||||
] = await Promise.all([
|
||||
this.getToggleCount(),
|
||||
this.getArchivedToggleCount(),
|
||||
this.userStore.count(),
|
||||
this.userStore.countServiceAccounts(),
|
||||
this.apiTokenStore.countByType(),
|
||||
this.getRegisteredUsers(),
|
||||
this.countServiceAccounts(),
|
||||
this.countApiTokensByType(),
|
||||
this.getActiveUsers(),
|
||||
this.getProjectModeCount(),
|
||||
this.contextFieldStore.count(),
|
||||
this.groupStore.count(),
|
||||
this.roleStore.count(),
|
||||
this.roleStore.filteredCount({ type: CUSTOM_ROOT_ROLE_TYPE }),
|
||||
this.roleStore.filteredCountInUse({ type: CUSTOM_ROOT_ROLE_TYPE }),
|
||||
this.environmentStore.count(),
|
||||
this.segmentStore.count(),
|
||||
this.strategyStore.count(),
|
||||
this.contextFieldCount(),
|
||||
this.groupCount(),
|
||||
this.roleCount(),
|
||||
this.customRolesCount(),
|
||||
this.customRolesCountInUse(),
|
||||
this.environmentCount(),
|
||||
this.segmentCount(),
|
||||
this.strategiesCount(),
|
||||
this.hasSAML(),
|
||||
this.hasOIDC(),
|
||||
this.appCount ? this.appCount : this.refreshAppCountSnapshot(),
|
||||
this.appCount ? this.appCount : this.getLabeledAppCounts(),
|
||||
this.eventStore.deprecatedFilteredCount({
|
||||
type: FEATURES_EXPORTED,
|
||||
}),
|
||||
@ -288,7 +266,7 @@ export class InstanceStatsService {
|
||||
type: FEATURES_IMPORTED,
|
||||
}),
|
||||
this.getProductionChanges(),
|
||||
this.clientMetricsStore.countPreviousDayHourlyMetricsBuckets(),
|
||||
this.countPreviousDayHourlyMetricsBuckets(),
|
||||
this.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(),
|
||||
this.featureStrategiesReadModel.getMaxConstraintValues(),
|
||||
this.featureStrategiesReadModel.getMaxConstraintsPerStrategy(),
|
||||
@ -330,6 +308,59 @@ export class InstanceStatsService {
|
||||
};
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.groupStore.count()`. */
groupCount(): Promise<number> {
|
||||
return this.groupStore.count();
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.roleStore.count()`. */
roleCount(): Promise<number> {
|
||||
return this.roleStore.count();
|
||||
}
|
||||
|
||||
/**
 * Resolves to `this.roleStore.filteredCount(...)` with the filter restricted
 * to roles whose type is `CUSTOM_ROOT_ROLE_TYPE`.
 */
customRolesCount(): Promise<number> {
|
||||
return this.roleStore.filteredCount({ type: CUSTOM_ROOT_ROLE_TYPE });
|
||||
}
|
||||
|
||||
/**
 * Resolves to `this.roleStore.filteredCountInUse(...)` with the filter
 * restricted to roles whose type is `CUSTOM_ROOT_ROLE_TYPE`.
 */
customRolesCountInUse(): Promise<number> {
|
||||
return this.roleStore.filteredCountInUse({
|
||||
type: CUSTOM_ROOT_ROLE_TYPE,
|
||||
});
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.segmentStore.count()`. */
segmentCount(): Promise<number> {
|
||||
return this.segmentStore.count();
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.contextFieldStore.count()`. */
contextFieldCount(): Promise<number> {
|
||||
return this.contextFieldStore.count();
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.strategyStore.count()`. */
strategiesCount(): Promise<number> {
|
||||
return this.strategyStore.count();
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.environmentStore.count()`. */
environmentCount(): Promise<number> {
|
||||
return this.environmentStore.count();
|
||||
}
|
||||
|
||||
/**
 * Resolves to the `{ enabledCount, variantCount }` pair returned by
 * `this.clientMetricsStore.countPreviousDayHourlyMetricsBuckets()`.
 */
countPreviousDayHourlyMetricsBuckets(): Promise<{
|
||||
enabledCount: number;
|
||||
variantCount: number;
|
||||
}> {
|
||||
return this.clientMetricsStore.countPreviousDayHourlyMetricsBuckets();
|
||||
}
|
||||
|
||||
/**
 * Resolves to the string-keyed count map returned by
 * `this.apiTokenStore.countByType()`.
 */
countApiTokensByType(): Promise<Map<string, number>> {
|
||||
return this.apiTokenStore.countByType();
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.userStore.count()`. */
getRegisteredUsers(): Promise<number> {
|
||||
return this.userStore.count();
|
||||
}
|
||||
|
||||
/** Resolves to the number reported by `this.userStore.countServiceAccounts()`. */
countServiceAccounts(): Promise<number> {
|
||||
return this.userStore.countServiceAccounts();
|
||||
}
|
||||
|
||||
async getLabeledAppCounts(): Promise<
|
||||
Partial<{ [key in TimeRange]: number }>
|
||||
> {
|
||||
@ -338,11 +369,12 @@ export class InstanceStatsService {
|
||||
this.clientInstanceStore.getDistinctApplicationsCount(30),
|
||||
this.clientInstanceStore.getDistinctApplicationsCount(),
|
||||
]);
|
||||
return {
|
||||
this.appCount = {
|
||||
'7d': t7d,
|
||||
'30d': t30d,
|
||||
allTime,
|
||||
};
|
||||
return this.appCount;
|
||||
}
|
||||
|
||||
getAppCountSnapshot(range: TimeRange): number | undefined {
|
||||
|
@ -59,8 +59,12 @@ export const scheduleServices = async (
|
||||
'updateLastSeen',
|
||||
);
|
||||
|
||||
// TODO this works fine for keeping labeledAppCounts up to date, but
|
||||
// it would be nice if we can keep client_apps_total prometheus metric
|
||||
// up to date. We'd need to have access to DbMetricsMonitor, which is
|
||||
// where the metric is registered and call an update only for that metric
|
||||
schedulerService.schedule(
|
||||
instanceStatsService.refreshAppCountSnapshot.bind(instanceStatsService),
|
||||
instanceStatsService.getLabeledAppCounts.bind(instanceStatsService),
|
||||
minutesToMilliseconds(5),
|
||||
'refreshAppCountSnapshot',
|
||||
);
|
||||
|
114
src/lib/metrics-gauge.test.ts
Normal file
114
src/lib/metrics-gauge.test.ts
Normal file
@ -0,0 +1,114 @@
|
||||
import { register } from 'prom-client';
|
||||
import { createTestConfig } from '../test/config/test-config';
|
||||
import type { IUnleashConfig } from './types';
|
||||
import { DbMetricsMonitor } from './metrics-gauge';
|
||||
|
||||
// Alias for the prom-client registry the assertions below read from.
const prometheusRegister = register;

let config: IUnleashConfig;
let dbMetrics: DbMetricsMonitor;

// The test configuration is built once for the whole file.
beforeAll(() => {
    config = createTestConfig({ server: { serverMetrics: true } });
});

// Every test starts from a freshly constructed monitor.
beforeEach(() => {
    dbMetrics = new DbMetricsMonitor(config);
});
|
||||
|
||||
// A gauge without labels is exposed as `<name> <value>` after a refresh.
test('should collect registered metrics', async () => {
    dbMetrics.registerGaugeDbMetric({
        name: 'my_metric',
        help: 'This is the answer to life, the univers, and everything',
        labelNames: [],
        query: async () => 42,
        map: (answer) => ({ value: answer }),
    });

    await dbMetrics.refreshDbMetrics();

    expect(await prometheusRegister.metrics()).toMatch(/my_metric 42/);
});
|
||||
|
||||
// A labelled gauge is exposed as `<name>{label="value"} <value>` after a refresh.
test('should collect registered metrics with labels', async () => {
    dbMetrics.registerGaugeDbMetric({
        name: 'life_the_universe_and_everything',
        help: 'This is the answer to life, the univers, and everything',
        labelNames: ['test'],
        query: async () => 42,
        map: (answer) => ({ value: answer, labels: { test: 'case' } }),
    });

    await dbMetrics.refreshDbMetrics();

    const exposition = await prometheusRegister.metrics();
    const expectedLine = /life_the_universe_and_everything\{test="case"\} 42/;
    expect(exposition).toMatch(expectedLine);
});
|
||||
|
||||
// Two gauges registered on the same monitor (one unlabelled, one labelled)
// must both be present in the exposition after a single refresh.
test('should collect multiple registered metrics with and without labels', async () => {
    dbMetrics.registerGaugeDbMetric({
        name: 'my_first_metric',
        help: 'This is the answer to life, the univers, and everything',
        labelNames: [],
        query: () => Promise.resolve(42),
        map: (result) => ({ value: result }),
    });

    dbMetrics.registerGaugeDbMetric({
        name: 'my_other_metric',
        help: 'This is Eulers number',
        labelNames: ['euler'],
        query: () => Promise.resolve(Math.E),
        map: (result) => ({ value: result, labels: { euler: 'number' } }),
    });

    await dbMetrics.refreshDbMetrics();

    const metrics = await prometheusRegister.metrics();
    expect(metrics).toMatch(/my_first_metric 42/);
    // The decimal point is escaped: an unescaped `.` matches any character,
    // which would let values such as "2x71828" satisfy the assertion.
    expect(metrics).toMatch(/my_other_metric\{euler="number"\} 2\.71828/);
});
|
||||
|
||||
// One query result can fan out into several label/value samples of the same
// gauge; each sample must be exposed and individually retrievable.
test('should support different label and value pairs', async () => {
    dbMetrics.registerGaugeDbMetric({
        name: 'multi_dimensional',
        help: 'This metric has different values for different labels',
        labelNames: ['version', 'range'],
        query: async () => 2,
        map: (base) => [
            { value: base, labels: { version: '1', range: 'linear' } },
            { value: base * base, labels: { version: '2', range: 'square' } },
            { value: base / 2, labels: { version: '3', range: 'half' } },
        ],
    });

    await dbMetrics.refreshDbMetrics();

    const exposition = await prometheusRegister.metrics();
    expect(exposition).toMatch(
        /multi_dimensional\{version="1",range="linear"\} 2\nmulti_dimensional\{version="2",range="square"\} 4\nmulti_dimensional\{version="3",range="half"\} 1/,
    );

    // Lookups by a single label; an unknown label value yields undefined.
    const byRange: Array<[string, number | undefined]> = [
        ['linear', 2],
        ['half', 1],
        ['square', 4],
        ['x', undefined],
    ];
    for (const [range, expected] of byRange) {
        expect(
            await dbMetrics.findValue('multi_dimensional', { range }),
        ).toBe(expected);
    }

    // Without labels the first recorded sample is returned.
    expect(await dbMetrics.findValue('multi_dimensional')).toBe(2);
    // A metric name that was never registered yields undefined.
    expect(await dbMetrics.findValue('other')).toBeUndefined();
});
|
94
src/lib/metrics-gauge.ts
Normal file
94
src/lib/metrics-gauge.ts
Normal file
@ -0,0 +1,94 @@
|
||||
import type { Logger } from './logger';
|
||||
import type { IUnleashConfig } from './types';
|
||||
import { createGauge, type Gauge } from './util/metrics';
|
||||
|
||||
/** Asynchronous source of a gauge's raw data; may resolve to nothing. */
type Query<R> = () => Promise<R | null | undefined>;

/** One gauge sample: the number to set and, optionally, the labels it belongs to. */
type MetricValue<L extends string> = {
    value: number;
    labels?: Record<L, string | number>;
};

/** Converts a query result into one sample or a list of samples. */
type MapResult<R, L extends string> = (result: R) => MetricValue<L> | MetricValue<L>[];

/** Everything needed to create a gauge and keep it populated from a query. */
type GaugeDefinition<T, L extends string> = {
    name: string;
    help: string;
    labelNames: L[];
    query: Query<T>;
    map: MapResult<T, L>;
};

/** A refresh job for a single gauge. */
type Task = () => Promise<void>;

/** Pairs a created gauge with the task that refreshes it. */
type GaugeUpdater = {
    target: Gauge<string>;
    task: Task;
};
|
||||
/**
 * Registry of gauges whose values come from asynchronous queries.
 *
 * Each registered gauge gets a refresh task. `refreshDbMetrics` awaits those
 * tasks one after another instead of starting them all at once.
 */
export class DbMetricsMonitor {
    // Registered gauges and their refresh tasks, keyed by metric name.
    private updaters: Map<string, GaugeUpdater> = new Map();

    private log: Logger;

    constructor({ getLogger }: Pick<IUnleashConfig, 'getLogger'>) {
        this.log = getLogger('gauge-metrics');
    }

    /** Returns arrays untouched and wraps any other value in a one-element array. */
    private asArray<T>(value: T | T[]): T[] {
        if (Array.isArray(value)) {
            return value;
        }
        return [value];
    }

    /**
     * Creates a gauge from the definition and stores a task that refreshes it.
     *
     * The task runs the query, maps the result into samples, resets the gauge
     * and sets every sample. A `null`/`undefined` query result leaves the gauge
     * as it was. Any error raised along the way is logged as a warning and not
     * rethrown.
     *
     * @param definition gauge metadata plus the query and mapping functions
     * @returns the refresh task, which is also stored under `definition.name`
     */
    registerGaugeDbMetric<T, L extends string>(
        definition: GaugeDefinition<T, L>,
    ): Task {
        const gauge = createGauge(definition);

        const task = async () => {
            try {
                const result = await definition.query();
                if (result === null || result === undefined) {
                    return;
                }
                const samples = this.asArray(definition.map(result));
                // Drop previously recorded samples so stale label sets disappear.
                gauge.reset();
                samples.forEach((sample) => {
                    if (sample.labels) {
                        gauge.labels(sample.labels).set(sample.value);
                    } else {
                        gauge.set(sample.value);
                    }
                });
            } catch (e) {
                this.log.warn(`Failed to refresh ${definition.name}`, e);
            }
        };

        this.updaters.set(definition.name, { target: gauge, task });
        return task;
    }

    /**
     * Runs every registered refresh task sequentially, in registration order.
     * The list of tasks is captured before the first one starts.
     */
    refreshDbMetrics = async () => {
        const snapshot = Array.from(this.updaters.entries());
        for (const [name, { task }] of snapshot) {
            this.log.debug(`Refreshing metric ${name}`);
            await task();
        }
    };

    /**
     * Looks up the current value of a registered gauge.
     *
     * @param name metric name used at registration
     * @param labels optional label subset; a sample matches when every given
     *   label equals the sample's label. When omitted, every sample matches.
     * @returns the value of the first matching sample, or `undefined` when the
     *   metric is unknown, has no samples, or nothing matches
     */
    async findValue(
        name: string,
        labels?: Record<string, string | number>,
    ): Promise<number | undefined> {
        const gauge = await this.updaters.get(name)?.target.gauge?.get();
        if (!gauge || gauge.values.length === 0) {
            return undefined;
        }

        const wanted = labels ? Object.entries(labels) : [];
        const firstMatch = gauge.values.find(({ labels: sampleLabels }) =>
            wanted.every(([key, value]) => sampleLabels[key] === value),
        );
        return firstMatch?.value;
    }
}
|
@ -15,7 +15,11 @@ import {
|
||||
FEATURE_UPDATED,
|
||||
PROJECT_ENVIRONMENT_REMOVED,
|
||||
} from './types/events';
|
||||
import { createMetricsMonitor } from './metrics';
|
||||
import {
|
||||
createMetricsMonitor,
|
||||
registerPrometheusMetrics,
|
||||
registerPrometheusPostgresMetrics,
|
||||
} from './metrics';
|
||||
import createStores from '../test/fixtures/store';
|
||||
import { InstanceStatsService } from './features/instance-stats/instance-stats-service';
|
||||
import VersionService from './services/version-service';
|
||||
@ -46,6 +50,7 @@ let schedulerService: SchedulerService;
|
||||
let featureLifeCycleStore: IFeatureLifecycleStore;
|
||||
let featureLifeCycleReadModel: IFeatureLifecycleReadModel;
|
||||
let db: ITestDb;
|
||||
let refreshDbMetrics: () => Promise<void>;
|
||||
|
||||
beforeAll(async () => {
|
||||
const config = createTestConfig({
|
||||
@ -102,16 +107,16 @@ beforeAll(async () => {
|
||||
},
|
||||
};
|
||||
|
||||
await monitor.startMonitoring(
|
||||
const { collectDbMetrics, collectStaticCounters } =
|
||||
registerPrometheusMetrics(
|
||||
config,
|
||||
stores,
|
||||
'4.0.0',
|
||||
eventBus,
|
||||
statsService,
|
||||
schedulerService,
|
||||
// @ts-ignore - We don't want a full knex implementation for our tests, it's enough that it actually yields the numbers we want.
|
||||
metricsDbConf,
|
||||
);
|
||||
refreshDbMetrics = collectDbMetrics;
|
||||
await collectStaticCounters();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
@ -212,6 +217,7 @@ test('should collect metrics for function timings', async () => {
|
||||
});
|
||||
|
||||
test('should collect metrics for feature flag size', async () => {
|
||||
await refreshDbMetrics();
|
||||
const metrics = await prometheusRegister.metrics();
|
||||
expect(metrics).toMatch(/feature_toggles_total\{version="(.*)"\} 0/);
|
||||
});
|
||||
@ -222,12 +228,13 @@ test('should collect metrics for archived feature flag size', async () => {
|
||||
});
|
||||
|
||||
test('should collect metrics for total client apps', async () => {
|
||||
await statsService.refreshAppCountSnapshot();
|
||||
await refreshDbMetrics();
|
||||
const metrics = await prometheusRegister.metrics();
|
||||
expect(metrics).toMatch(/client_apps_total\{range="(.*)"\} 0/);
|
||||
});
|
||||
|
||||
test('Should collect metrics for database', async () => {
|
||||
registerPrometheusPostgresMetrics(db.rawDatabase, eventBus, '15.0.0');
|
||||
const metrics = await prometheusRegister.metrics();
|
||||
expect(metrics).toMatch(/db_pool_max/);
|
||||
expect(metrics).toMatch(/db_pool_min/);
|
||||
|
@ -25,7 +25,7 @@ import {
|
||||
PROJECT_DELETED,
|
||||
} from './types/events';
|
||||
import type { IUnleashConfig } from './types/option';
|
||||
import type { ISettingStore, IUnleashStores } from './types/stores';
|
||||
import type { IUnleashStores } from './types/stores';
|
||||
import { hoursToMilliseconds, minutesToMilliseconds } from 'date-fns';
|
||||
import type { InstanceStatsService } from './features/instance-stats/instance-stats-service';
|
||||
import type { IEnvironment, ISdkHeartbeat } from './types';
|
||||
@ -37,25 +37,81 @@ import {
|
||||
} from './util/metrics';
|
||||
import type { SchedulerService } from './services';
|
||||
import type { IClientMetricsEnv } from './features/metrics/client-metrics/client-metrics-store-v2-type';
|
||||
import { DbMetricsMonitor } from './metrics-gauge';
|
||||
|
||||
export default class MetricsMonitor {
|
||||
constructor() {}
|
||||
export function registerPrometheusPostgresMetrics(
|
||||
db: Knex,
|
||||
eventBus: EventEmitter,
|
||||
postgresVersion: string,
|
||||
) {
|
||||
if (db?.client) {
|
||||
const dbPoolMin = createGauge({
|
||||
name: 'db_pool_min',
|
||||
help: 'Minimum DB pool size',
|
||||
});
|
||||
dbPoolMin.set(db.client.pool.min);
|
||||
const dbPoolMax = createGauge({
|
||||
name: 'db_pool_max',
|
||||
help: 'Maximum DB pool size',
|
||||
});
|
||||
dbPoolMax.set(db.client.pool.max);
|
||||
const dbPoolFree = createGauge({
|
||||
name: 'db_pool_free',
|
||||
help: 'Current free connections in DB pool',
|
||||
});
|
||||
const dbPoolUsed = createGauge({
|
||||
name: 'db_pool_used',
|
||||
help: 'Current connections in use in DB pool',
|
||||
});
|
||||
const dbPoolPendingCreates = createGauge({
|
||||
name: 'db_pool_pending_creates',
|
||||
help: 'how many asynchronous create calls are running in DB pool',
|
||||
});
|
||||
const dbPoolPendingAcquires = createGauge({
|
||||
name: 'db_pool_pending_acquires',
|
||||
help: 'how many acquires are waiting for a resource to be released in DB pool',
|
||||
});
|
||||
|
||||
async startMonitoring(
|
||||
eventBus.on(DB_POOL_UPDATE, (data) => {
|
||||
dbPoolFree.set(data.free);
|
||||
dbPoolUsed.set(data.used);
|
||||
dbPoolPendingCreates.set(data.pendingCreates);
|
||||
dbPoolPendingAcquires.set(data.pendingAcquires);
|
||||
});
|
||||
|
||||
const database_version = createGauge({
|
||||
name: 'postgres_version',
|
||||
help: 'Which version of postgres is running (SHOW server_version)',
|
||||
labelNames: ['version'],
|
||||
});
|
||||
database_version.labels({ version: postgresVersion }).set(1);
|
||||
}
|
||||
}
|
||||
|
||||
export function registerPrometheusMetrics(
|
||||
config: IUnleashConfig,
|
||||
stores: IUnleashStores,
|
||||
version: string,
|
||||
eventBus: EventEmitter,
|
||||
instanceStatsService: InstanceStatsService,
|
||||
schedulerService: SchedulerService,
|
||||
db: Knex,
|
||||
): Promise<void> {
|
||||
if (!config.server.serverMetrics) {
|
||||
return Promise.resolve();
|
||||
) {
|
||||
const resolveEnvironmentType = async (
|
||||
environment: string,
|
||||
cachedEnvironments: () => Promise<IEnvironment[]>,
|
||||
): Promise<string> => {
|
||||
const environments = await cachedEnvironments();
|
||||
const env = environments.find((e) => e.name === environment);
|
||||
|
||||
if (env) {
|
||||
return env.type;
|
||||
} else {
|
||||
return 'unknown';
|
||||
}
|
||||
};
|
||||
|
||||
const { eventStore, environmentStore } = stores;
|
||||
const { flagResolver } = config;
|
||||
const { flagResolver, db } = config;
|
||||
const dbMetrics = new DbMetricsMonitor(config);
|
||||
|
||||
const cachedEnvironments: () => Promise<IEnvironment[]> = memoizee(
|
||||
async () => environmentStore.getAll(),
|
||||
@ -65,8 +121,6 @@ export default class MetricsMonitor {
|
||||
},
|
||||
);
|
||||
|
||||
collectDefaultMetrics();
|
||||
|
||||
const requestDuration = createSummary({
|
||||
name: 'http_request_duration_milliseconds',
|
||||
help: 'App response time',
|
||||
@ -115,21 +169,42 @@ export default class MetricsMonitor {
|
||||
help: 'Number of times a feature flag has been used',
|
||||
labelNames: ['toggle', 'active', 'appName'],
|
||||
});
|
||||
const featureFlagsTotal = createGauge({
|
||||
|
||||
dbMetrics.registerGaugeDbMetric({
|
||||
name: 'feature_toggles_total',
|
||||
help: 'Number of feature flags',
|
||||
labelNames: ['version'],
|
||||
query: () => instanceStatsService.getToggleCount(),
|
||||
map: (value) => ({ value, labels: { version } }),
|
||||
});
|
||||
const maxFeatureEnvironmentStrategies = createGauge({
|
||||
|
||||
dbMetrics.registerGaugeDbMetric({
|
||||
name: 'max_feature_environment_strategies',
|
||||
help: 'Maximum number of environment strategies in one feature',
|
||||
labelNames: ['feature', 'environment'],
|
||||
query: () =>
|
||||
stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(),
|
||||
map: (result) => ({
|
||||
value: result.count,
|
||||
labels: {
|
||||
environment: result.environment,
|
||||
feature: result.feature,
|
||||
},
|
||||
}),
|
||||
});
|
||||
const maxFeatureStrategies = createGauge({
|
||||
|
||||
dbMetrics.registerGaugeDbMetric({
|
||||
name: 'max_feature_strategies',
|
||||
help: 'Maximum number of strategies in one feature',
|
||||
labelNames: ['feature'],
|
||||
query: () =>
|
||||
stores.featureStrategiesReadModel.getMaxFeatureStrategies(),
|
||||
map: (result) => ({
|
||||
value: result.count,
|
||||
labels: { feature: result.feature },
|
||||
}),
|
||||
});
|
||||
|
||||
const maxConstraintValues = createGauge({
|
||||
name: 'max_constraint_values',
|
||||
help: 'Maximum number of constraint values used in a single constraint',
|
||||
@ -236,10 +311,16 @@ export default class MetricsMonitor {
|
||||
help: 'Number of strategies',
|
||||
});
|
||||
|
||||
const clientAppsTotal = createGauge({
|
||||
dbMetrics.registerGaugeDbMetric({
|
||||
name: 'client_apps_total',
|
||||
help: 'Number of registered client apps aggregated by range by last seen',
|
||||
labelNames: ['range'],
|
||||
query: () => instanceStatsService.getLabeledAppCounts(),
|
||||
map: (result) =>
|
||||
Object.entries(result).map(([range, count]) => ({
|
||||
value: count,
|
||||
labels: { range },
|
||||
})),
|
||||
});
|
||||
|
||||
const samlEnabled = createGauge({
|
||||
@ -286,6 +367,55 @@ export default class MetricsMonitor {
|
||||
help: 'Rate limits (per minute) for METHOD/ENDPOINT pairs',
|
||||
labelNames: ['endpoint', 'method'],
|
||||
});
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/client/metrics',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.metricsRateLimiting.clientMetricsMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/client/register',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.metricsRateLimiting.clientRegisterMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/frontend/metrics',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.metricsRateLimiting.frontendMetricsMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/frontend/register',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.metricsRateLimiting.frontendRegisterMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/admin/user-admin',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.rateLimiting.createUserMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/auth/simple',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.rateLimiting.simpleLoginMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/auth/reset/password-email',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.rateLimiting.passwordResetMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/signal-endpoint/:name',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.rateLimiting.callSignalEndpointMaxPerSecond * 60);
|
||||
|
||||
const featureCreatedByMigration = createCounter({
|
||||
name: 'feature_created_by_migration_count',
|
||||
help: 'Feature createdBy migration count',
|
||||
@ -392,63 +522,13 @@ export default class MetricsMonitor {
|
||||
labelNames: ['result', 'destination'],
|
||||
});
|
||||
|
||||
async function collectStaticCounters() {
|
||||
try {
|
||||
const stats = await instanceStatsService.getStats();
|
||||
const [
|
||||
maxStrategies,
|
||||
maxEnvironmentStrategies,
|
||||
maxConstraintValuesResult,
|
||||
maxConstraintsPerStrategyResult,
|
||||
stageCountByProjectResult,
|
||||
stageDurationByProject,
|
||||
largestProjectEnvironments,
|
||||
largestFeatureEnvironments,
|
||||
deprecatedTokens,
|
||||
instanceOnboardingMetrics,
|
||||
projectsOnboardingMetrics,
|
||||
] = await Promise.all([
|
||||
stores.featureStrategiesReadModel.getMaxFeatureStrategies(),
|
||||
stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(),
|
||||
stores.featureStrategiesReadModel.getMaxConstraintValues(),
|
||||
stores.featureStrategiesReadModel.getMaxConstraintsPerStrategy(),
|
||||
stores.featureLifecycleReadModel.getStageCountByProject(),
|
||||
stores.featureLifecycleReadModel.getAllWithStageDuration(),
|
||||
stores.largestResourcesReadModel.getLargestProjectEnvironments(
|
||||
1,
|
||||
),
|
||||
stores.largestResourcesReadModel.getLargestFeatureEnvironments(
|
||||
1,
|
||||
),
|
||||
stores.apiTokenStore.countDeprecatedTokens(),
|
||||
flagResolver.isEnabled('onboardingMetrics')
|
||||
? stores.onboardingReadModel.getInstanceOnboardingMetrics()
|
||||
: Promise.resolve({}),
|
||||
flagResolver.isEnabled('onboardingMetrics')
|
||||
? stores.onboardingReadModel.getProjectsOnboardingMetrics()
|
||||
: Promise.resolve([]),
|
||||
]);
|
||||
|
||||
featureFlagsTotal.reset();
|
||||
featureFlagsTotal.labels({ version }).set(stats.featureToggles);
|
||||
|
||||
featureTogglesArchivedTotal.reset();
|
||||
featureTogglesArchivedTotal.set(stats.archivedFeatureToggles);
|
||||
|
||||
usersTotal.reset();
|
||||
usersTotal.set(stats.users);
|
||||
|
||||
serviceAccounts.reset();
|
||||
serviceAccounts.set(stats.serviceAccounts);
|
||||
|
||||
stageDurationByProject.forEach((stage) => {
|
||||
featureLifecycleStageDuration
|
||||
.labels({
|
||||
stage: stage.stage,
|
||||
project_id: stage.project,
|
||||
})
|
||||
.set(stage.duration);
|
||||
});
|
||||
// register event listeners
|
||||
eventBus.on(
|
||||
events.EXCEEDS_LIMIT,
|
||||
({ resource, limit }: { resource: string; limit: number }) => {
|
||||
exceedsLimitErrorCounter.increment({ resource, limit });
|
||||
},
|
||||
);
|
||||
|
||||
eventBus.on(
|
||||
events.STAGE_ENTERED,
|
||||
@ -464,260 +544,6 @@ export default class MetricsMonitor {
|
||||
},
|
||||
);
|
||||
|
||||
eventBus.on(
|
||||
events.EXCEEDS_LIMIT,
|
||||
({
|
||||
resource,
|
||||
limit,
|
||||
}: { resource: string; limit: number }) => {
|
||||
exceedsLimitErrorCounter.increment({ resource, limit });
|
||||
},
|
||||
);
|
||||
|
||||
featureLifecycleStageCountByProject.reset();
|
||||
stageCountByProjectResult.forEach((stageResult) =>
|
||||
featureLifecycleStageCountByProject
|
||||
.labels({
|
||||
project_id: stageResult.project,
|
||||
stage: stageResult.stage,
|
||||
})
|
||||
.set(stageResult.count),
|
||||
);
|
||||
|
||||
apiTokens.reset();
|
||||
|
||||
for (const [type, value] of stats.apiTokens) {
|
||||
apiTokens.labels({ type }).set(value);
|
||||
}
|
||||
|
||||
orphanedTokensTotal.reset();
|
||||
orphanedTokensTotal.set(deprecatedTokens.orphanedTokens);
|
||||
|
||||
orphanedTokensActive.reset();
|
||||
orphanedTokensActive.set(deprecatedTokens.activeOrphanedTokens);
|
||||
|
||||
legacyTokensTotal.reset();
|
||||
legacyTokensTotal.set(deprecatedTokens.legacyTokens);
|
||||
|
||||
legacyTokensActive.reset();
|
||||
legacyTokensActive.set(deprecatedTokens.activeLegacyTokens);
|
||||
|
||||
if (maxEnvironmentStrategies) {
|
||||
maxFeatureEnvironmentStrategies.reset();
|
||||
maxFeatureEnvironmentStrategies
|
||||
.labels({
|
||||
environment: maxEnvironmentStrategies.environment,
|
||||
feature: maxEnvironmentStrategies.feature,
|
||||
})
|
||||
.set(maxEnvironmentStrategies.count);
|
||||
}
|
||||
if (maxStrategies) {
|
||||
maxFeatureStrategies.reset();
|
||||
maxFeatureStrategies
|
||||
.labels({ feature: maxStrategies.feature })
|
||||
.set(maxStrategies.count);
|
||||
}
|
||||
if (maxConstraintValuesResult) {
|
||||
maxConstraintValues.reset();
|
||||
maxConstraintValues
|
||||
.labels({
|
||||
environment: maxConstraintValuesResult.environment,
|
||||
feature: maxConstraintValuesResult.feature,
|
||||
})
|
||||
.set(maxConstraintValuesResult.count);
|
||||
}
|
||||
if (maxConstraintsPerStrategyResult) {
|
||||
maxConstraintsPerStrategy.reset();
|
||||
maxConstraintsPerStrategy
|
||||
.labels({
|
||||
environment:
|
||||
maxConstraintsPerStrategyResult.environment,
|
||||
feature: maxConstraintsPerStrategyResult.feature,
|
||||
})
|
||||
.set(maxConstraintsPerStrategyResult.count);
|
||||
}
|
||||
|
||||
if (largestProjectEnvironments.length > 0) {
|
||||
const projectEnvironment = largestProjectEnvironments[0];
|
||||
largestProjectEnvironment.reset();
|
||||
largestProjectEnvironment
|
||||
.labels({
|
||||
project: projectEnvironment.project,
|
||||
environment: projectEnvironment.environment,
|
||||
})
|
||||
.set(projectEnvironment.size);
|
||||
}
|
||||
|
||||
if (largestFeatureEnvironments.length > 0) {
|
||||
const featureEnvironment = largestFeatureEnvironments[0];
|
||||
largestFeatureEnvironment.reset();
|
||||
largestFeatureEnvironment
|
||||
.labels({
|
||||
feature: featureEnvironment.feature,
|
||||
environment: featureEnvironment.environment,
|
||||
})
|
||||
.set(featureEnvironment.size);
|
||||
}
|
||||
|
||||
Object.keys(instanceOnboardingMetrics).forEach((key) => {
|
||||
if (Number.isInteger(instanceOnboardingMetrics[key])) {
|
||||
onboardingDuration
|
||||
.labels({
|
||||
event: key,
|
||||
})
|
||||
.set(instanceOnboardingMetrics[key]);
|
||||
}
|
||||
});
|
||||
projectsOnboardingMetrics.forEach(
|
||||
({ project, ...projectMetrics }) => {
|
||||
Object.keys(projectMetrics).forEach((key) => {
|
||||
if (Number.isInteger(projectMetrics[key])) {
|
||||
projectOnboardingDuration
|
||||
.labels({ event: key, project })
|
||||
.set(projectMetrics[key]);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
for (const [resource, limit] of Object.entries(
|
||||
config.resourceLimits,
|
||||
)) {
|
||||
resourceLimit.labels({ resource }).set(limit);
|
||||
}
|
||||
|
||||
enabledMetricsBucketsPreviousDay.reset();
|
||||
enabledMetricsBucketsPreviousDay.set(
|
||||
stats.previousDayMetricsBucketsCount.enabledCount,
|
||||
);
|
||||
variantMetricsBucketsPreviousDay.reset();
|
||||
variantMetricsBucketsPreviousDay.set(
|
||||
stats.previousDayMetricsBucketsCount.variantCount,
|
||||
);
|
||||
|
||||
usersActive7days.reset();
|
||||
usersActive7days.set(stats.activeUsers.last7);
|
||||
usersActive30days.reset();
|
||||
usersActive30days.set(stats.activeUsers.last30);
|
||||
usersActive60days.reset();
|
||||
usersActive60days.set(stats.activeUsers.last60);
|
||||
usersActive90days.reset();
|
||||
usersActive90days.set(stats.activeUsers.last90);
|
||||
|
||||
productionChanges30.reset();
|
||||
productionChanges30.set(stats.productionChanges.last30);
|
||||
productionChanges60.reset();
|
||||
productionChanges60.set(stats.productionChanges.last60);
|
||||
productionChanges90.reset();
|
||||
productionChanges90.set(stats.productionChanges.last90);
|
||||
|
||||
projectsTotal.reset();
|
||||
stats.projects.forEach((projectStat) => {
|
||||
projectsTotal
|
||||
.labels({ mode: projectStat.mode })
|
||||
.set(projectStat.count);
|
||||
});
|
||||
|
||||
environmentsTotal.reset();
|
||||
environmentsTotal.set(stats.environments);
|
||||
|
||||
groupsTotal.reset();
|
||||
groupsTotal.set(stats.groups);
|
||||
|
||||
rolesTotal.reset();
|
||||
rolesTotal.set(stats.roles);
|
||||
|
||||
customRootRolesTotal.reset();
|
||||
customRootRolesTotal.set(stats.customRootRoles);
|
||||
|
||||
customRootRolesInUseTotal.reset();
|
||||
customRootRolesInUseTotal.set(stats.customRootRolesInUse);
|
||||
|
||||
segmentsTotal.reset();
|
||||
segmentsTotal.set(stats.segments);
|
||||
|
||||
contextTotal.reset();
|
||||
contextTotal.set(stats.contextFields);
|
||||
|
||||
strategiesTotal.reset();
|
||||
strategiesTotal.set(stats.strategies);
|
||||
|
||||
samlEnabled.reset();
|
||||
samlEnabled.set(stats.SAMLenabled ? 1 : 0);
|
||||
|
||||
oidcEnabled.reset();
|
||||
oidcEnabled.set(stats.OIDCenabled ? 1 : 0);
|
||||
|
||||
clientAppsTotal.reset();
|
||||
stats.clientApps.forEach(({ range, count }) =>
|
||||
clientAppsTotal.labels({ range }).set(count),
|
||||
);
|
||||
|
||||
rateLimits.reset();
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/client/metrics',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.metricsRateLimiting.clientMetricsMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/client/register',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.metricsRateLimiting.clientRegisterMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/frontend/metrics',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(
|
||||
config.metricsRateLimiting.frontendMetricsMaxPerMinute,
|
||||
);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/frontend/register',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(
|
||||
config.metricsRateLimiting.frontendRegisterMaxPerMinute,
|
||||
);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/admin/user-admin',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.rateLimiting.createUserMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/auth/simple',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.rateLimiting.simpleLoginMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/auth/reset/password-email',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(config.rateLimiting.passwordResetMaxPerMinute);
|
||||
rateLimits
|
||||
.labels({
|
||||
endpoint: '/api/signal-endpoint/:name',
|
||||
method: 'POST',
|
||||
})
|
||||
.set(
|
||||
config.rateLimiting.callSignalEndpointMaxPerSecond * 60,
|
||||
);
|
||||
} catch (e) {}
|
||||
}
|
||||
|
||||
await schedulerService.schedule(
|
||||
collectStaticCounters.bind(this),
|
||||
hoursToMilliseconds(2),
|
||||
'collectStaticCounters',
|
||||
0, // no jitter
|
||||
);
|
||||
|
||||
eventBus.on(
|
||||
events.REQUEST_TIME,
|
||||
({ path, method, time, statusCode, appName }) => {
|
||||
@ -736,17 +562,14 @@ export default class MetricsMonitor {
|
||||
schedulerDuration.labels(jobId).observe(time);
|
||||
});
|
||||
|
||||
eventBus.on(
|
||||
events.FUNCTION_TIME,
|
||||
({ functionName, className, time }) => {
|
||||
eventBus.on(events.FUNCTION_TIME, ({ functionName, className, time }) => {
|
||||
functionDuration
|
||||
.labels({
|
||||
functionName,
|
||||
className,
|
||||
})
|
||||
.observe(time);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
eventBus.on(events.EVENTS_CREATED_BY_PROCESSED, ({ updated }) => {
|
||||
eventCreatedByMigration.inc(updated);
|
||||
@ -824,7 +647,7 @@ export default class MetricsMonitor {
|
||||
eventStore.on(
|
||||
FEATURE_STRATEGY_ADD,
|
||||
async ({ featureName, project, environment }) => {
|
||||
const environmentType = await this.resolveEnvironmentType(
|
||||
const environmentType = await resolveEnvironmentType(
|
||||
environment,
|
||||
cachedEnvironments,
|
||||
);
|
||||
@ -840,7 +663,7 @@ export default class MetricsMonitor {
|
||||
eventStore.on(
|
||||
FEATURE_STRATEGY_REMOVE,
|
||||
async ({ featureName, project, environment }) => {
|
||||
const environmentType = await this.resolveEnvironmentType(
|
||||
const environmentType = await resolveEnvironmentType(
|
||||
environment,
|
||||
cachedEnvironments,
|
||||
);
|
||||
@ -856,7 +679,7 @@ export default class MetricsMonitor {
|
||||
eventStore.on(
|
||||
FEATURE_STRATEGY_UPDATE,
|
||||
async ({ featureName, project, environment }) => {
|
||||
const environmentType = await this.resolveEnvironmentType(
|
||||
const environmentType = await resolveEnvironmentType(
|
||||
environment,
|
||||
cachedEnvironments,
|
||||
);
|
||||
@ -872,7 +695,7 @@ export default class MetricsMonitor {
|
||||
eventStore.on(
|
||||
FEATURE_ENVIRONMENT_DISABLED,
|
||||
async ({ featureName, project, environment }) => {
|
||||
const environmentType = await this.resolveEnvironmentType(
|
||||
const environmentType = await resolveEnvironmentType(
|
||||
environment,
|
||||
cachedEnvironments,
|
||||
);
|
||||
@ -888,7 +711,7 @@ export default class MetricsMonitor {
|
||||
eventStore.on(
|
||||
FEATURE_ENVIRONMENT_ENABLED,
|
||||
async ({ featureName, project, environment }) => {
|
||||
const environmentType = await this.resolveEnvironmentType(
|
||||
const environmentType = await resolveEnvironmentType(
|
||||
environment,
|
||||
cachedEnvironments,
|
||||
);
|
||||
@ -973,8 +796,7 @@ export default class MetricsMonitor {
|
||||
heartbeatEvent.metadata?.platformVersion ?? 'not-set',
|
||||
yggdrasil_version:
|
||||
heartbeatEvent.metadata?.yggdrasilVersion ?? 'not-set',
|
||||
spec_version:
|
||||
heartbeatEvent.metadata?.specVersion ?? 'not-set',
|
||||
spec_version: heartbeatEvent.metadata?.specVersion ?? 'not-set',
|
||||
});
|
||||
} else {
|
||||
clientSdkVersionUsage.increment({
|
||||
@ -996,76 +818,289 @@ export default class MetricsMonitor {
|
||||
addonEventsHandledCounter.increment({ result, destination });
|
||||
});
|
||||
|
||||
await this.configureDbMetrics(
|
||||
db,
|
||||
eventBus,
|
||||
schedulerService,
|
||||
stores.settingStore,
|
||||
return {
|
||||
collectDbMetrics: dbMetrics.refreshDbMetrics,
|
||||
collectStaticCounters: async () => {
|
||||
try {
|
||||
const [
|
||||
maxConstraintValuesResult,
|
||||
maxConstraintsPerStrategyResult,
|
||||
stageCountByProjectResult,
|
||||
stageDurationByProject,
|
||||
largestProjectEnvironments,
|
||||
largestFeatureEnvironments,
|
||||
deprecatedTokens,
|
||||
instanceOnboardingMetrics,
|
||||
projectsOnboardingMetrics,
|
||||
] = await Promise.all([
|
||||
stores.featureStrategiesReadModel.getMaxConstraintValues(),
|
||||
stores.featureStrategiesReadModel.getMaxConstraintsPerStrategy(),
|
||||
stores.featureLifecycleReadModel.getStageCountByProject(),
|
||||
stores.featureLifecycleReadModel.getAllWithStageDuration(),
|
||||
stores.largestResourcesReadModel.getLargestProjectEnvironments(
|
||||
1,
|
||||
),
|
||||
stores.largestResourcesReadModel.getLargestFeatureEnvironments(
|
||||
1,
|
||||
),
|
||||
stores.apiTokenStore.countDeprecatedTokens(),
|
||||
flagResolver.isEnabled('onboardingMetrics')
|
||||
? stores.onboardingReadModel.getInstanceOnboardingMetrics()
|
||||
: Promise.resolve({}),
|
||||
flagResolver.isEnabled('onboardingMetrics')
|
||||
? stores.onboardingReadModel.getProjectsOnboardingMetrics()
|
||||
: Promise.resolve([]),
|
||||
]);
|
||||
|
||||
featureTogglesArchivedTotal.reset();
|
||||
featureTogglesArchivedTotal.set(
|
||||
await instanceStatsService.getArchivedToggleCount(),
|
||||
);
|
||||
|
||||
usersTotal.reset();
|
||||
usersTotal.set(await instanceStatsService.getRegisteredUsers());
|
||||
|
||||
serviceAccounts.reset();
|
||||
serviceAccounts.set(
|
||||
await instanceStatsService.countServiceAccounts(),
|
||||
);
|
||||
|
||||
stageDurationByProject.forEach((stage) => {
|
||||
featureLifecycleStageDuration
|
||||
.labels({
|
||||
stage: stage.stage,
|
||||
project_id: stage.project,
|
||||
})
|
||||
.set(stage.duration);
|
||||
});
|
||||
|
||||
featureLifecycleStageCountByProject.reset();
|
||||
stageCountByProjectResult.forEach((stageResult) =>
|
||||
featureLifecycleStageCountByProject
|
||||
.labels({
|
||||
project_id: stageResult.project,
|
||||
stage: stageResult.stage,
|
||||
})
|
||||
.set(stageResult.count),
|
||||
);
|
||||
|
||||
apiTokens.reset();
|
||||
|
||||
for (const [
|
||||
type,
|
||||
value,
|
||||
] of await instanceStatsService.countApiTokensByType()) {
|
||||
apiTokens.labels({ type }).set(value);
|
||||
}
|
||||
|
||||
orphanedTokensTotal.reset();
|
||||
orphanedTokensTotal.set(deprecatedTokens.orphanedTokens);
|
||||
|
||||
orphanedTokensActive.reset();
|
||||
orphanedTokensActive.set(deprecatedTokens.activeOrphanedTokens);
|
||||
|
||||
legacyTokensTotal.reset();
|
||||
legacyTokensTotal.set(deprecatedTokens.legacyTokens);
|
||||
|
||||
legacyTokensActive.reset();
|
||||
legacyTokensActive.set(deprecatedTokens.activeLegacyTokens);
|
||||
|
||||
if (maxConstraintValuesResult) {
|
||||
maxConstraintValues.reset();
|
||||
maxConstraintValues
|
||||
.labels({
|
||||
environment: maxConstraintValuesResult.environment,
|
||||
feature: maxConstraintValuesResult.feature,
|
||||
})
|
||||
.set(maxConstraintValuesResult.count);
|
||||
}
|
||||
if (maxConstraintsPerStrategyResult) {
|
||||
maxConstraintsPerStrategy.reset();
|
||||
maxConstraintsPerStrategy
|
||||
.labels({
|
||||
environment:
|
||||
maxConstraintsPerStrategyResult.environment,
|
||||
feature: maxConstraintsPerStrategyResult.feature,
|
||||
})
|
||||
.set(maxConstraintsPerStrategyResult.count);
|
||||
}
|
||||
|
||||
if (largestProjectEnvironments.length > 0) {
|
||||
const projectEnvironment = largestProjectEnvironments[0];
|
||||
largestProjectEnvironment.reset();
|
||||
largestProjectEnvironment
|
||||
.labels({
|
||||
project: projectEnvironment.project,
|
||||
environment: projectEnvironment.environment,
|
||||
})
|
||||
.set(projectEnvironment.size);
|
||||
}
|
||||
|
||||
if (largestFeatureEnvironments.length > 0) {
|
||||
const featureEnvironment = largestFeatureEnvironments[0];
|
||||
largestFeatureEnvironment.reset();
|
||||
largestFeatureEnvironment
|
||||
.labels({
|
||||
feature: featureEnvironment.feature,
|
||||
environment: featureEnvironment.environment,
|
||||
})
|
||||
.set(featureEnvironment.size);
|
||||
}
|
||||
|
||||
Object.keys(instanceOnboardingMetrics).forEach((key) => {
|
||||
if (Number.isInteger(instanceOnboardingMetrics[key])) {
|
||||
onboardingDuration
|
||||
.labels({
|
||||
event: key,
|
||||
})
|
||||
.set(instanceOnboardingMetrics[key]);
|
||||
}
|
||||
});
|
||||
projectsOnboardingMetrics.forEach(
|
||||
({ project, ...projectMetrics }) => {
|
||||
Object.keys(projectMetrics).forEach((key) => {
|
||||
if (Number.isInteger(projectMetrics[key])) {
|
||||
projectOnboardingDuration
|
||||
.labels({ event: key, project })
|
||||
.set(projectMetrics[key]);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
for (const [resource, limit] of Object.entries(
|
||||
config.resourceLimits,
|
||||
)) {
|
||||
resourceLimit.labels({ resource }).set(limit);
|
||||
}
|
||||
|
||||
const previousDayMetricsBucketsCount =
|
||||
await instanceStatsService.countPreviousDayHourlyMetricsBuckets();
|
||||
enabledMetricsBucketsPreviousDay.reset();
|
||||
enabledMetricsBucketsPreviousDay.set(
|
||||
previousDayMetricsBucketsCount.enabledCount,
|
||||
);
|
||||
variantMetricsBucketsPreviousDay.reset();
|
||||
variantMetricsBucketsPreviousDay.set(
|
||||
previousDayMetricsBucketsCount.variantCount,
|
||||
);
|
||||
|
||||
const activeUsers = await instanceStatsService.getActiveUsers();
|
||||
usersActive7days.reset();
|
||||
usersActive7days.set(activeUsers.last7);
|
||||
usersActive30days.reset();
|
||||
usersActive30days.set(activeUsers.last30);
|
||||
usersActive60days.reset();
|
||||
usersActive60days.set(activeUsers.last60);
|
||||
usersActive90days.reset();
|
||||
usersActive90days.set(activeUsers.last90);
|
||||
|
||||
const productionChanges =
|
||||
await instanceStatsService.getProductionChanges();
|
||||
productionChanges30.reset();
|
||||
productionChanges30.set(productionChanges.last30);
|
||||
productionChanges60.reset();
|
||||
productionChanges60.set(productionChanges.last60);
|
||||
productionChanges90.reset();
|
||||
productionChanges90.set(productionChanges.last90);
|
||||
|
||||
const projects =
|
||||
await instanceStatsService.getProjectModeCount();
|
||||
projectsTotal.reset();
|
||||
projects.forEach((projectStat) => {
|
||||
projectsTotal
|
||||
.labels({ mode: projectStat.mode })
|
||||
.set(projectStat.count);
|
||||
});
|
||||
|
||||
environmentsTotal.reset();
|
||||
environmentsTotal.set(
|
||||
await instanceStatsService.environmentCount(),
|
||||
);
|
||||
|
||||
groupsTotal.reset();
|
||||
groupsTotal.set(await instanceStatsService.groupCount());
|
||||
|
||||
rolesTotal.reset();
|
||||
rolesTotal.set(await instanceStatsService.roleCount());
|
||||
|
||||
customRootRolesTotal.reset();
|
||||
customRootRolesTotal.set(
|
||||
await instanceStatsService.customRolesCount(),
|
||||
);
|
||||
|
||||
customRootRolesInUseTotal.reset();
|
||||
customRootRolesInUseTotal.set(
|
||||
await instanceStatsService.customRolesCountInUse(),
|
||||
);
|
||||
|
||||
segmentsTotal.reset();
|
||||
segmentsTotal.set(await instanceStatsService.segmentCount());
|
||||
|
||||
contextTotal.reset();
|
||||
contextTotal.set(
|
||||
await instanceStatsService.contextFieldCount(),
|
||||
);
|
||||
|
||||
strategiesTotal.reset();
|
||||
strategiesTotal.set(
|
||||
await instanceStatsService.strategiesCount(),
|
||||
);
|
||||
|
||||
samlEnabled.reset();
|
||||
samlEnabled.set((await instanceStatsService.hasSAML()) ? 1 : 0);
|
||||
|
||||
oidcEnabled.reset();
|
||||
oidcEnabled.set((await instanceStatsService.hasOIDC()) ? 1 : 0);
|
||||
} catch (e) {}
|
||||
},
|
||||
};
|
||||
}
|
||||
export default class MetricsMonitor {
|
||||
constructor() {}
|
||||
|
||||
async startMonitoring(
|
||||
config: IUnleashConfig,
|
||||
stores: IUnleashStores,
|
||||
version: string,
|
||||
eventBus: EventEmitter,
|
||||
instanceStatsService: InstanceStatsService,
|
||||
schedulerService: SchedulerService,
|
||||
db: Knex,
|
||||
): Promise<void> {
|
||||
if (!config.server.serverMetrics) {
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
async configureDbMetrics(
|
||||
db: Knex,
|
||||
eventBus: EventEmitter,
|
||||
schedulerService: SchedulerService,
|
||||
settingStore: ISettingStore,
|
||||
): Promise<void> {
|
||||
if (db?.client) {
|
||||
const dbPoolMin = createGauge({
|
||||
name: 'db_pool_min',
|
||||
help: 'Minimum DB pool size',
|
||||
});
|
||||
dbPoolMin.set(db.client.pool.min);
|
||||
const dbPoolMax = createGauge({
|
||||
name: 'db_pool_max',
|
||||
help: 'Maximum DB pool size',
|
||||
});
|
||||
dbPoolMax.set(db.client.pool.max);
|
||||
const dbPoolFree = createGauge({
|
||||
name: 'db_pool_free',
|
||||
help: 'Current free connections in DB pool',
|
||||
});
|
||||
const dbPoolUsed = createGauge({
|
||||
name: 'db_pool_used',
|
||||
help: 'Current connections in use in DB pool',
|
||||
});
|
||||
const dbPoolPendingCreates = createGauge({
|
||||
name: 'db_pool_pending_creates',
|
||||
help: 'how many asynchronous create calls are running in DB pool',
|
||||
});
|
||||
const dbPoolPendingAcquires = createGauge({
|
||||
name: 'db_pool_pending_acquires',
|
||||
help: 'how many acquires are waiting for a resource to be released in DB pool',
|
||||
});
|
||||
collectDefaultMetrics();
|
||||
|
||||
eventBus.on(DB_POOL_UPDATE, (data) => {
|
||||
dbPoolFree.set(data.free);
|
||||
dbPoolUsed.set(data.used);
|
||||
dbPoolPendingCreates.set(data.pendingCreates);
|
||||
dbPoolPendingAcquires.set(data.pendingAcquires);
|
||||
});
|
||||
const { collectStaticCounters, collectDbMetrics } =
|
||||
registerPrometheusMetrics(
|
||||
config,
|
||||
stores,
|
||||
version,
|
||||
eventBus,
|
||||
instanceStatsService,
|
||||
);
|
||||
|
||||
const postgresVersion = await stores.settingStore.postgresVersion();
|
||||
registerPrometheusPostgresMetrics(db, eventBus, postgresVersion);
|
||||
|
||||
await schedulerService.schedule(
|
||||
async () =>
|
||||
this.registerPoolMetrics.bind(
|
||||
this,
|
||||
db.client.pool,
|
||||
eventBus,
|
||||
),
|
||||
Promise.all([collectStaticCounters(), collectDbMetrics()]),
|
||||
hoursToMilliseconds(2),
|
||||
'collectStaticCounters',
|
||||
);
|
||||
await schedulerService.schedule(
|
||||
async () =>
|
||||
this.registerPoolMetrics.bind(this, db.client.pool, eventBus),
|
||||
minutesToMilliseconds(1),
|
||||
'registerPoolMetrics',
|
||||
0, // no jitter
|
||||
);
|
||||
const postgresVersion = await settingStore.postgresVersion();
|
||||
const database_version = createGauge({
|
||||
name: 'postgres_version',
|
||||
help: 'Which version of postgres is running (SHOW server_version)',
|
||||
labelNames: ['version'],
|
||||
});
|
||||
database_version.labels({ version: postgresVersion }).set(1);
|
||||
}
|
||||
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
|
||||
@ -1080,26 +1115,8 @@ export default class MetricsMonitor {
|
||||
// eslint-disable-next-line no-empty
|
||||
} catch (e) {}
|
||||
}
|
||||
|
||||
async resolveEnvironmentType(
|
||||
environment: string,
|
||||
cachedEnvironments: () => Promise<IEnvironment[]>,
|
||||
): Promise<string> {
|
||||
const environments = await cachedEnvironments();
|
||||
const env = environments.find((e) => e.name === environment);
|
||||
|
||||
if (env) {
|
||||
return env.type;
|
||||
} else {
|
||||
return 'unknown';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function createMetricsMonitor(): MetricsMonitor {
|
||||
return new MetricsMonitor();
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
createMetricsMonitor,
|
||||
};
|
||||
|
@ -6,10 +6,12 @@ import {
|
||||
import getLogger from '../../../fixtures/no-logger';
|
||||
import type { IUnleashStores } from '../../../../lib/types';
|
||||
import { ApiTokenType } from '../../../../lib/types/models/api-token';
|
||||
import { registerPrometheusMetrics } from '../../../../lib/metrics';
|
||||
|
||||
let app: IUnleashTest;
|
||||
let db: ITestDb;
|
||||
let stores: IUnleashStores;
|
||||
let refreshDbMetrics: () => Promise<void>;
|
||||
|
||||
beforeAll(async () => {
|
||||
db = await dbInit('instance_admin_api_serial', getLogger);
|
||||
@ -26,6 +28,15 @@ beforeAll(async () => {
|
||||
},
|
||||
db.rawDatabase,
|
||||
);
|
||||
|
||||
const { collectDbMetrics } = registerPrometheusMetrics(
|
||||
app.config,
|
||||
stores,
|
||||
undefined as unknown as string,
|
||||
app.config.eventBus,
|
||||
app.services.instanceStatsService,
|
||||
);
|
||||
refreshDbMetrics = collectDbMetrics;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
@ -39,6 +50,8 @@ test('should return instance statistics', async () => {
|
||||
createdByUserId: 9999,
|
||||
});
|
||||
|
||||
await refreshDbMetrics();
|
||||
|
||||
return app.request
|
||||
.get('/api/admin/instance-admin/statistics')
|
||||
.expect('Content-Type', /json/)
|
||||
|
Loading…
Reference in New Issue
Block a user