2021-08-12 15:04:37 +02:00
|
|
|
import { register } from 'prom-client';
|
|
|
|
import EventEmitter from 'events';
|
2024-03-18 13:58:05 +01:00
|
|
|
import type { IEventStore } from './types/stores/event-store';
|
2021-08-12 15:04:37 +02:00
|
|
|
import { createTestConfig } from '../test/config/test-config';
|
2024-03-12 13:27:04 +01:00
|
|
|
import { DB_TIME, FUNCTION_TIME, REQUEST_TIME } from './metric-events';
|
2022-07-22 11:00:22 +02:00
|
|
|
import {
|
|
|
|
CLIENT_METRICS,
|
|
|
|
CLIENT_REGISTER,
|
2024-01-09 16:33:00 +01:00
|
|
|
FEATURE_ENVIRONMENT_ENABLED,
|
2022-07-22 11:00:22 +02:00
|
|
|
FEATURE_UPDATED,
|
chore: Establish a baseline for the number of envs disabled per project (#6807)
This PR adds a counter in Prometheus for counting the number of
"environment disabled" events we get per project. The purpose of this is
to establish a baseline for one of the "project management UI" project's
key results.
## On gauges vs counters
This PR uses a counter. Using a gauge would give you the total number of
envs disabled, not the number of disable events. The difference is
subtle, but important.
For projects that were created before the new feature, the gauge might
be appropriate. Because each disabled env would require at least one
disabled event, we can get a floor of how many events were triggered for
each project.
However, for projects created after we introduce the planned change,
we're not interested in the total envs anymore, because you can disable
a hundred envs on creation with a single action. In this case, a gauge
showing 100 disabled envs would be misleading, because it didn't take
100 events to disable them.
So the interesting metric here is how many times did you specifically
disable an environment in project settings, hence the counter.
## Assumptions and future plans
To make this easier on ourselves, we make the following assumption: people
primarily disable envs **when creating a project**.
This means that there might be a few lagging indicators granting some
projects a smaller number of events than expected, but we may be able to
filter those out.
Further, if we had a metric for each project and its creation date, we
could correlate that with the metrics to answer the question "how many
envs do people disable in the first week? Two weeks? A month?". Or
worded differently: after creating a project, how long does it take for
people to configure environments?
Similarly, if we gather that data, it will also make filtering out the
number of events for projects created **after** the new changes have
been released much easier.
The good news: Because the project creation metric with dates is a
static aggregate, it can be applied at any time, even retroactively, to
see the effects.
2024-04-10 08:49:15 +02:00
|
|
|
PROJECT_ENVIRONMENT_REMOVED,
|
2022-07-22 11:00:22 +02:00
|
|
|
} from './types/events';
|
2021-08-12 15:04:37 +02:00
|
|
|
import { createMetricsMonitor } from './metrics';
|
|
|
|
import createStores from '../test/fixtures/store';
|
2023-09-18 15:05:17 +02:00
|
|
|
import { InstanceStatsService } from './features/instance-stats/instance-stats-service';
|
2022-10-25 13:10:27 +02:00
|
|
|
import VersionService from './services/version-service';
|
2023-09-18 15:05:17 +02:00
|
|
|
import { createFakeGetActiveUsers } from './features/instance-stats/getActiveUsers';
|
2023-10-10 12:32:23 +02:00
|
|
|
import { createFakeGetProductionChanges } from './features/instance-stats/getProductionChanges';
|
2024-03-18 13:58:05 +01:00
|
|
|
import type { IEnvironmentStore, IUnleashStores } from './types';
|
2024-01-09 16:33:00 +01:00
|
|
|
import FakeEnvironmentStore from './features/project-environments/fake-environment-store';
|
2024-02-08 17:15:42 +01:00
|
|
|
import { SchedulerService } from './services';
|
|
|
|
import noLogger from '../test/fixtures/no-logger';
|
2020-12-16 14:49:11 +01:00
|
|
|
|
|
|
|
// Shared fixtures for this metrics test suite.

// Monitor under test; it is started once in `beforeAll`.
const monitor = createMetricsMonitor();
// Event bus the tests emit timing and client-metric events on.
const eventBus = new EventEmitter();
// Alias for prom-client's `register` export, which the assertions read from.
const prometheusRegister = register;

// The handles below are assigned in `beforeAll` and reused by the tests.
let eventStore: IEventStore;
let environmentStore: IEnvironmentStore;
let statsService: InstanceStatsService;
let stores: IUnleashStores;
let schedulerService: SchedulerService;
|
|
|
|
/**
 * One-time suite setup: creates the test stores and services, then starts
 * the metrics monitor with a stubbed database handle.
 */
beforeAll(async () => {
    const testConfig = createTestConfig({ server: { serverMetrics: true } });

    stores = createStores();
    eventStore = stores.eventStore;
    // Replace the environment store with a fake and keep a handle to it so
    // individual tests can create environments.
    environmentStore = new FakeEnvironmentStore();
    stores.environmentStore = environmentStore;

    const versionService = new VersionService(
        stores,
        testConfig,
        createFakeGetActiveUsers(),
        createFakeGetProductionChanges(),
    );
    statsService = new InstanceStatsService(
        stores,
        testConfig,
        versionService,
        createFakeGetActiveUsers(),
        createFakeGetProductionChanges(),
    );

    schedulerService = new SchedulerService(
        noLogger,
        // Maintenance mode is always reported as off in this suite.
        { isMaintenanceMode: () => Promise.resolve(false) },
        eventBus,
    );

    // Stub exposing only pool limits and pool counters.
    const fakeDb = {
        client: {
            pool: {
                min: 0,
                max: 4,
                numUsed: () => 2,
                numFree: () => 2,
                numPendingAcquires: () => 0,
                numPendingCreates: () => 1,
            },
        },
    };

    await monitor.startMonitoring(
        testConfig,
        stores,
        '4.0.0',
        eventBus,
        statsService,
        schedulerService,
        // @ts-ignore - We don't want a full knex implementation for our tests, it's enough that it actually yields the numbers we want.
        fakeDb,
    );
});
|
2024-02-08 17:15:42 +01:00
|
|
|
|
|
|
|
// Suite teardown: stop the scheduler service created in `beforeAll`.
afterAll(async () => {
    schedulerService.stop();
});
|
|
|
|
|
2021-05-28 11:10:24 +02:00
|
|
|
// A REQUEST_TIME event on the bus must show up in the request-duration metric.
test('should collect metrics for requests', async () => {
    const requestTiming = {
        path: 'somePath',
        method: 'GET',
        statusCode: 200,
        time: 1337,
    };
    eventBus.emit(REQUEST_TIME, requestTiming);

    const scraped = await prometheusRegister.metrics();

    expect(scraped).toMatch(
        /http_request_duration_milliseconds\{quantile="0\.99",path="somePath",method="GET",status="200",appName="undefined"\}.*1337/,
    );
});
|
2018-05-23 11:24:24 +02:00
|
|
|
|
2021-05-28 11:10:24 +02:00
|
|
|
// A FEATURE_UPDATED event without an environment is expected to be counted
// under environment "default" with environmentType "production".
test('should collect metrics for updated toggles', async () => {
    // `eventStore` is the same object as `stores.eventStore` (see beforeAll).
    eventStore.emit(FEATURE_UPDATED, {
        featureName: 'TestToggle',
        project: 'default',
        data: { name: 'TestToggle' },
    });

    const scraped = await prometheusRegister.metrics();

    expect(scraped).toMatch(
        /feature_toggle_update_total\{toggle="TestToggle",project="default",environment="default",environmentType="production"\} 1/,
    );
});
|
|
|
|
|
|
|
|
// The environmentType label should be taken from the environment created in
// the fake store when a FEATURE_ENVIRONMENT_ENABLED event arrives.
test('should set environmentType when toggle is flipped', async () => {
    await environmentStore.create({
        name: 'testEnvironment',
        enabled: true,
        type: 'testType',
        sortOrder: 1,
    });

    // `eventStore` is the same object as `stores.eventStore` (see beforeAll).
    eventStore.emit(FEATURE_ENVIRONMENT_ENABLED, {
        featureName: 'TestToggle',
        project: 'default',
        environment: 'testEnvironment',
        data: { name: 'TestToggle' },
    });

    // Wait for event to be processed, not nice, but it works.
    await new Promise((resolve) => {
        setTimeout(resolve, 1);
    });

    const scraped = await prometheusRegister.metrics();

    expect(scraped).toMatch(
        /feature_toggle_update_total\{toggle="TestToggle",project="default",environment="testEnvironment",environmentType="testType"\} 1/,
    );
});
|
2018-11-28 15:50:49 +01:00
|
|
|
|
2021-05-28 11:10:24 +02:00
|
|
|
// A CLIENT_METRICS bucket with yes/no counts should yield one usage sample
// per outcome (active="true" and active="false").
test('should collect metrics for client metric reports', async () => {
    const toggles = {
        TestToggle: {
            yes: 10,
            no: 5,
        },
    };
    eventBus.emit(CLIENT_METRICS, { bucket: { toggles } });

    const scraped = await prometheusRegister.metrics();

    expect(scraped).toMatch(
        /feature_toggle_usage_total\{toggle="TestToggle",active="true",appName="undefined"\} 10\nfeature_toggle_usage_total\{toggle="TestToggle",active="false",appName="undefined"\} 5/,
    );
});
|
2019-08-04 11:10:51 +02:00
|
|
|
|
2021-05-28 11:10:24 +02:00
|
|
|
// A DB_TIME event should be reported on db_query_duration_seconds, labelled
// with the emitting store and action.
test('should collect metrics for db query timings', async () => {
    eventBus.emit(DB_TIME, {
        store: 'foo',
        action: 'bar',
        time: 0.1337,
    });

    const metrics = await prometheusRegister.metrics();

    // The decimal point in the sample value is escaped so that it only
    // matches a literal "." rather than any character.
    expect(metrics).toMatch(
        /db_query_duration_seconds\{quantile="0\.99",store="foo",action="bar"\} 0\.1337/,
    );
});
|
2020-02-28 14:50:32 +01:00
|
|
|
|
2024-03-12 13:27:04 +01:00
|
|
|
// A FUNCTION_TIME event should be reported on function_duration_seconds,
// labelled with the function and class names from the event.
test('should collect metrics for function timings', async () => {
    eventBus.emit(FUNCTION_TIME, {
        functionName: 'getToggles',
        className: 'ToggleService',
        time: 0.1337,
    });

    const metrics = await prometheusRegister.metrics();

    // The decimal point in the sample value is escaped so that it only
    // matches a literal "." rather than any character.
    expect(metrics).toMatch(
        /function_duration_seconds\{quantile="0\.99",functionName="getToggles",className="ToggleService"\} 0\.1337/,
    );
});
|
|
|
|
|
2021-05-28 11:10:24 +02:00
|
|
|
// With no event emitted by this test, the feature toggle total is expected
// to read 0 for some version label.
test('should collect metrics for feature toggle size', async () => {
    expect(await prometheusRegister.metrics()).toMatch(
        /feature_toggles_total\{version="(.*)"\} 0/,
    );
});
|
2021-02-04 14:14:46 +01:00
|
|
|
|
2024-03-28 12:40:30 +01:00
|
|
|
// The archived feature toggle total is expected to read 0 here.
test('should collect metrics for archived feature toggle size', async () => {
    expect(await prometheusRegister.metrics()).toMatch(
        /feature_toggles_archived_total 0/,
    );
});
|
|
|
|
|
2023-08-07 15:59:29 +02:00
|
|
|
// Refreshes the app-count snapshot first, then expects a client_apps_total
// sample of 0 for some range label.
test('should collect metrics for total client apps', async () => {
    await statsService.refreshAppCountSnapshot();

    const scraped = await prometheusRegister.metrics();

    expect(scraped).toMatch(/client_apps_total\{range="(.*)"\} 0/);
});
|
|
|
|
|
2021-05-28 11:10:24 +02:00
|
|
|
// Every db_pool_* metric name must appear in the metrics output.
test('Should collect metrics for database', async () => {
    const scraped = await prometheusRegister.metrics();

    const poolMetricPatterns = [
        /db_pool_max/,
        /db_pool_min/,
        /db_pool_used/,
        /db_pool_free/,
        /db_pool_pending_creates/,
        /db_pool_pending_acquires/,
    ];
    for (const pattern of poolMetricPatterns) {
        expect(scraped).toMatch(pattern);
    }
});
|
2022-07-22 11:00:22 +02:00
|
|
|
|
|
|
|
// CLIENT_REGISTER events are tallied per SDK name and version; one further
// registration must raise the matching count by one.
test('Should collect metrics for client sdk versions', async () => {
    const nodeSdk = 'unleash-client-node:3.2.5';
    const javaSdk = 'unleash-client-java:5.0.0';

    // Three registrations for the node SDK, then three for the java SDK.
    for (const sdkVersion of [nodeSdk, javaSdk]) {
        for (let i = 0; i < 3; i += 1) {
            eventStore.emit(CLIENT_REGISTER, { sdkVersion });
        }
    }

    const firstScrape =
        await prometheusRegister.getSingleMetricAsString('client_sdk_versions');
    expect(firstScrape).toMatch(
        /client_sdk_versions\{sdk_name="unleash-client-node",sdk_version="3\.2\.5"\} 3/,
    );
    expect(firstScrape).toMatch(
        /client_sdk_versions\{sdk_name="unleash-client-java",sdk_version="5\.0\.0"\} 3/,
    );

    eventStore.emit(CLIENT_REGISTER, { sdkVersion: nodeSdk });

    const secondScrape =
        await prometheusRegister.getSingleMetricAsString('client_sdk_versions');
    expect(secondScrape).toMatch(
        /client_sdk_versions\{sdk_name="unleash-client-node",sdk_version="3\.2\.5"\} 4/,
    );
});
|
|
|
|
|
|
|
|
// Registrations with an sdkVersion lacking a ":version" part, or with no
// sdkVersion at all, must not appear in the client_sdk_versions metric.
test('Should not collect client sdk version if sdkVersion is of wrong format or non-existent', async () => {
    const invalidRegistrations = [{ sdkVersion: 'unleash-client-rust' }, {}];
    for (const registration of invalidRegistrations) {
        eventStore.emit(CLIENT_REGISTER, registration);
    }

    const scraped =
        await prometheusRegister.getSingleMetricAsString('client_sdk_versions');

    expect(scraped).not.toMatch(/unleash-client-rust/);
});
|
chore: Establish a baseline for the number of envs disabled per project (#6807)
This PR adds a counter in Prometheus for counting the number of
"environment disabled" events we get per project. The purpose of this is
to establish a baseline for one of the "project management UI" project's
key results.
## On gauges vs counters
This PR uses a counter. Using a gauge would give you the total number of
envs disabled, not the number of disable events. The difference is
subtle, but important.
For projects that were created before the new feature, the gauge might
be appropriate. Because each disabled env would require at least one
disabled event, we can get a floor of how many events were triggered for
each project.
However, for projects created after we introduce the planned change,
we're not interested in the total envs anymore, because you can disable
a hundred envs on creation with a single action. In this case, a gauge
showing 100 disabled envs would be misleading, because it didn't take
100 events to disable them.
So the interesting metric here is how many times did you specifically
disable an environment in project settings, hence the counter.
## Assumptions and future plans
To make this easier on ourselves, we make the following assumption: people
primarily disable envs **when creating a project**.
This means that there might be a few lagging indicators granting some
projects a smaller number of events than expected, but we may be able to
filter those out.
Further, if we had a metric for each project and its creation date, we
could correlate that with the metrics to answer the question "how many
envs do people disable in the first week? Two weeks? A month?". Or
worded differently: after creating a project, how long does it take for
people to configure environments?
Similarly, if we gather that data, it will also make filtering out the
number of events for projects created **after** the new changes have
been released much easier.
The good news: Because the project creation metric with dates is a
static aggregate, it can be applied at any time, even retroactively, to
see the effects.
2024-04-10 08:49:15 +02:00
|
|
|
|
|
|
|
// A PROJECT_ENVIRONMENT_REMOVED event should increment the per-project
// project_environments_disabled metric.
test('should collect metrics for project disabled numbers', async () => {
    eventStore.emit(PROJECT_ENVIRONMENT_REMOVED, {
        project: 'default',
        environment: 'staging',
        createdBy: 'Jay',
        createdByUserId: 26,
    });

    const recordedMetric = await prometheusRegister.getSingleMetricAsString(
        'project_environments_disabled',
    );

    // Braces are escaped (as in the other assertions in this file) so the
    // pattern does not depend on the legacy regex grammar treating a bare
    // "{" as a literal; the quotes need no escaping.
    expect(recordedMetric).toMatch(
        /project_environments_disabled\{project_id="default"\} 1/,
    );
});
|