2024-01-19 15:51:29 +01:00
|
|
|
import { collectDefaultMetrics } from 'prom-client';
|
2024-01-09 16:33:00 +01:00
|
|
|
import memoizee from 'memoizee';
|
2024-03-18 13:58:05 +01:00
|
|
|
import type EventEmitter from 'events';
|
|
|
|
import type { Knex } from 'knex';
|
2021-04-29 10:21:29 +02:00
|
|
|
import * as events from './metric-events';
|
2021-02-16 14:30:08 +01:00
|
|
|
import {
|
2021-11-02 15:13:46 +01:00
|
|
|
DB_POOL_UPDATE,
|
2018-05-23 11:24:24 +02:00
|
|
|
FEATURE_ARCHIVED,
|
2021-11-02 15:13:46 +01:00
|
|
|
FEATURE_CREATED,
|
2018-05-23 11:24:24 +02:00
|
|
|
FEATURE_REVIVED,
|
2021-11-12 13:15:51 +01:00
|
|
|
FEATURE_STRATEGY_ADD,
|
|
|
|
FEATURE_STRATEGY_REMOVE,
|
|
|
|
FEATURE_STRATEGY_UPDATE,
|
2022-09-08 11:01:27 +02:00
|
|
|
FEATURE_ENVIRONMENT_ENABLED,
|
|
|
|
FEATURE_ENVIRONMENT_DISABLED,
|
|
|
|
FEATURE_VARIANTS_UPDATED,
|
|
|
|
FEATURE_METADATA_UPDATED,
|
2021-11-02 15:13:46 +01:00
|
|
|
FEATURE_UPDATED,
|
2021-12-09 21:02:58 +01:00
|
|
|
CLIENT_METRICS,
|
2022-07-22 11:00:22 +02:00
|
|
|
CLIENT_REGISTER,
|
chore: Establish a baseline for the number of envs disabled per project (#6807)
This PR adds a counter in Prometheus for counting the number of
"environment disabled" events we get per project. The purpose of this is
to establish a baseline for one of the "project management UI" project's
key results.
## On gauges vs counters
This PR uses a counter. Using a gauge would give you the total number of
envs disabled, not the number of disable events. The difference is
subtle, but important.
For projects that were created before the new feature, the gauge might
be appropriate. Because each disabled env would require at least one
disabled event, we can get a floor of how many events were triggered for
each project.
However, for projects created after we introduce the planned change,
we're not interested in the total envs anymore, because you can disable
a hundred envs on creation with a single action. In this case, a gauge
showing 100 disabled envs would be misleading, because it didn't take
100 events to disable them.
So the interesting metric here is how many times did you specifically
disable an environment in project settings, hence the counter.
## Assumptions and future plans
To make this easier on ourselves, we make the following assumption: people
primarily disable envs **when creating a project**.
This means that there might be a few lagging indicators granting some
projects a smaller number of events than expected, but we may be able to
filter those out.
Further, if we had a metric for each project and its creation date, we
could correlate that with the metrics to answer the question "how many
envs do people disable in the first week? Two weeks? A month?". Or
worded differently: after creating a project, how long does it take for
people to configure environments?
Similarly, if we gather that data, it will also make filtering out the
number of events for projects created **after** the new changes have
been released much easier.
The good news: Because the project creation metric with dates is a
static aggregate, it can be applied at any time, even retroactively, to
see the effects.
2024-04-10 08:49:15 +02:00
|
|
|
PROJECT_ENVIRONMENT_REMOVED,
|
2024-08-20 09:46:39 +02:00
|
|
|
PROJECT_CREATED,
|
|
|
|
PROJECT_ARCHIVED,
|
|
|
|
PROJECT_REVIVED,
|
|
|
|
PROJECT_DELETED,
|
2021-04-29 10:21:29 +02:00
|
|
|
} from './types/events';
|
2024-03-18 13:58:05 +01:00
|
|
|
import type { IUnleashConfig } from './types/option';
|
2024-10-18 11:11:22 +02:00
|
|
|
import type { IUnleashStores } from './types/stores';
|
2021-11-02 15:13:46 +01:00
|
|
|
import { hoursToMilliseconds, minutesToMilliseconds } from 'date-fns';
|
2024-03-18 13:58:05 +01:00
|
|
|
import type { InstanceStatsService } from './features/instance-stats/instance-stats-service';
|
2024-07-04 08:51:27 +02:00
|
|
|
import type { IEnvironment, ISdkHeartbeat } from './types';
|
2024-02-22 14:29:21 +01:00
|
|
|
import {
|
|
|
|
createCounter,
|
|
|
|
createGauge,
|
|
|
|
createSummary,
|
|
|
|
createHistogram,
|
|
|
|
} from './util/metrics';
|
2024-03-18 13:58:05 +01:00
|
|
|
import type { SchedulerService } from './services';
|
2024-05-31 11:40:46 +02:00
|
|
|
import type { IClientMetricsEnv } from './features/metrics/client-metrics/client-metrics-store-v2-type';
|
2024-10-18 11:11:22 +02:00
|
|
|
import { DbMetricsMonitor } from './metrics-gauge';
|
|
|
|
|
|
|
|
/**
 * Registers Prometheus gauges for the database connection pool and for the
 * Postgres server version.
 *
 * Nothing is registered when `db` or `db.client` is missing.
 *
 * @param db - Knex instance; `db.client.pool.min` and `db.client.pool.max`
 *   are read once to seed the `db_pool_min` / `db_pool_max` gauges.
 * @param eventBus - Emitter on which `DB_POOL_UPDATE` events are listened for;
 *   each event refreshes the free / used / pending gauges from its payload.
 * @param postgresVersion - Value used as the `version` label on the
 *   `postgres_version` gauge.
 */
export function registerPrometheusPostgresMetrics(
    db: Knex,
    eventBus: EventEmitter,
    postgresVersion: string,
) {
    if (!db?.client) {
        return;
    }

    // Pool bounds are written once at registration time.
    createGauge({
        name: 'db_pool_min',
        help: 'Minimum DB pool size',
    }).set(db.client.pool.min);
    createGauge({
        name: 'db_pool_max',
        help: 'Maximum DB pool size',
    }).set(db.client.pool.max);

    // Live pool gauges, keyed by the payload field that feeds each of them.
    const poolGauges = {
        free: createGauge({
            name: 'db_pool_free',
            help: 'Current free connections in DB pool',
        }),
        used: createGauge({
            name: 'db_pool_used',
            help: 'Current connections in use in DB pool',
        }),
        pendingCreates: createGauge({
            name: 'db_pool_pending_creates',
            help: 'how many asynchronous create calls are running in DB pool',
        }),
        pendingAcquires: createGauge({
            name: 'db_pool_pending_acquires',
            help: 'how many acquires are waiting for a resource to be released in DB pool',
        }),
    };

    eventBus.on(DB_POOL_UPDATE, (poolStats) => {
        poolGauges.free.set(poolStats.free);
        poolGauges.used.set(poolStats.used);
        poolGauges.pendingCreates.set(poolStats.pendingCreates);
        poolGauges.pendingAcquires.set(poolStats.pendingAcquires);
    });

    // The version is carried in the label; the sample value is a constant 1.
    const databaseVersion = createGauge({
        name: 'postgres_version',
        help: 'Which version of postgres is running (SHOW server_version)',
        labelNames: ['version'],
    });
    databaseVersion.labels({ version: postgresVersion }).set(1);
}
|
2024-07-11 11:39:38 +02:00
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
export function registerPrometheusMetrics(
|
|
|
|
config: IUnleashConfig,
|
|
|
|
stores: IUnleashStores,
|
|
|
|
version: string,
|
|
|
|
eventBus: EventEmitter,
|
|
|
|
instanceStatsService: InstanceStatsService,
|
|
|
|
) {
|
|
|
|
const resolveEnvironmentType = async (
|
|
|
|
environment: string,
|
|
|
|
cachedEnvironments: () => Promise<IEnvironment[]>,
|
|
|
|
): Promise<string> => {
|
|
|
|
const environments = await cachedEnvironments();
|
|
|
|
const env = environments.find((e) => e.name === environment);
|
2024-07-11 11:39:38 +02:00
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
if (env) {
|
|
|
|
return env.type;
|
|
|
|
} else {
|
|
|
|
return 'unknown';
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
const { eventStore, environmentStore } = stores;
|
2024-10-28 11:47:58 +01:00
|
|
|
const { flagResolver } = config;
|
2024-10-18 11:11:22 +02:00
|
|
|
const dbMetrics = new DbMetricsMonitor(config);
|
|
|
|
|
|
|
|
const cachedEnvironments: () => Promise<IEnvironment[]> = memoizee(
|
|
|
|
async () => environmentStore.getAll(),
|
|
|
|
{
|
|
|
|
promise: true,
|
|
|
|
maxAge: hoursToMilliseconds(1),
|
|
|
|
},
|
|
|
|
);
|
|
|
|
|
|
|
|
const requestDuration = createSummary({
|
|
|
|
name: 'http_request_duration_milliseconds',
|
|
|
|
help: 'App response time',
|
|
|
|
labelNames: ['path', 'method', 'status', 'appName'],
|
|
|
|
percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
|
|
|
|
maxAgeSeconds: 600,
|
|
|
|
ageBuckets: 5,
|
|
|
|
});
|
|
|
|
const schedulerDuration = createSummary({
|
|
|
|
name: 'scheduler_duration_seconds',
|
|
|
|
help: 'Scheduler duration time',
|
|
|
|
labelNames: ['jobId'],
|
|
|
|
percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
|
|
|
|
maxAgeSeconds: 600,
|
|
|
|
ageBuckets: 5,
|
|
|
|
});
|
|
|
|
const dbDuration = createSummary({
|
|
|
|
name: 'db_query_duration_seconds',
|
|
|
|
help: 'DB query duration time',
|
|
|
|
labelNames: ['store', 'action'],
|
|
|
|
percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
|
|
|
|
maxAgeSeconds: 600,
|
|
|
|
ageBuckets: 5,
|
|
|
|
});
|
|
|
|
const functionDuration = createSummary({
|
|
|
|
name: 'function_duration_seconds',
|
|
|
|
help: 'Function duration time',
|
|
|
|
labelNames: ['functionName', 'className'],
|
|
|
|
percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
|
|
|
|
maxAgeSeconds: 600,
|
|
|
|
ageBuckets: 5,
|
|
|
|
});
|
|
|
|
const featureFlagUpdateTotal = createCounter({
|
|
|
|
name: 'feature_toggle_update_total',
|
2024-11-26 08:57:43 +01:00
|
|
|
help: 'Number of times a flag has been updated. Environment label would be "n/a" when it is not available, e.g. when a feature flag is created.',
|
2024-10-18 11:11:22 +02:00
|
|
|
labelNames: [
|
|
|
|
'toggle',
|
|
|
|
'project',
|
|
|
|
'environment',
|
|
|
|
'environmentType',
|
|
|
|
'action',
|
|
|
|
],
|
|
|
|
});
|
|
|
|
const featureFlagUsageTotal = createCounter({
|
|
|
|
name: 'feature_toggle_usage_total',
|
|
|
|
help: 'Number of times a feature flag has been used',
|
|
|
|
labelNames: ['toggle', 'active', 'appName'],
|
|
|
|
});
|
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
|
|
|
name: 'feature_toggles_total',
|
|
|
|
help: 'Number of feature flags',
|
|
|
|
labelNames: ['version'],
|
|
|
|
query: () => instanceStatsService.getToggleCount(),
|
|
|
|
map: (value) => ({ value, labels: { version } }),
|
|
|
|
});
|
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
|
|
|
name: 'max_feature_environment_strategies',
|
|
|
|
help: 'Maximum number of environment strategies in one feature',
|
|
|
|
labelNames: ['feature', 'environment'],
|
|
|
|
query: () =>
|
|
|
|
stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(),
|
|
|
|
map: (result) => ({
|
|
|
|
value: result.count,
|
|
|
|
labels: {
|
|
|
|
environment: result.environment,
|
|
|
|
feature: result.feature,
|
|
|
|
},
|
|
|
|
}),
|
|
|
|
});
|
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
|
|
|
name: 'max_feature_strategies',
|
|
|
|
help: 'Maximum number of strategies in one feature',
|
|
|
|
labelNames: ['feature'],
|
|
|
|
query: () =>
|
|
|
|
stores.featureStrategiesReadModel.getMaxFeatureStrategies(),
|
|
|
|
map: (result) => ({
|
|
|
|
value: result.count,
|
|
|
|
labels: { feature: result.feature },
|
|
|
|
}),
|
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'max_constraint_values',
|
|
|
|
help: 'Maximum number of constraint values used in a single constraint',
|
|
|
|
labelNames: ['feature', 'environment'],
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => stores.featureStrategiesReadModel.getMaxConstraintValues(),
|
|
|
|
map: (result) => ({
|
|
|
|
value: result.count,
|
|
|
|
labels: {
|
|
|
|
environment: result.environment,
|
|
|
|
feature: result.feature,
|
|
|
|
},
|
|
|
|
}),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'max_strategy_constraints',
|
|
|
|
help: 'Maximum number of constraints used on a single strategy',
|
|
|
|
labelNames: ['feature', 'environment'],
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () =>
|
|
|
|
stores.featureStrategiesReadModel.getMaxConstraintsPerStrategy(),
|
|
|
|
map: (result) => ({
|
|
|
|
value: result.count,
|
|
|
|
labels: {
|
|
|
|
environment: result.environment,
|
|
|
|
feature: result.feature,
|
|
|
|
},
|
|
|
|
}),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'largest_project_environment_size',
|
|
|
|
help: 'The largest project environment size (bytes) based on strategies, constraints, variants and parameters',
|
|
|
|
labelNames: ['project', 'environment'],
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () =>
|
|
|
|
stores.largestResourcesReadModel.getLargestProjectEnvironments(1),
|
|
|
|
map: (results) => {
|
|
|
|
const result = results[0];
|
|
|
|
return {
|
|
|
|
value: result.size,
|
|
|
|
labels: {
|
|
|
|
project: result.project,
|
|
|
|
environment: result.environment,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
},
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'largest_feature_environment_size',
|
|
|
|
help: 'The largest feature environment size (bytes) base on strategies, constraints, variants and parameters',
|
|
|
|
labelNames: ['feature', 'environment'],
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () =>
|
|
|
|
stores.largestResourcesReadModel.getLargestFeatureEnvironments(1),
|
|
|
|
map: (results) => {
|
|
|
|
const result = results[0];
|
|
|
|
return {
|
|
|
|
value: result.size,
|
|
|
|
labels: {
|
|
|
|
feature: result.feature,
|
|
|
|
environment: result.environment,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
},
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
const featureTogglesArchivedTotal = createGauge({
|
|
|
|
name: 'feature_toggles_archived_total',
|
|
|
|
help: 'Number of archived feature flags',
|
|
|
|
});
|
|
|
|
const usersTotal = createGauge({
|
|
|
|
name: 'users_total',
|
|
|
|
help: 'Number of users',
|
|
|
|
});
|
2024-10-25 14:43:14 +02:00
|
|
|
const trafficTotal = createGauge({
|
|
|
|
name: 'traffic_total',
|
|
|
|
help: 'Traffic used current month',
|
|
|
|
});
|
2024-10-18 11:11:22 +02:00
|
|
|
const serviceAccounts = createGauge({
|
|
|
|
name: 'service_accounts_total',
|
|
|
|
help: 'Number of service accounts',
|
|
|
|
});
|
|
|
|
const apiTokens = createGauge({
|
|
|
|
name: 'api_tokens_total',
|
|
|
|
help: 'Number of API tokens',
|
|
|
|
labelNames: ['type'],
|
|
|
|
});
|
|
|
|
const enabledMetricsBucketsPreviousDay = createGauge({
|
|
|
|
name: 'enabled_metrics_buckets_previous_day',
|
|
|
|
help: 'Number of hourly enabled/disabled metric buckets in the previous day',
|
|
|
|
});
|
|
|
|
const variantMetricsBucketsPreviousDay = createGauge({
|
|
|
|
name: 'variant_metrics_buckets_previous_day',
|
|
|
|
help: 'Number of hourly variant metric buckets in the previous day',
|
|
|
|
});
|
|
|
|
const usersActive7days = createGauge({
|
|
|
|
name: 'users_active_7',
|
|
|
|
help: 'Number of users active in the last 7 days',
|
|
|
|
});
|
|
|
|
const usersActive30days = createGauge({
|
|
|
|
name: 'users_active_30',
|
|
|
|
help: 'Number of users active in the last 30 days',
|
|
|
|
});
|
|
|
|
const usersActive60days = createGauge({
|
|
|
|
name: 'users_active_60',
|
|
|
|
help: 'Number of users active in the last 60 days',
|
|
|
|
});
|
|
|
|
const usersActive90days = createGauge({
|
|
|
|
name: 'users_active_90',
|
|
|
|
help: 'Number of users active in the last 90 days',
|
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'projects_total',
|
|
|
|
help: 'Number of projects',
|
|
|
|
labelNames: ['mode'],
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.getProjectModeCount(),
|
|
|
|
map: (projects) =>
|
|
|
|
projects.map((projectStat) => ({
|
|
|
|
value: projectStat.count,
|
|
|
|
labels: { mode: projectStat.mode },
|
|
|
|
})),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'environments_total',
|
|
|
|
help: 'Number of environments',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.environmentCount(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'groups_total',
|
|
|
|
help: 'Number of groups',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.groupCount(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'roles_total',
|
|
|
|
help: 'Number of roles',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.roleCount(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'custom_root_roles_total',
|
|
|
|
help: 'Number of custom root roles',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.customRolesCount(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'custom_root_roles_in_use_total',
|
|
|
|
help: 'Number of custom root roles in use',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.customRolesCountInUse(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'segments_total',
|
|
|
|
help: 'Number of segments',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.segmentCount(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'context_total',
|
|
|
|
help: 'Number of context',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.contextFieldCount(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'strategies_total',
|
|
|
|
help: 'Number of strategies',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.strategiesCount(),
|
|
|
|
map: (result) => ({ value: result }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
|
|
|
name: 'client_apps_total',
|
|
|
|
help: 'Number of registered client apps aggregated by range by last seen',
|
|
|
|
labelNames: ['range'],
|
|
|
|
query: () => instanceStatsService.getLabeledAppCounts(),
|
|
|
|
map: (result) =>
|
|
|
|
Object.entries(result).map(([range, count]) => ({
|
|
|
|
value: count,
|
|
|
|
labels: { range },
|
|
|
|
})),
|
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'saml_enabled',
|
|
|
|
help: 'Whether SAML is enabled',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.hasSAML(),
|
|
|
|
map: (result) => ({ value: result ? 1 : 0 }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'oidc_enabled',
|
|
|
|
help: 'Whether OIDC is enabled',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => instanceStatsService.hasOIDC(),
|
|
|
|
map: (result) => ({ value: result ? 1 : 0 }),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-11-13 10:07:06 +01:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-11-13 12:10:53 +01:00
|
|
|
name: 'password_auth_enabled',
|
2024-11-13 10:07:06 +01:00
|
|
|
help: 'Whether password auth is enabled',
|
|
|
|
query: () => instanceStatsService.hasPasswordAuth(),
|
|
|
|
map: (result) => ({ value: result ? 1 : 0 }),
|
|
|
|
});
|
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
|
|
|
name: 'scim_enabled',
|
|
|
|
help: 'Whether SCIM is enabled',
|
|
|
|
query: () => instanceStatsService.hasSCIM(),
|
|
|
|
map: (result) => ({ value: result ? 1 : 0 }),
|
|
|
|
});
|
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
const clientSdkVersionUsage = createCounter({
|
|
|
|
name: 'client_sdk_versions',
|
|
|
|
help: 'Which sdk versions are being used',
|
|
|
|
labelNames: [
|
|
|
|
'sdk_name',
|
|
|
|
'sdk_version',
|
|
|
|
'platform_name',
|
|
|
|
'platform_version',
|
|
|
|
'yggdrasil_version',
|
|
|
|
'spec_version',
|
|
|
|
],
|
|
|
|
});
|
|
|
|
|
|
|
|
const productionChanges30 = createGauge({
|
|
|
|
name: 'production_changes_30',
|
|
|
|
help: 'Changes made to production environment last 30 days',
|
|
|
|
labelNames: ['environment'],
|
|
|
|
});
|
|
|
|
const productionChanges60 = createGauge({
|
|
|
|
name: 'production_changes_60',
|
|
|
|
help: 'Changes made to production environment last 60 days',
|
|
|
|
labelNames: ['environment'],
|
|
|
|
});
|
|
|
|
const productionChanges90 = createGauge({
|
|
|
|
name: 'production_changes_90',
|
|
|
|
help: 'Changes made to production environment last 90 days',
|
|
|
|
labelNames: ['environment'],
|
|
|
|
});
|
|
|
|
|
|
|
|
const rateLimits = createGauge({
|
|
|
|
name: 'rate_limits',
|
|
|
|
help: 'Rate limits (per minute) for METHOD/ENDPOINT pairs',
|
|
|
|
labelNames: ['endpoint', 'method'],
|
|
|
|
});
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/api/client/metrics',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.metricsRateLimiting.clientMetricsMaxPerMinute);
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/api/client/register',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.metricsRateLimiting.clientRegisterMaxPerMinute);
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/api/frontend/metrics',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.metricsRateLimiting.frontendMetricsMaxPerMinute);
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/api/frontend/register',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.metricsRateLimiting.frontendRegisterMaxPerMinute);
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/api/admin/user-admin',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.rateLimiting.createUserMaxPerMinute);
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/auth/simple',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.rateLimiting.simpleLoginMaxPerMinute);
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/auth/reset/password-email',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.rateLimiting.passwordResetMaxPerMinute);
|
|
|
|
rateLimits
|
|
|
|
.labels({
|
|
|
|
endpoint: '/api/signal-endpoint/:name',
|
|
|
|
method: 'POST',
|
|
|
|
})
|
|
|
|
.set(config.rateLimiting.callSignalEndpointMaxPerSecond * 60);
|
|
|
|
|
2024-12-16 10:48:33 +01:00
|
|
|
const namePrefixUsed = createCounter({
|
|
|
|
name: 'nameprefix_count',
|
|
|
|
help: 'Count of nameprefix usage in client api',
|
|
|
|
});
|
|
|
|
|
|
|
|
const tagsUsed = createCounter({
|
|
|
|
name: 'tags_count',
|
|
|
|
help: 'Count of tags usage in client api',
|
|
|
|
});
|
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
const featureCreatedByMigration = createCounter({
|
|
|
|
name: 'feature_created_by_migration_count',
|
|
|
|
help: 'Feature createdBy migration count',
|
|
|
|
});
|
|
|
|
const eventCreatedByMigration = createCounter({
|
|
|
|
name: 'event_created_by_migration_count',
|
|
|
|
help: 'Event createdBy migration count',
|
|
|
|
});
|
|
|
|
const proxyRepositoriesCreated = createCounter({
|
|
|
|
name: 'proxy_repositories_created',
|
|
|
|
help: 'Proxy repositories created',
|
|
|
|
});
|
|
|
|
const frontendApiRepositoriesCreated = createCounter({
|
|
|
|
name: 'frontend_api_repositories_created',
|
|
|
|
help: 'Frontend API repositories created',
|
|
|
|
});
|
|
|
|
const mapFeaturesForClientDuration = createHistogram({
|
|
|
|
name: 'map_features_for_client_duration',
|
|
|
|
help: 'Duration of mapFeaturesForClient function',
|
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'feature_lifecycle_stage_duration',
|
|
|
|
labelNames: ['stage', 'project_id'],
|
|
|
|
help: 'Duration of feature lifecycle stages',
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => stores.featureLifecycleReadModel.getAllWithStageDuration(),
|
|
|
|
map: (result) =>
|
|
|
|
result.map((stageResult) => ({
|
|
|
|
value: stageResult.duration,
|
|
|
|
labels: {
|
|
|
|
project_id: stageResult.project,
|
|
|
|
stage: stageResult.stage,
|
|
|
|
},
|
|
|
|
})),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'onboarding_duration',
|
|
|
|
labelNames: ['event'],
|
|
|
|
help: 'firstLogin, secondLogin, firstFeatureFlag, firstPreLive, firstLive from first user creation',
|
2024-10-28 11:47:58 +01:00
|
|
|
query: () => stores.onboardingReadModel.getInstanceOnboardingMetrics(),
|
2024-10-22 15:11:57 +02:00
|
|
|
map: (result) =>
|
|
|
|
Object.keys(result)
|
|
|
|
.filter((key) => Number.isInteger(result[key]))
|
|
|
|
.map((key) => ({
|
|
|
|
value: result[key],
|
|
|
|
labels: {
|
|
|
|
event: key,
|
|
|
|
},
|
|
|
|
})),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
|
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'project_onboarding_duration',
|
|
|
|
labelNames: ['event', 'project'],
|
|
|
|
help: 'firstFeatureFlag, firstPreLive, firstLive from project creation',
|
2024-10-28 11:47:58 +01:00
|
|
|
query: () => stores.onboardingReadModel.getProjectsOnboardingMetrics(),
|
2024-10-22 15:11:57 +02:00
|
|
|
map: (projectsOnboardingMetrics) =>
|
|
|
|
projectsOnboardingMetrics.flatMap(
|
|
|
|
({ project, ...projectMetrics }) =>
|
|
|
|
Object.keys(projectMetrics)
|
|
|
|
.filter((key) => Number.isInteger(projectMetrics[key]))
|
|
|
|
.map((key) => ({
|
|
|
|
value: projectMetrics[key],
|
|
|
|
labels: {
|
|
|
|
event: key,
|
|
|
|
project,
|
|
|
|
},
|
|
|
|
})),
|
|
|
|
),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
2024-10-22 15:11:57 +02:00
|
|
|
dbMetrics.registerGaugeDbMetric({
|
2024-10-18 11:11:22 +02:00
|
|
|
name: 'feature_lifecycle_stage_count_by_project',
|
|
|
|
help: 'Count features in a given stage by project id',
|
|
|
|
labelNames: ['stage', 'project_id'],
|
2024-10-22 15:11:57 +02:00
|
|
|
query: () => stores.featureLifecycleReadModel.getStageCountByProject(),
|
|
|
|
map: (result) =>
|
|
|
|
result.map((stageResult) => ({
|
|
|
|
value: stageResult.count,
|
|
|
|
labels: {
|
|
|
|
project_id: stageResult.project,
|
|
|
|
stage: stageResult.stage,
|
|
|
|
},
|
|
|
|
})),
|
2024-10-18 11:11:22 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
const featureLifecycleStageEnteredCounter = createCounter({
|
|
|
|
name: 'feature_lifecycle_stage_entered',
|
|
|
|
help: 'Count how many features entered a given stage',
|
|
|
|
labelNames: ['stage'],
|
|
|
|
});
|
|
|
|
|
|
|
|
const projectActionsCounter = createCounter({
|
|
|
|
name: 'project_actions_count',
|
|
|
|
help: 'Count project actions',
|
|
|
|
labelNames: ['action'],
|
|
|
|
});
|
|
|
|
|
|
|
|
const projectEnvironmentsDisabled = createCounter({
|
|
|
|
name: 'project_environments_disabled',
|
|
|
|
help: 'How many "environment disabled" events we have received for each project',
|
|
|
|
labelNames: ['project_id'],
|
|
|
|
});
|
|
|
|
|
|
|
|
const orphanedTokensTotal = createGauge({
|
|
|
|
name: 'orphaned_api_tokens_total',
|
|
|
|
help: 'Number of API tokens without a project',
|
|
|
|
});
|
|
|
|
|
2024-12-19 12:15:30 +01:00
|
|
|
const clientFeaturesMemory = createGauge({
|
|
|
|
name: 'client_features_memory',
|
|
|
|
help: 'The amount of memory client features endpoint is using for caching',
|
|
|
|
});
|
|
|
|
|
|
|
|
const clientDeltaMemory = createGauge({
|
|
|
|
name: 'client_delta_memory',
|
|
|
|
help: 'The amount of memory client features delta endpoint is using for caching',
|
|
|
|
});
|
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
const orphanedTokensActive = createGauge({
|
|
|
|
name: 'orphaned_api_tokens_active',
|
|
|
|
help: 'Number of API tokens without a project, last seen within 3 months',
|
|
|
|
});
|
|
|
|
|
|
|
|
const legacyTokensTotal = createGauge({
|
|
|
|
name: 'legacy_api_tokens_total',
|
|
|
|
help: 'Number of API tokens with v1 format',
|
|
|
|
});
|
|
|
|
|
|
|
|
const legacyTokensActive = createGauge({
|
|
|
|
name: 'legacy_api_tokens_active',
|
|
|
|
help: 'Number of API tokens with v1 format, last seen within 3 months',
|
|
|
|
});
|
|
|
|
|
|
|
|
const exceedsLimitErrorCounter = createCounter({
|
|
|
|
name: 'exceeds_limit_error',
|
|
|
|
help: 'The number of exceeds limit errors registered by this instance.',
|
|
|
|
labelNames: ['resource', 'limit'],
|
|
|
|
});
|
|
|
|
|
|
|
|
const requestOriginCounter = createCounter({
|
|
|
|
name: 'request_origin_counter',
|
|
|
|
help: 'Number of authenticated requests, including origin information.',
|
|
|
|
labelNames: ['type', 'method', 'source'],
|
|
|
|
});
|
|
|
|
|
|
|
|
const resourceLimit = createGauge({
|
|
|
|
name: 'resource_limit',
|
|
|
|
help: 'The maximum number of resources allowed.',
|
|
|
|
labelNames: ['resource'],
|
|
|
|
});
|
2024-10-22 15:11:57 +02:00
|
|
|
for (const [resource, limit] of Object.entries(config.resourceLimits)) {
|
|
|
|
resourceLimit.labels({ resource }).set(limit);
|
|
|
|
}
|
2024-10-18 11:11:22 +02:00
|
|
|
|
2024-11-13 15:00:47 +01:00
|
|
|
const licensedUsers = createGauge({
|
|
|
|
name: 'licensed_users',
|
2024-12-17 11:39:54 +01:00
|
|
|
help: 'The number of seats used.',
|
2024-11-13 15:00:47 +01:00
|
|
|
});
|
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
const addonEventsHandledCounter = createCounter({
|
|
|
|
name: 'addon_events_handled',
|
|
|
|
help: 'Events handled by addons and the result.',
|
|
|
|
labelNames: ['result', 'destination'],
|
|
|
|
});
|
|
|
|
|
|
|
|
// register event listeners
|
|
|
|
eventBus.on(
|
|
|
|
events.EXCEEDS_LIMIT,
|
|
|
|
({ resource, limit }: { resource: string; limit: number }) => {
|
|
|
|
exceedsLimitErrorCounter.increment({ resource, limit });
|
|
|
|
},
|
|
|
|
);
|
|
|
|
|
|
|
|
eventBus.on(
|
|
|
|
events.STAGE_ENTERED,
|
|
|
|
(entered: { stage: string; feature: string }) => {
|
|
|
|
featureLifecycleStageEnteredCounter.increment({
|
|
|
|
stage: entered.stage,
|
|
|
|
});
|
|
|
|
},
|
|
|
|
);
|
2024-07-11 11:39:38 +02:00
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
eventBus.on(
|
|
|
|
events.REQUEST_TIME,
|
|
|
|
({ path, method, time, statusCode, appName }) => {
|
|
|
|
requestDuration
|
|
|
|
.labels({
|
|
|
|
path,
|
|
|
|
method,
|
|
|
|
status: statusCode,
|
|
|
|
appName,
|
|
|
|
})
|
|
|
|
.observe(time);
|
|
|
|
},
|
|
|
|
);
|
|
|
|
|
|
|
|
eventBus.on(events.SCHEDULER_JOB_TIME, ({ jobId, time }) => {
|
|
|
|
schedulerDuration.labels(jobId).observe(time);
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.FUNCTION_TIME, ({ functionName, className, time }) => {
|
|
|
|
functionDuration
|
|
|
|
.labels({
|
|
|
|
functionName,
|
|
|
|
className,
|
|
|
|
})
|
|
|
|
.observe(time);
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.EVENTS_CREATED_BY_PROCESSED, ({ updated }) => {
|
|
|
|
eventCreatedByMigration.inc(updated);
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.FEATURES_CREATED_BY_PROCESSED, ({ updated }) => {
|
|
|
|
featureCreatedByMigration.inc(updated);
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.DB_TIME, ({ store, action, time }) => {
|
|
|
|
dbDuration
|
|
|
|
.labels({
|
|
|
|
store,
|
|
|
|
action,
|
|
|
|
})
|
|
|
|
.observe(time);
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.PROXY_REPOSITORY_CREATED, () => {
|
|
|
|
proxyRepositoriesCreated.inc();
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.FRONTEND_API_REPOSITORY_CREATED, () => {
|
|
|
|
frontendApiRepositoriesCreated.inc();
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.PROXY_FEATURES_FOR_TOKEN_TIME, ({ duration }) => {
|
|
|
|
mapFeaturesForClientDuration.observe(duration);
|
|
|
|
});
|
|
|
|
|
2024-12-16 10:48:33 +01:00
|
|
|
eventBus.on(events.CLIENT_METRICS_NAMEPREFIX, () => {
|
|
|
|
namePrefixUsed.inc();
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.CLIENT_METRICS_TAGS, () => {
|
|
|
|
tagsUsed.inc();
|
|
|
|
});
|
|
|
|
|
2024-12-19 12:15:30 +01:00
|
|
|
eventBus.on(events.CLIENT_FEATURES_MEMORY, (event: { memory: number }) => {
|
|
|
|
clientFeaturesMemory.reset();
|
|
|
|
clientFeaturesMemory.set(event.memory);
|
|
|
|
});
|
|
|
|
|
|
|
|
eventBus.on(events.CLIENT_DELTA_MEMORY, (event: { memory: number }) => {
|
|
|
|
clientDeltaMemory.reset();
|
|
|
|
clientDeltaMemory.set(event.memory);
|
|
|
|
});
|
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
events.onMetricEvent(
|
|
|
|
eventBus,
|
|
|
|
events.REQUEST_ORIGIN,
|
|
|
|
({ type, method, source }) => {
|
|
|
|
requestOriginCounter.increment({ type, method, source });
|
|
|
|
},
|
|
|
|
);
|
|
|
|
|
|
|
|
eventStore.on(FEATURE_CREATED, ({ featureName, project }) => {
|
|
|
|
featureFlagUpdateTotal.increment({
|
|
|
|
toggle: featureName,
|
|
|
|
project,
|
|
|
|
environment: 'n/a',
|
|
|
|
environmentType: 'n/a',
|
|
|
|
action: 'created',
|
|
|
|
});
|
|
|
|
});
|
|
|
|
eventStore.on(FEATURE_VARIANTS_UPDATED, ({ featureName, project }) => {
|
|
|
|
featureFlagUpdateTotal.increment({
|
|
|
|
toggle: featureName,
|
|
|
|
project,
|
|
|
|
environment: 'n/a',
|
|
|
|
environmentType: 'n/a',
|
|
|
|
action: 'updated',
|
|
|
|
});
|
|
|
|
});
|
|
|
|
eventStore.on(FEATURE_METADATA_UPDATED, ({ featureName, project }) => {
|
|
|
|
featureFlagUpdateTotal.increment({
|
|
|
|
toggle: featureName,
|
|
|
|
project,
|
|
|
|
environment: 'n/a',
|
|
|
|
environmentType: 'n/a',
|
|
|
|
action: 'updated',
|
|
|
|
});
|
|
|
|
});
|
|
|
|
eventStore.on(FEATURE_UPDATED, ({ featureName, project }) => {
|
|
|
|
featureFlagUpdateTotal.increment({
|
|
|
|
toggle: featureName,
|
|
|
|
project,
|
|
|
|
environment: 'default',
|
|
|
|
environmentType: 'production',
|
|
|
|
action: 'updated',
|
|
|
|
});
|
|
|
|
});
|
|
|
|
// Environment-scoped feature events. All of them are recorded as an 'updated'
// action, with the environment type looked up through resolveEnvironmentType
// and the cachedEnvironments lookup.
for (const scopedEvent of [
    FEATURE_STRATEGY_ADD,
    FEATURE_STRATEGY_REMOVE,
    FEATURE_STRATEGY_UPDATE,
    FEATURE_ENVIRONMENT_DISABLED,
    FEATURE_ENVIRONMENT_ENABLED,
]) {
    eventStore.on(
        scopedEvent,
        async ({ featureName, project, environment }) => {
            const environmentType = await resolveEnvironmentType(
                environment,
                cachedEnvironments,
            );
            featureFlagUpdateTotal.increment({
                toggle: featureName,
                project,
                environment,
                environmentType,
                action: 'updated',
            });
        },
    );
}
|
|
|
|
// Archive / revive events are not tied to an environment, so both environment
// labels are recorded as 'n/a'.
for (const [archiveEvent, action] of [
    [FEATURE_ARCHIVED, 'archived'],
    [FEATURE_REVIVED, 'revived'],
] as const) {
    eventStore.on(archiveEvent, ({ featureName, project }) => {
        featureFlagUpdateTotal.increment({
            toggle: featureName,
            project,
            environment: 'n/a',
            environmentType: 'n/a',
            action,
        });
    });
}
|
|
|
|
// Count project lifecycle events; the event name itself is used as the
// `action` label value.
for (const projectAction of [
    PROJECT_CREATED,
    PROJECT_ARCHIVED,
    PROJECT_REVIVED,
    PROJECT_DELETED,
]) {
    eventStore.on(projectAction, () => {
        projectActionsCounter.increment({ action: projectAction });
    });
}
|
|
|
|
|
|
|
|
const logger = config.getLogger('metrics.ts');

// For every reported metric, add its `yes` count to the active='true' series
// and its `no` count to the active='false' series of the same flag / app.
// Any failure is logged as a warning rather than propagated to the emitter.
eventBus.on(CLIENT_METRICS, (metrics: IClientMetricsEnv[]) => {
    try {
        for (const { featureName, appName, yes, no } of metrics) {
            const countsByActive = [
                ['true', yes],
                ['false', no],
            ] as const;
            for (const [active, count] of countsByActive) {
                featureFlagUsageTotal.increment(
                    {
                        toggle: featureName,
                        active,
                        appName,
                    },
                    count,
                );
            }
        }
    } catch (e) {
        logger.warn('Metrics registration failed', e);
    }
});
|
2024-07-11 11:39:38 +02:00
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
// Count SDK heartbeats per SDK name/version. Heartbeats missing either value
// are ignored. The platform / yggdrasil / spec labels are only taken from the
// heartbeat metadata when the 'extendedMetrics' flag is enabled; otherwise
// (or when a value is absent) they are recorded as 'not-set'.
eventStore.on(CLIENT_REGISTER, (heartbeatEvent: ISdkHeartbeat) => {
    const { sdkName, sdkVersion } = heartbeatEvent;
    if (!(sdkName && sdkVersion)) {
        return;
    }

    const details = flagResolver.isEnabled('extendedMetrics')
        ? heartbeatEvent.metadata
        : undefined;

    clientSdkVersionUsage.increment({
        sdk_name: sdkName,
        sdk_version: sdkVersion,
        platform_name: details?.platformName ?? 'not-set',
        platform_version: details?.platformVersion ?? 'not-set',
        yggdrasil_version: details?.yggdrasilVersion ?? 'not-set',
        spec_version: details?.specVersion ?? 'not-set',
    });
});
|
2024-07-31 14:52:39 +02:00
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
// Count PROJECT_ENVIRONMENT_REMOVED events per project.
eventStore.on(PROJECT_ENVIRONMENT_REMOVED, (removal) => {
    projectEnvironmentsDisabled.increment({ project_id: removal.project });
});
|
feat: create gauges for all resource limits (#7718)
This PR adds Grafana gauges for all the existing resource limits. The
primary purpose is to be able to use this in alerting. Secondarily, we
can also use it to get better insights into how many customers have
increased their limits, as well as how many people are approaching their
limit, regardless of whether it's been increased or not.
## Discussion points
### Implementation
The first approach I took (in
https://github.com/Unleash/unleash/pull/7718/commits/87528b4c6785899d7afec34721cb9f0df9bf0bfc),
was to add a new gauge for each resource limit. However, there's a lot
of boilerplate for it.
I thought doing it like this (the current implementation) would make it
easier. We should still be able to use the labelName to collate this in
Grafana, as far as I understand? As a bonus, we'd automatically get new
resource limits when we add them to the schema.
``` tsx
const resourceLimit = createGauge({
name: 'resource_limit',
help: 'The maximum number of resources allowed.',
labelNames: ['resource'],
});
// ...
for (const [resource, limit] of Object.entries(config.resourceLimits)) {
resourceLimit.labels({ resource }).set(limit);
}
```
That way, when checking the stats, we should be able to do something
like this:
``` promql
resource_limit{resource="constraintValues"}
```
### Do we need to reset gauges?
I noticed that we reset gauges before setting values in them all over
the place. I don't know if that's necessary. I'd like to get that double
clarified before merging this.
2024-08-01 09:59:25 +02:00
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
// Count handled addon events, labelled by their result and destination.
eventBus.on(events.ADDON_EVENTS_HANDLED, (handled) => {
    const { result, destination } = handled;
    addonEventsHandledCounter.increment({ result, destination });
});
|
2024-08-20 09:00:28 +02:00
|
|
|
|
2024-10-18 11:11:22 +02:00
|
|
|
return {
    collectAggDbMetrics: dbMetrics.refreshMetrics,

    /**
     * Refreshes the instance-level gauges from instanceStatsService and the
     * API token store. Each gauge is reset before its new value is set.
     *
     * The steps run sequentially inside one try block, so the first failing
     * lookup stops the remaining gauges from being refreshed in this run.
     * The failure is logged as a warning and never rethrown, so the caller
     * always gets a resolved promise.
     */
    collectStaticCounters: async () => {
        try {
            featureTogglesArchivedTotal.reset();
            featureTogglesArchivedTotal.set(
                await instanceStatsService.getArchivedToggleCount(),
            );

            usersTotal.reset();
            usersTotal.set(await instanceStatsService.getRegisteredUsers());

            serviceAccounts.reset();
            serviceAccounts.set(
                await instanceStatsService.countServiceAccounts(),
            );

            trafficTotal.reset();
            trafficTotal.set(
                await instanceStatsService.getCurrentTrafficData(),
            );

            // One labelled series per API token type.
            apiTokens.reset();
            for (const [
                type,
                value,
            ] of await instanceStatsService.countApiTokensByType()) {
                apiTokens.labels({ type }).set(value);
            }

            const deprecatedTokens =
                await stores.apiTokenStore.countDeprecatedTokens();
            orphanedTokensTotal.reset();
            orphanedTokensTotal.set(deprecatedTokens.orphanedTokens);

            orphanedTokensActive.reset();
            orphanedTokensActive.set(deprecatedTokens.activeOrphanedTokens);

            legacyTokensTotal.reset();
            legacyTokensTotal.set(deprecatedTokens.legacyTokens);

            legacyTokensActive.reset();
            legacyTokensActive.set(deprecatedTokens.activeLegacyTokens);

            const previousDayMetricsBucketsCount =
                await instanceStatsService.countPreviousDayHourlyMetricsBuckets();
            enabledMetricsBucketsPreviousDay.reset();
            enabledMetricsBucketsPreviousDay.set(
                previousDayMetricsBucketsCount.enabledCount,
            );
            variantMetricsBucketsPreviousDay.reset();
            variantMetricsBucketsPreviousDay.set(
                previousDayMetricsBucketsCount.variantCount,
            );

            const activeUsers = await instanceStatsService.getActiveUsers();
            usersActive7days.reset();
            usersActive7days.set(activeUsers.last7);
            usersActive30days.reset();
            usersActive30days.set(activeUsers.last30);
            usersActive60days.reset();
            usersActive60days.set(activeUsers.last60);
            usersActive90days.reset();
            usersActive90days.set(activeUsers.last90);

            const licensedUsersStat =
                await instanceStatsService.getLicencedUsers();
            licensedUsers.reset();
            licensedUsers.set(licensedUsersStat);

            const productionChanges =
                await instanceStatsService.getProductionChanges();
            productionChanges30.reset();
            productionChanges30.set(productionChanges.last30);
            productionChanges60.reset();
            productionChanges60.set(productionChanges.last60);
            productionChanges90.reset();
            productionChanges90.set(productionChanges.last90);
        } catch (e) {
            // Collection stays best effort, but a failure must be visible:
            // a silently swallowed error would leave gauges reset or stale
            // with no trace of why.
            logger.warn('Collecting static counters failed', e);
        }
    },
};
|
|
|
|
}
|
|
|
|
/**
 * Registers the Prometheus metrics and schedules their periodic refresh.
 */
export default class MetricsMonitor {
    /**
     * Registers default, application and Postgres metrics, then schedules
     * two recurring jobs: a refresh of the static counters together with the
     * aggregated DB metrics (every two hours), and a DB pool snapshot
     * (every minute).
     *
     * Does nothing when `config.server.serverMetrics` is falsy.
     *
     * @param config - Unleash configuration; `server.serverMetrics` gates all work here.
     * @param stores - stores handed to the metric registration; `settingStore` supplies the Postgres version.
     * @param version - forwarded to `registerPrometheusMetrics`.
     * @param eventBus - bus that metric listeners subscribe to and that pool snapshots are emitted on.
     * @param instanceStatsService - forwarded to `registerPrometheusMetrics`.
     * @param schedulerService - used to schedule the recurring jobs.
     * @param db - Knex instance; its `client.pool` is sampled for pool metrics.
     */
    async startMonitoring(
        config: IUnleashConfig,
        stores: IUnleashStores,
        version: string,
        eventBus: EventEmitter,
        instanceStatsService: InstanceStatsService,
        schedulerService: SchedulerService,
        db: Knex,
    ): Promise<void> {
        if (!config.server.serverMetrics) {
            return;
        }

        collectDefaultMetrics();

        const { collectStaticCounters, collectAggDbMetrics } =
            registerPrometheusMetrics(
                config,
                stores,
                version,
                eventBus,
                instanceStatsService,
            );

        const postgresVersion = await stores.settingStore.postgresVersion();
        registerPrometheusPostgresMetrics(db, eventBus, postgresVersion);

        await schedulerService.schedule(
            async () =>
                Promise.all([collectStaticCounters(), collectAggDbMetrics()]),
            hoursToMilliseconds(2),
            'collectStaticCounters',
        );
        await schedulerService.schedule(
            // Call the method directly. Returning a `.bind(...)` result here
            // would hand the scheduler a function that is never invoked, so
            // no pool snapshot would ever be emitted.
            async () => this.registerPoolMetrics(db.client.pool, eventBus),
            minutesToMilliseconds(1),
            'registerPoolMetrics',
        );
    }

    /**
     * Emits a DB_POOL_UPDATE event on `eventBus` carrying the pool's current
     * used / free / pending-create / pending-acquire counts.
     *
     * @param pool - object exposing `numUsed`, `numFree`, `numPendingCreates` and `numPendingAcquires`.
     * @param eventBus - bus the snapshot is emitted on.
     */
    // eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
    registerPoolMetrics(pool: any, eventBus: EventEmitter): void {
        try {
            eventBus.emit(DB_POOL_UPDATE, {
                used: pool.numUsed(),
                free: pool.numFree(),
                pendingCreates: pool.numPendingCreates(),
                pendingAcquires: pool.numPendingAcquires(),
            });
        } catch {
            // Deliberately ignored: a failed pool snapshot must not break
            // the caller; the next scheduled run simply tries again.
        }
    }
}
|
2024-05-08 10:33:51 +02:00
|
|
|
|
2021-04-22 10:07:10 +02:00
|
|
|
/**
 * Factory for MetricsMonitor.
 *
 * @returns a newly constructed MetricsMonitor.
 */
export function createMetricsMonitor(): MetricsMonitor {
    const monitor = new MetricsMonitor();
    return monitor;
}
|