2021-02-16 14:30:08 +01:00
|
|
|
import client from 'prom-client';
|
2021-04-22 10:07:10 +02:00
|
|
|
import EventEmitter from 'events';
|
|
|
|
import { Knex } from 'knex';
|
2021-04-29 10:21:29 +02:00
|
|
|
import * as events from './metric-events';
|
2021-02-16 14:30:08 +01:00
|
|
|
import {
|
2021-11-02 15:13:46 +01:00
|
|
|
DB_POOL_UPDATE,
|
2018-05-23 11:24:24 +02:00
|
|
|
FEATURE_ARCHIVED,
|
2021-11-02 15:13:46 +01:00
|
|
|
FEATURE_CREATED,
|
2018-05-23 11:24:24 +02:00
|
|
|
FEATURE_REVIVED,
|
2021-11-12 13:15:51 +01:00
|
|
|
FEATURE_STRATEGY_ADD,
|
|
|
|
FEATURE_STRATEGY_REMOVE,
|
|
|
|
FEATURE_STRATEGY_UPDATE,
|
2022-09-08 11:01:27 +02:00
|
|
|
FEATURE_ENVIRONMENT_ENABLED,
|
|
|
|
FEATURE_ENVIRONMENT_DISABLED,
|
|
|
|
FEATURE_VARIANTS_UPDATED,
|
|
|
|
FEATURE_METADATA_UPDATED,
|
2021-11-02 15:13:46 +01:00
|
|
|
FEATURE_UPDATED,
|
2021-12-09 21:02:58 +01:00
|
|
|
CLIENT_METRICS,
|
2022-07-22 11:00:22 +02:00
|
|
|
CLIENT_REGISTER,
|
2021-04-29 10:21:29 +02:00
|
|
|
} from './types/events';
|
2021-04-22 10:07:10 +02:00
|
|
|
import { IUnleashConfig } from './types/option';
|
|
|
|
import { IUnleashStores } from './types/stores';
|
2021-11-02 15:13:46 +01:00
|
|
|
import { hoursToMilliseconds, minutesToMilliseconds } from 'date-fns';
|
2021-05-28 11:10:24 +02:00
|
|
|
import Timer = NodeJS.Timer;
|
2023-09-18 15:05:17 +02:00
|
|
|
import { InstanceStatsService } from './features/instance-stats/instance-stats-service';
|
2023-02-15 09:13:32 +01:00
|
|
|
import { ValidatedClientMetrics } from './services/client-metrics/schema';
|
2020-04-14 22:29:11 +02:00
|
|
|
|
2021-04-22 10:07:10 +02:00
|
|
|
export default class MetricsMonitor {
|
2021-05-28 11:10:24 +02:00
|
|
|
timer?: Timer;
|
|
|
|
|
|
|
|
poolMetricsTimer?: Timer;
|
2021-02-16 14:30:08 +01:00
|
|
|
|
2020-12-16 14:49:11 +01:00
|
|
|
/**
 * Creates a monitor with no timers scheduled.
 *
 * Both timer handles start out as `null`; `stopMonitoring` passes them
 * to `clearInterval` as-is, so stopping a never-started monitor is safe.
 */
constructor() {
    this.timer = this.poolMetricsTimer = null;
}
|
|
|
|
|
2021-04-22 10:07:10 +02:00
|
|
|
/**
 * Registers the Prometheus metrics exposed by this instance and wires them
 * to the event bus, the event store and the instance stats service.
 *
 * Does nothing when `config.server.serverMetrics` is falsy. Otherwise it:
 *  - enables prom-client's default process metrics,
 *  - creates the request/DB duration summaries, the toggle counters and
 *    the instance-wide gauges,
 *  - refreshes the gauges from `instanceStatsService.getStats()` on the
 *    next tick and then every two hours,
 *  - subscribes the counters/summaries to their source events,
 *  - delegates DB pool metrics to `configureDbMetrics`.
 *
 * The method is `async` so that it really returns the `Promise<void>` its
 * signature declares (previously it returned `undefined`). The body holds
 * no `await`, so all registration still happens synchronously in the call.
 *
 * @param config - server configuration; only `server.serverMetrics` is read here
 * @param stores - store collection; only `eventStore` is used here
 * @param version - value of the `version` label on `feature_toggles_total`
 * @param eventBus - emitter carrying request/DB timing and client metrics events
 * @param instanceStatsService - source of the periodically collected gauges
 * @param db - Knex instance forwarded to `configureDbMetrics`
 * @returns a promise that resolves once all metrics and listeners are registered
 */
async startMonitoring(
    config: IUnleashConfig,
    stores: IUnleashStores,
    version: string,
    eventBus: EventEmitter,
    instanceStatsService: InstanceStatsService,
    db: Knex,
): Promise<void> {
    if (!config.server.serverMetrics) {
        return;
    }

    const { eventStore } = stores;

    client.collectDefaultMetrics();

    const requestDuration = new client.Summary({
        name: 'http_request_duration_milliseconds',
        help: 'App response time',
        labelNames: ['path', 'method', 'status', 'appName'],
        percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
        maxAgeSeconds: 600,
        ageBuckets: 5,
    });
    const dbDuration = new client.Summary({
        name: 'db_query_duration_seconds',
        help: 'DB query duration time',
        labelNames: ['store', 'action'],
        percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
        maxAgeSeconds: 600,
        ageBuckets: 5,
    });
    const featureToggleUpdateTotal = new client.Counter({
        name: 'feature_toggle_update_total',
        help: 'Number of times a toggle has been updated. Environment label would be "n/a" when it is not available, e.g. when a feature toggle is created.',
        labelNames: ['toggle', 'project', 'environment'],
    });
    const featureToggleUsageTotal = new client.Counter({
        name: 'feature_toggle_usage_total',
        help: 'Number of times a feature toggle has been used',
        labelNames: ['toggle', 'active', 'appName'],
    });
    const featureTogglesTotal = new client.Gauge({
        name: 'feature_toggles_total',
        help: 'Number of feature toggles',
        labelNames: ['version'],
    });
    const usersTotal = new client.Gauge({
        name: 'users_total',
        help: 'Number of users',
    });
    const usersActive7days = new client.Gauge({
        name: 'users_active_7',
        help: 'Number of users active in the last 7 days',
    });
    const usersActive30days = new client.Gauge({
        name: 'users_active_30',
        help: 'Number of users active in the last 30 days',
    });
    const usersActive60days = new client.Gauge({
        name: 'users_active_60',
        help: 'Number of users active in the last 60 days',
    });
    const usersActive90days = new client.Gauge({
        name: 'users_active_90',
        help: 'Number of users active in the last 90 days',
    });
    const projectsTotal = new client.Gauge({
        name: 'projects_total',
        help: 'Number of projects',
        labelNames: ['mode'],
    });
    const environmentsTotal = new client.Gauge({
        name: 'environments_total',
        help: 'Number of environments',
    });
    const groupsTotal = new client.Gauge({
        name: 'groups_total',
        help: 'Number of groups',
    });

    const rolesTotal = new client.Gauge({
        name: 'roles_total',
        help: 'Number of roles',
    });

    const customRootRolesTotal = new client.Gauge({
        name: 'custom_root_roles_total',
        help: 'Number of custom root roles',
    });

    const customRootRolesInUseTotal = new client.Gauge({
        name: 'custom_root_roles_in_use_total',
        help: 'Number of custom root roles in use',
    });

    const segmentsTotal = new client.Gauge({
        name: 'segments_total',
        help: 'Number of segments',
    });

    const contextTotal = new client.Gauge({
        name: 'context_total',
        help: 'Number of context',
    });

    const strategiesTotal = new client.Gauge({
        name: 'strategies_total',
        help: 'Number of strategies',
    });

    const clientAppsTotal = new client.Gauge({
        name: 'client_apps_total',
        help: 'Number of registered client apps aggregated by range by last seen',
        labelNames: ['range'],
    });

    const samlEnabled = new client.Gauge({
        name: 'saml_enabled',
        help: 'Whether SAML is enabled',
    });

    const oidcEnabled = new client.Gauge({
        name: 'oidc_enabled',
        help: 'Whether OIDC is enabled',
    });

    const clientSdkVersionUsage = new client.Counter({
        name: 'client_sdk_versions',
        help: 'Which sdk versions are being used',
        labelNames: ['sdk_name', 'sdk_version'],
    });

    const optimal304DiffingCounter = new client.Counter({
        name: 'optimal_304_diffing',
        help: 'Count the Optimal 304 diffing with status',
        labelNames: ['status'],
    });

    // Pulls a fresh stats snapshot and overwrites every instance-wide gauge.
    // Each gauge is reset before it is set so that label combinations which
    // are absent from the new snapshot do not linger.
    async function collectStaticCounters() {
        try {
            const stats = await instanceStatsService.getStats();

            featureTogglesTotal.reset();
            featureTogglesTotal.labels(version).set(stats.featureToggles);

            usersTotal.reset();
            usersTotal.set(stats.users);

            usersActive7days.reset();
            usersActive7days.set(stats.activeUsers.last7);
            usersActive30days.reset();
            usersActive30days.set(stats.activeUsers.last30);
            usersActive60days.reset();
            usersActive60days.set(stats.activeUsers.last60);
            usersActive90days.reset();
            usersActive90days.set(stats.activeUsers.last90);

            projectsTotal.reset();
            stats.projects.forEach((projectStat) => {
                projectsTotal
                    .labels({ mode: projectStat.mode })
                    .set(projectStat.count);
            });

            environmentsTotal.reset();
            environmentsTotal.set(stats.environments);

            groupsTotal.reset();
            groupsTotal.set(stats.groups);

            rolesTotal.reset();
            rolesTotal.set(stats.roles);

            customRootRolesTotal.reset();
            customRootRolesTotal.set(stats.customRootRoles);

            customRootRolesInUseTotal.reset();
            customRootRolesInUseTotal.set(stats.customRootRolesInUse);

            segmentsTotal.reset();
            segmentsTotal.set(stats.segments);

            contextTotal.reset();
            contextTotal.set(stats.contextFields);

            strategiesTotal.reset();
            strategiesTotal.set(stats.strategies);

            samlEnabled.reset();
            samlEnabled.set(stats.SAMLenabled ? 1 : 0);

            oidcEnabled.reset();
            oidcEnabled.set(stats.OIDCenabled ? 1 : 0);

            clientAppsTotal.reset();
            stats.clientApps.forEach((clientStat) =>
                clientAppsTotal
                    .labels({ range: clientStat.range })
                    .set(clientStat.count),
            );
        } catch (e) {
            // Best-effort: any failure while fetching or applying the stats
            // is swallowed so the returned promise never rejects. Gauges
            // already updated in this run keep their new value; the rest
            // keep whatever they had before.
            // NOTE(review): consider logging `e` once a logger is available here.
        }
    }

    // Collect once right after the current call stack unwinds, then every
    // two hours. The interval is unref'd, so it does not keep the process
    // alive; the handle is stored for stopMonitoring().
    process.nextTick(() => {
        collectStaticCounters();
        this.timer = setInterval(
            () => collectStaticCounters(),
            hoursToMilliseconds(2),
        ).unref();
    });

    eventBus.on(
        events.REQUEST_TIME,
        ({ path, method, time, statusCode, appName }) => {
            requestDuration
                .labels(path, method, statusCode, appName)
                .observe(time);
        },
    );

    eventBus.on('optimal304Differ', ({ status }) => {
        optimal304DiffingCounter.labels(status).inc();
    });

    eventBus.on(events.DB_TIME, ({ store, action, time }) => {
        dbDuration.labels(store, action).observe(time);
    });

    // Every event below bumps feature_toggle_update_total by one.
    // `fixedEnvironment` is the literal used for the environment label;
    // when it is omitted the label is taken from the event payload.
    const toggleUpdateSources: {
        event: string;
        fixedEnvironment?: string;
    }[] = [
        { event: FEATURE_CREATED, fixedEnvironment: 'n/a' },
        { event: FEATURE_VARIANTS_UPDATED, fixedEnvironment: 'n/a' },
        { event: FEATURE_METADATA_UPDATED, fixedEnvironment: 'n/a' },
        { event: FEATURE_UPDATED, fixedEnvironment: 'default' },
        { event: FEATURE_STRATEGY_ADD },
        { event: FEATURE_STRATEGY_REMOVE },
        { event: FEATURE_STRATEGY_UPDATE },
        { event: FEATURE_ENVIRONMENT_DISABLED },
        { event: FEATURE_ENVIRONMENT_ENABLED },
        { event: FEATURE_ARCHIVED, fixedEnvironment: 'n/a' },
        { event: FEATURE_REVIVED, fixedEnvironment: 'n/a' },
    ];
    for (const { event, fixedEnvironment } of toggleUpdateSources) {
        eventStore.on(event, ({ featureName, project, environment }) => {
            featureToggleUpdateTotal
                .labels(featureName, project, fixedEnvironment ?? environment)
                .inc();
        });
    }

    // Client-reported usage: one increment for the "yes" evaluations and
    // one for the "no" evaluations of every toggle in the bucket.
    eventBus.on(CLIENT_METRICS, (m: ValidatedClientMetrics) => {
        for (const [toggleName, counts] of Object.entries(m.bucket.toggles)) {
            featureToggleUsageTotal
                .labels(toggleName, 'true', m.appName)
                .inc(counts.yes);
            featureToggleUsageTotal
                .labels(toggleName, 'false', m.appName)
                .inc(counts.no);
        }
    });

    // Only sdkVersion values containing ':' are counted; the text before
    // the first ':' becomes sdk_name and the text after it sdk_version.
    eventStore.on(CLIENT_REGISTER, (m) => {
        if (m.sdkVersion && m.sdkVersion.indexOf(':') > -1) {
            const [sdkName, sdkVersion] = m.sdkVersion.split(':');
            clientSdkVersionUsage.labels(sdkName, sdkVersion).inc();
        }
    });

    this.configureDbMetrics(db, eventBus);
}
|
|
|
|
|
2021-04-22 10:07:10 +02:00
|
|
|
/**
 * Stops periodic work by clearing both interval handles held by this
 * monitor: `timer` first, then `poolMetricsTimer`.
 */
stopMonitoring(): void {
    for (const handle of [this.timer, this.poolMetricsTimer]) {
        clearInterval(handle);
    }
}
|
2021-02-04 14:14:46 +01:00
|
|
|
|
2021-04-22 10:07:10 +02:00
|
|
|
/**
 * Registers the Prometheus gauges that describe the database connection
 * pool and wires them to `DB_POOL_UPDATE` events.
 *
 * `db_pool_min` and `db_pool_max` are set once from `db.client.pool`. The
 * free / used / pending gauges are refreshed from every `DB_POOL_UPDATE`
 * event received on `eventBus`. One snapshot is published immediately via
 * `registerPoolMetrics`, then again every minute by an interval stored in
 * `poolMetricsTimer`.
 *
 * Does nothing when `db` or `db.client` is falsy.
 *
 * @param db - Knex instance whose `client.pool` is observed.
 * @param eventBus - Emitter on which `DB_POOL_UPDATE` is published and consumed.
 */
configureDbMetrics(db: Knex, eventBus: EventEmitter): void {
    if (!db || !db.client) {
        return;
    }

    // Static pool limits: written once, never refreshed.
    const minGauge = new client.Gauge({
        name: 'db_pool_min',
        help: 'Minimum DB pool size',
    });
    minGauge.set(db.client.pool.min);

    const maxGauge = new client.Gauge({
        name: 'db_pool_max',
        help: 'Maximum DB pool size',
    });
    maxGauge.set(db.client.pool.max);

    // Live pool counters: updated by the DB_POOL_UPDATE listener below.
    const freeGauge = new client.Gauge({
        name: 'db_pool_free',
        help: 'Current free connections in DB pool',
    });
    const usedGauge = new client.Gauge({
        name: 'db_pool_used',
        help: 'Current connections in use in DB pool',
    });
    const pendingCreatesGauge = new client.Gauge({
        name: 'db_pool_pending_creates',
        help: 'how many asynchronous create calls are running in DB pool',
    });
    const pendingAcquiresGauge = new client.Gauge({
        name: 'db_pool_pending_acquires',
        help: 'how many acquires are waiting for a resource to be released in DB pool',
    });

    eventBus.on(DB_POOL_UPDATE, (snapshot) => {
        freeGauge.set(snapshot.free);
        usedGauge.set(snapshot.used);
        pendingCreatesGauge.set(snapshot.pendingCreates);
        pendingAcquiresGauge.set(snapshot.pendingAcquires);
    });

    // `db.client.pool` is re-read on every tick rather than captured once.
    const publishPoolSnapshot = () =>
        this.registerPoolMetrics(db.client.pool, eventBus);

    publishPoolSnapshot();
    this.poolMetricsTimer = setInterval(
        publishPoolSnapshot,
        minutesToMilliseconds(1),
    );
    this.poolMetricsTimer.unref();
}
|
|
|
|
|
2021-04-22 10:07:10 +02:00
|
|
|
/**
 * Publishes the current pool counters as a `DB_POOL_UPDATE` event.
 *
 * The payload carries `used`, `free`, `pendingCreates` and
 * `pendingAcquires`, read from the matching `num*()` methods on `pool`.
 * Any error thrown while reading the counters or emitting the event is
 * swallowed, so a failure here never propagates to the caller.
 *
 * @param pool - Object exposing `numUsed`, `numFree`, `numPendingCreates`
 *   and `numPendingAcquires`.
 * @param eventBus - Emitter the `DB_POOL_UPDATE` event is sent on.
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
registerPoolMetrics(pool: any, eventBus: EventEmitter) {
    try {
        const snapshot = {
            used: pool.numUsed(),
            free: pool.numFree(),
            pendingCreates: pool.numPendingCreates(),
            pendingAcquires: pool.numPendingAcquires(),
        };
        eventBus.emit(DB_POOL_UPDATE, snapshot);
        // Errors are deliberately ignored: this is a best-effort publish.
        // eslint-disable-next-line no-empty
    } catch (e) {}
}
|
2020-12-16 14:49:11 +01:00
|
|
|
}
|
2021-04-22 10:07:10 +02:00
|
|
|
/**
 * Factory for {@link MetricsMonitor}.
 *
 * @returns A newly constructed monitor.
 */
export function createMetricsMonitor(): MetricsMonitor {
    const monitor = new MetricsMonitor();
    return monitor;
}
|
2020-12-16 14:49:11 +01:00
|
|
|
|
|
|
|
// CommonJS export of the factory, kept next to the ES `export` declarations.
// NOTE(review): assigning `module.exports` replaces the whole exports object,
// which presumably hides the other ES exports from CommonJS consumers.
// Confirm that nothing depends on them before changing this.
module.exports = {
    createMetricsMonitor: createMetricsMonitor,
};
|