1
0
mirror of https://github.com/Unleash/unleash.git synced 2025-01-31 00:16:47 +01:00
unleash.unleash/src/lib/metrics.ts

1101 lines
37 KiB
TypeScript
Raw Normal View History

import { collectDefaultMetrics } from 'prom-client';
import memoizee from 'memoizee';
import type EventEmitter from 'events';
import type { Knex } from 'knex';
2021-04-29 10:21:29 +02:00
import * as events from './metric-events';
import {
DB_POOL_UPDATE,
FEATURE_ARCHIVED,
FEATURE_CREATED,
FEATURE_REVIVED,
FEATURE_STRATEGY_ADD,
FEATURE_STRATEGY_REMOVE,
FEATURE_STRATEGY_UPDATE,
FEATURE_ENVIRONMENT_ENABLED,
FEATURE_ENVIRONMENT_DISABLED,
FEATURE_VARIANTS_UPDATED,
FEATURE_METADATA_UPDATED,
FEATURE_UPDATED,
2021-12-09 21:02:58 +01:00
CLIENT_METRICS,
CLIENT_REGISTER,
chore: Establish a baseline for the number of envs disabled per project (#6807) This PR adds a counter in Prometheus for counting the number of "environment disabled" events we get per project. The purpose of this is to establish a baseline for one of the "project management UI" project's key results. ## On gauges vs counters This PR uses a counter. Using a gauge would give you the total number of envs disabled, not the number of disable events. The difference is subtle, but important. For projects that were created before the new feature, the gauge might be appropriate. Because each disabled env would require at least one disabled event, we can get a floor of how many events were triggered for each project. However, for projects created after we introduce the planned change, we're not interested in the total envs anymore, because you can disable a hundred envs on creation with a single action. In this case, a gauge showing 100 disabled envs would be misleading, because it didn't take 100 events to disable them. So the interesting metric here is how many times did you specifically disable an environment in project settings, hence the counter. ## Assumptions and future plans To make this easier on ourselves, we make the following assumption: people primarily disable envs **when creating a project**. This means that there might be a few lagging indicators granting some projects a smaller number of events than expected, but we may be able to filter those out. Further, if we had a metric for each project and its creation date, we could correlate that with the metrics to answer the question "how many envs do people disable in the first week? Two weeks? A month?". Or worded differently: after creating a project, how long does it take for people to configure environments? Similarly, if we gather that data, it will also make filtering out the number of events for projects created **after** the new changes have been released much easier. 
The good news: Because the project creation metric with dates is a static aggregate, it can be applied at any time, even retroactively, to see the effects.
2024-04-10 08:49:15 +02:00
PROJECT_ENVIRONMENT_REMOVED,
PROJECT_CREATED,
PROJECT_ARCHIVED,
PROJECT_REVIVED,
PROJECT_DELETED,
2021-04-29 10:21:29 +02:00
} from './types/events';
import type { IUnleashConfig } from './types/option';
import type { IUnleashStores } from './types/stores';
import { hoursToMilliseconds, minutesToMilliseconds } from 'date-fns';
import type { InstanceStatsService } from './features/instance-stats/instance-stats-service';
import type { IEnvironment, ISdkHeartbeat } from './types';
import {
createCounter,
createGauge,
createSummary,
createHistogram,
} from './util/metrics';
import type { SchedulerService } from './services';
import type { IClientMetricsEnv } from './features/metrics/client-metrics/client-metrics-store-v2-type';
import { DbMetricsMonitor } from './metrics-gauge';
/**
 * Registers the gauges that describe the database connection pool and the
 * Postgres server version.
 *
 * The pool bounds are written once from `db.client.pool`; the remaining pool
 * gauges are refreshed every time `DB_POOL_UPDATE` is emitted on `eventBus`.
 * The function does nothing when `db` has no client attached.
 *
 * @param db Knex handle whose `client.pool` supplies the configured bounds.
 * @param eventBus Emitter on which `DB_POOL_UPDATE` payloads arrive.
 * @param postgresVersion Value used for the `version` label of `postgres_version`.
 */
export function registerPrometheusPostgresMetrics(
    db: Knex,
    eventBus: EventEmitter,
    postgresVersion: string,
) {
    // Without a client there is no pool to describe.
    if (!db?.client) {
        return;
    }

    // Configured bounds: set once, right after each gauge is created.
    const minSize = createGauge({
        name: 'db_pool_min',
        help: 'Minimum DB pool size',
    });
    minSize.set(db.client.pool.min);

    const maxSize = createGauge({
        name: 'db_pool_max',
        help: 'Maximum DB pool size',
    });
    maxSize.set(db.client.pool.max);

    // Live pool state: only written from the DB_POOL_UPDATE listener below.
    const freeConnections = createGauge({
        name: 'db_pool_free',
        help: 'Current free connections in DB pool',
    });
    const usedConnections = createGauge({
        name: 'db_pool_used',
        help: 'Current connections in use in DB pool',
    });
    const pendingCreateCalls = createGauge({
        name: 'db_pool_pending_creates',
        help: 'how many asynchronous create calls are running in DB pool',
    });
    const pendingAcquireCalls = createGauge({
        name: 'db_pool_pending_acquires',
        help: 'how many acquires are waiting for a resource to be released in DB pool',
    });

    eventBus.on(
        DB_POOL_UPDATE,
        ({ free, used, pendingCreates, pendingAcquires }) => {
            freeConnections.set(free);
            usedConnections.set(used);
            pendingCreateCalls.set(pendingCreates);
            pendingAcquireCalls.set(pendingAcquires);
        },
    );

    // The version is carried in the label; the sample value is a constant 1.
    const postgresVersionGauge = createGauge({
        name: 'postgres_version',
        help: 'Which version of postgres is running (SHOW server_version)',
        labelNames: ['version'],
    });
    postgresVersionGauge.labels({ version: postgresVersion }).set(1);
}
/**
 * Creates the application-level Prometheus metrics and subscribes them to
 * the event bus and the event store.
 *
 * Three kinds of metrics are set up here:
 * - counters/summaries/histograms that are updated from events,
 * - gauges registered on a `DbMetricsMonitor`, each described by a `query`
 *   and a `map` function,
 * - plain gauges that are rewritten by the returned `collectStaticCounters`.
 *
 * @param config Supplies the logger, flag resolver, rate limits and resource limits.
 * @param stores Event store (flag/project events), environment store and read models.
 * @param version Value used for the `version` label of `feature_toggles_total`.
 * @param eventBus Emitter carrying timing, client-metrics and other metric events.
 * @param instanceStatsService Source of the instance statistics behind the gauges.
 * @returns `collectAggDbMetrics`, which is the monitor's `refreshMetrics`, and
 *   `collectStaticCounters`, which rewrites the plain gauges. The latter never
 *   rejects: a failure is logged and the remaining gauges keep their
 *   previous values.
 */
export function registerPrometheusMetrics(
    config: IUnleashConfig,
    stores: IUnleashStores,
    version: string,
    eventBus: EventEmitter,
    instanceStatsService: InstanceStatsService,
) {
    const { eventStore, environmentStore } = stores;
    const { flagResolver } = config;
    const logger = config.getLogger('metrics.ts');
    const dbMetrics = new DbMetricsMonitor(config);

    const cachedEnvironments: () => Promise<IEnvironment[]> = memoizee(
        async () => environmentStore.getAll(),
        {
            promise: true,
            maxAge: hoursToMilliseconds(1),
        },
    );

    // Looks up the type of an environment by name. This runs inside async
    // event listeners, where a rejection would go unhandled, so a failing
    // lookup is logged and reported as 'unknown' instead of being thrown.
    const resolveEnvironmentType = async (
        environment: string,
    ): Promise<string> => {
        try {
            const environments = await cachedEnvironments();
            const env = environments.find((e) => e.name === environment);
            return env ? env.type : 'unknown';
        } catch (e) {
            logger.warn(
                `Could not resolve the type of environment "${environment}"`,
                e,
            );
            return 'unknown';
        }
    };

    // Resets a gauge and writes a value that has already been fetched, so a
    // slow or failing query never leaves the gauge sitting at its reset value.
    const replaceGaugeValue = (
        gauge: { reset: () => void; set: (value: number) => void },
        value: number,
    ): void => {
        gauge.reset();
        gauge.set(value);
    };

    const requestDuration = createSummary({
        name: 'http_request_duration_milliseconds',
        help: 'App response time',
        labelNames: ['path', 'method', 'status', 'appName'],
        percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
        maxAgeSeconds: 600,
        ageBuckets: 5,
    });
    const schedulerDuration = createSummary({
        name: 'scheduler_duration_seconds',
        help: 'Scheduler duration time',
        labelNames: ['jobId'],
        percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
        maxAgeSeconds: 600,
        ageBuckets: 5,
    });
    const dbDuration = createSummary({
        name: 'db_query_duration_seconds',
        help: 'DB query duration time',
        labelNames: ['store', 'action'],
        percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
        maxAgeSeconds: 600,
        ageBuckets: 5,
    });
    const functionDuration = createSummary({
        name: 'function_duration_seconds',
        help: 'Function duration time',
        labelNames: ['functionName', 'className'],
        percentiles: [0.1, 0.5, 0.9, 0.95, 0.99],
        maxAgeSeconds: 600,
        ageBuckets: 5,
    });
    const featureFlagUpdateTotal = createCounter({
        name: 'feature_toggle_update_total',
        help: 'Number of times a flag has been updated. Environment label would be "n/a" when it is not available, e.g. when a feature flag is created.',
        labelNames: [
            'toggle',
            'project',
            'environment',
            'environmentType',
            'action',
        ],
    });
    const featureFlagUsageTotal = createCounter({
        name: 'feature_toggle_usage_total',
        help: 'Number of times a feature flag has been used',
        labelNames: ['toggle', 'active', 'appName'],
    });

    dbMetrics.registerGaugeDbMetric({
        name: 'feature_toggles_total',
        help: 'Number of feature flags',
        labelNames: ['version'],
        query: () => instanceStatsService.getToggleCount(),
        map: (value) => ({ value, labels: { version } }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'max_feature_environment_strategies',
        help: 'Maximum number of environment strategies in one feature',
        labelNames: ['feature', 'environment'],
        query: () =>
            stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(),
        map: (result) => ({
            value: result.count,
            labels: {
                environment: result.environment,
                feature: result.feature,
            },
        }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'max_feature_strategies',
        help: 'Maximum number of strategies in one feature',
        labelNames: ['feature'],
        query: () =>
            stores.featureStrategiesReadModel.getMaxFeatureStrategies(),
        map: (result) => ({
            value: result.count,
            labels: { feature: result.feature },
        }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'max_constraint_values',
        help: 'Maximum number of constraint values used in a single constraint',
        labelNames: ['feature', 'environment'],
        query: () => stores.featureStrategiesReadModel.getMaxConstraintValues(),
        map: (result) => ({
            value: result.count,
            labels: {
                environment: result.environment,
                feature: result.feature,
            },
        }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'max_strategy_constraints',
        help: 'Maximum number of constraints used on a single strategy',
        labelNames: ['feature', 'environment'],
        query: () =>
            stores.featureStrategiesReadModel.getMaxConstraintsPerStrategy(),
        map: (result) => ({
            value: result.count,
            labels: {
                environment: result.environment,
                feature: result.feature,
            },
        }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'largest_project_environment_size',
        help: 'The largest project environment size (bytes) based on strategies, constraints, variants and parameters',
        labelNames: ['project', 'environment'],
        query: () =>
            stores.largestResourcesReadModel.getLargestProjectEnvironments(1),
        map: (results) => {
            // NOTE(review): assumes the query returns at least one row — confirm
            // how DbMetricsMonitor treats a throwing `map` on an empty instance.
            const result = results[0];
            return {
                value: result.size,
                labels: {
                    project: result.project,
                    environment: result.environment,
                },
            };
        },
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'largest_feature_environment_size',
        help: 'The largest feature environment size (bytes) base on strategies, constraints, variants and parameters',
        labelNames: ['feature', 'environment'],
        query: () =>
            stores.largestResourcesReadModel.getLargestFeatureEnvironments(1),
        map: (results) => {
            // NOTE(review): same single-row assumption as above.
            const result = results[0];
            return {
                value: result.size,
                labels: {
                    feature: result.feature,
                    environment: result.environment,
                },
            };
        },
    });

    const featureTogglesArchivedTotal = createGauge({
        name: 'feature_toggles_archived_total',
        help: 'Number of archived feature flags',
    });
    const usersTotal = createGauge({
        name: 'users_total',
        help: 'Number of users',
    });
    const trafficTotal = createGauge({
        name: 'traffic_total',
        help: 'Traffic used current month',
    });
    const serviceAccounts = createGauge({
        name: 'service_accounts_total',
        help: 'Number of service accounts',
    });
    const apiTokens = createGauge({
        name: 'api_tokens_total',
        help: 'Number of API tokens',
        labelNames: ['type'],
    });
    const enabledMetricsBucketsPreviousDay = createGauge({
        name: 'enabled_metrics_buckets_previous_day',
        help: 'Number of hourly enabled/disabled metric buckets in the previous day',
    });
    const variantMetricsBucketsPreviousDay = createGauge({
        name: 'variant_metrics_buckets_previous_day',
        help: 'Number of hourly variant metric buckets in the previous day',
    });
    const usersActive7days = createGauge({
        name: 'users_active_7',
        help: 'Number of users active in the last 7 days',
    });
    const usersActive30days = createGauge({
        name: 'users_active_30',
        help: 'Number of users active in the last 30 days',
    });
    const usersActive60days = createGauge({
        name: 'users_active_60',
        help: 'Number of users active in the last 60 days',
    });
    const usersActive90days = createGauge({
        name: 'users_active_90',
        help: 'Number of users active in the last 90 days',
    });

    dbMetrics.registerGaugeDbMetric({
        name: 'projects_total',
        help: 'Number of projects',
        labelNames: ['mode'],
        query: () => instanceStatsService.getProjectModeCount(),
        map: (projects) =>
            projects.map((projectStat) => ({
                value: projectStat.count,
                labels: { mode: projectStat.mode },
            })),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'environments_total',
        help: 'Number of environments',
        query: () => instanceStatsService.environmentCount(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'groups_total',
        help: 'Number of groups',
        query: () => instanceStatsService.groupCount(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'roles_total',
        help: 'Number of roles',
        query: () => instanceStatsService.roleCount(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'custom_root_roles_total',
        help: 'Number of custom root roles',
        query: () => instanceStatsService.customRolesCount(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'custom_root_roles_in_use_total',
        help: 'Number of custom root roles in use',
        query: () => instanceStatsService.customRolesCountInUse(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'segments_total',
        help: 'Number of segments',
        query: () => instanceStatsService.segmentCount(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'context_total',
        help: 'Number of context',
        query: () => instanceStatsService.contextFieldCount(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'strategies_total',
        help: 'Number of strategies',
        query: () => instanceStatsService.strategiesCount(),
        map: (result) => ({ value: result }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'client_apps_total',
        help: 'Number of registered client apps aggregated by range by last seen',
        labelNames: ['range'],
        query: () => instanceStatsService.getLabeledAppCounts(),
        map: (result) =>
            Object.entries(result).map(([range, count]) => ({
                value: count,
                labels: { range },
            })),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'saml_enabled',
        help: 'Whether SAML is enabled',
        query: () => instanceStatsService.hasSAML(),
        map: (result) => ({ value: result ? 1 : 0 }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'oidc_enabled',
        help: 'Whether OIDC is enabled',
        query: () => instanceStatsService.hasOIDC(),
        map: (result) => ({ value: result ? 1 : 0 }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'password_auth_enabled',
        help: 'Whether password auth is enabled',
        query: () => instanceStatsService.hasPasswordAuth(),
        map: (result) => ({ value: result ? 1 : 0 }),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'scim_enabled',
        help: 'Whether SCIM is enabled',
        query: () => instanceStatsService.hasSCIM(),
        map: (result) => ({ value: result ? 1 : 0 }),
    });

    const clientSdkVersionUsage = createCounter({
        name: 'client_sdk_versions',
        help: 'Which sdk versions are being used',
        labelNames: [
            'sdk_name',
            'sdk_version',
            'platform_name',
            'platform_version',
            'yggdrasil_version',
            'spec_version',
        ],
    });
    const productionChanges30 = createGauge({
        name: 'production_changes_30',
        help: 'Changes made to production environment last 30 days',
        labelNames: ['environment'],
    });
    const productionChanges60 = createGauge({
        name: 'production_changes_60',
        help: 'Changes made to production environment last 60 days',
        labelNames: ['environment'],
    });
    const productionChanges90 = createGauge({
        name: 'production_changes_90',
        help: 'Changes made to production environment last 90 days',
        labelNames: ['environment'],
    });

    const rateLimits = createGauge({
        name: 'rate_limits',
        help: 'Rate limits (per minute) for METHOD/ENDPOINT pairs',
        labelNames: ['endpoint', 'method'],
    });
    // Every rate-limited endpoint is a POST; the signal endpoint limit is
    // configured per second and converted to per minute here.
    const postRateLimits: [string, number][] = [
        [
            '/api/client/metrics',
            config.metricsRateLimiting.clientMetricsMaxPerMinute,
        ],
        [
            '/api/client/register',
            config.metricsRateLimiting.clientRegisterMaxPerMinute,
        ],
        [
            '/api/frontend/metrics',
            config.metricsRateLimiting.frontendMetricsMaxPerMinute,
        ],
        [
            '/api/frontend/register',
            config.metricsRateLimiting.frontendRegisterMaxPerMinute,
        ],
        ['/api/admin/user-admin', config.rateLimiting.createUserMaxPerMinute],
        ['/auth/simple', config.rateLimiting.simpleLoginMaxPerMinute],
        [
            '/auth/reset/password-email',
            config.rateLimiting.passwordResetMaxPerMinute,
        ],
        [
            '/api/signal-endpoint/:name',
            config.rateLimiting.callSignalEndpointMaxPerSecond * 60,
        ],
    ];
    for (const [endpoint, limit] of postRateLimits) {
        rateLimits.labels({ endpoint, method: 'POST' }).set(limit);
    }

    const featureCreatedByMigration = createCounter({
        name: 'feature_created_by_migration_count',
        help: 'Feature createdBy migration count',
    });
    const eventCreatedByMigration = createCounter({
        name: 'event_created_by_migration_count',
        help: 'Event createdBy migration count',
    });
    const proxyRepositoriesCreated = createCounter({
        name: 'proxy_repositories_created',
        help: 'Proxy repositories created',
    });
    const frontendApiRepositoriesCreated = createCounter({
        name: 'frontend_api_repositories_created',
        help: 'Frontend API repositories created',
    });
    const mapFeaturesForClientDuration = createHistogram({
        name: 'map_features_for_client_duration',
        help: 'Duration of mapFeaturesForClient function',
    });

    dbMetrics.registerGaugeDbMetric({
        name: 'feature_lifecycle_stage_duration',
        labelNames: ['stage', 'project_id'],
        help: 'Duration of feature lifecycle stages',
        query: () => stores.featureLifecycleReadModel.getAllWithStageDuration(),
        map: (result) =>
            result.map((stageResult) => ({
                value: stageResult.duration,
                labels: {
                    project_id: stageResult.project,
                    stage: stageResult.stage,
                },
            })),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'onboarding_duration',
        labelNames: ['event'],
        help: 'firstLogin, secondLogin, firstFeatureFlag, firstPreLive, firstLive from first user creation',
        query: () => stores.onboardingReadModel.getInstanceOnboardingMetrics(),
        map: (result) =>
            Object.keys(result)
                // Only integer-valued entries are exported.
                .filter((key) => Number.isInteger(result[key]))
                .map((key) => ({
                    value: result[key],
                    labels: {
                        event: key,
                    },
                })),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'project_onboarding_duration',
        labelNames: ['event', 'project'],
        help: 'firstFeatureFlag, firstPreLive, firstLive from project creation',
        query: () => stores.onboardingReadModel.getProjectsOnboardingMetrics(),
        map: (projectsOnboardingMetrics) =>
            projectsOnboardingMetrics.flatMap(
                ({ project, ...projectMetrics }) =>
                    Object.keys(projectMetrics)
                        .filter((key) => Number.isInteger(projectMetrics[key]))
                        .map((key) => ({
                            value: projectMetrics[key],
                            labels: {
                                event: key,
                                project,
                            },
                        })),
            ),
    });
    dbMetrics.registerGaugeDbMetric({
        name: 'feature_lifecycle_stage_count_by_project',
        help: 'Count features in a given stage by project id',
        labelNames: ['stage', 'project_id'],
        query: () => stores.featureLifecycleReadModel.getStageCountByProject(),
        map: (result) =>
            result.map((stageResult) => ({
                value: stageResult.count,
                labels: {
                    project_id: stageResult.project,
                    stage: stageResult.stage,
                },
            })),
    });

    const featureLifecycleStageEnteredCounter = createCounter({
        name: 'feature_lifecycle_stage_entered',
        help: 'Count how many features entered a given stage',
        labelNames: ['stage'],
    });
    const projectActionsCounter = createCounter({
        name: 'project_actions_count',
        help: 'Count project actions',
        labelNames: ['action'],
    });
    const projectEnvironmentsDisabled = createCounter({
        name: 'project_environments_disabled',
        help: 'How many "environment disabled" events we have received for each project',
        labelNames: ['project_id'],
    });
    const orphanedTokensTotal = createGauge({
        name: 'orphaned_api_tokens_total',
        help: 'Number of API tokens without a project',
    });
    const orphanedTokensActive = createGauge({
        name: 'orphaned_api_tokens_active',
        help: 'Number of API tokens without a project, last seen within 3 months',
    });
    const legacyTokensTotal = createGauge({
        name: 'legacy_api_tokens_total',
        help: 'Number of API tokens with v1 format',
    });
    const legacyTokensActive = createGauge({
        name: 'legacy_api_tokens_active',
        help: 'Number of API tokens with v1 format, last seen within 3 months',
    });
    const exceedsLimitErrorCounter = createCounter({
        name: 'exceeds_limit_error',
        help: 'The number of exceeds limit errors registered by this instance.',
        labelNames: ['resource', 'limit'],
    });
    const requestOriginCounter = createCounter({
        name: 'request_origin_counter',
        help: 'Number of authenticated requests, including origin information.',
        labelNames: ['type', 'method', 'source'],
    });
    const resourceLimit = createGauge({
        name: 'resource_limit',
        help: 'The maximum number of resources allowed.',
        labelNames: ['resource'],
    });
    // One sample per configured limit, keyed by the resource name.
    for (const [resource, limit] of Object.entries(config.resourceLimits)) {
        resourceLimit.labels({ resource }).set(limit);
    }
    const licensedUsers = createGauge({
        name: 'licensed_users',
        help: 'The number of licensed users.',
    });
    const addonEventsHandledCounter = createCounter({
        name: 'addon_events_handled',
        help: 'Events handled by addons and the result.',
        labelNames: ['result', 'destination'],
    });

    // register event listeners
    eventBus.on(
        events.EXCEEDS_LIMIT,
        ({ resource, limit }: { resource: string; limit: number }) => {
            exceedsLimitErrorCounter.increment({ resource, limit });
        },
    );
    eventBus.on(
        events.STAGE_ENTERED,
        (entered: { stage: string; feature: string }) => {
            featureLifecycleStageEnteredCounter.increment({
                stage: entered.stage,
            });
        },
    );
    eventBus.on(
        events.REQUEST_TIME,
        ({ path, method, time, statusCode, appName }) => {
            requestDuration
                .labels({
                    path,
                    method,
                    status: statusCode,
                    appName,
                })
                .observe(time);
        },
    );
    eventBus.on(events.SCHEDULER_JOB_TIME, ({ jobId, time }) => {
        // Object form, matching every other labelled observation in this file.
        schedulerDuration.labels({ jobId }).observe(time);
    });
    eventBus.on(events.FUNCTION_TIME, ({ functionName, className, time }) => {
        functionDuration
            .labels({
                functionName,
                className,
            })
            .observe(time);
    });
    eventBus.on(events.EVENTS_CREATED_BY_PROCESSED, ({ updated }) => {
        eventCreatedByMigration.inc(updated);
    });
    eventBus.on(events.FEATURES_CREATED_BY_PROCESSED, ({ updated }) => {
        featureCreatedByMigration.inc(updated);
    });
    eventBus.on(events.DB_TIME, ({ store, action, time }) => {
        dbDuration
            .labels({
                store,
                action,
            })
            .observe(time);
    });
    eventBus.on(events.PROXY_REPOSITORY_CREATED, () => {
        proxyRepositoriesCreated.inc();
    });
    eventBus.on(events.FRONTEND_API_REPOSITORY_CREATED, () => {
        frontendApiRepositoriesCreated.inc();
    });
    eventBus.on(events.PROXY_FEATURES_FOR_TOKEN_TIME, ({ duration }) => {
        mapFeaturesForClientDuration.observe(duration);
    });
    events.onMetricEvent(
        eventBus,
        events.REQUEST_ORIGIN,
        ({ type, method, source }) => {
            requestOriginCounter.increment({ type, method, source });
        },
    );
    eventBus.on(events.ADDON_EVENTS_HANDLED, ({ result, destination }) => {
        addonEventsHandledCounter.increment({ result, destination });
    });

    // Flag events that are not tied to an environment are counted with the
    // placeholder label 'n/a'.
    const flagLevelActions: [string, string][] = [
        [FEATURE_CREATED, 'created'],
        [FEATURE_VARIANTS_UPDATED, 'updated'],
        [FEATURE_METADATA_UPDATED, 'updated'],
        [FEATURE_ARCHIVED, 'archived'],
        [FEATURE_REVIVED, 'revived'],
    ];
    for (const [eventName, action] of flagLevelActions) {
        eventStore.on(eventName, ({ featureName, project }) => {
            featureFlagUpdateTotal.increment({
                toggle: featureName,
                project,
                environment: 'n/a',
                environmentType: 'n/a',
                action,
            });
        });
    }

    // FEATURE_UPDATED is always attributed to the 'default' environment.
    eventStore.on(FEATURE_UPDATED, ({ featureName, project }) => {
        featureFlagUpdateTotal.increment({
            toggle: featureName,
            project,
            environment: 'default',
            environmentType: 'production',
            action: 'updated',
        });
    });

    // Environment-scoped events name their environment; its type is looked up
    // through the cached environment list.
    const environmentScopedEvents = [
        FEATURE_STRATEGY_ADD,
        FEATURE_STRATEGY_REMOVE,
        FEATURE_STRATEGY_UPDATE,
        FEATURE_ENVIRONMENT_DISABLED,
        FEATURE_ENVIRONMENT_ENABLED,
    ];
    for (const eventName of environmentScopedEvents) {
        eventStore.on(
            eventName,
            async ({ featureName, project, environment }) => {
                const environmentType =
                    await resolveEnvironmentType(environment);
                featureFlagUpdateTotal.increment({
                    toggle: featureName,
                    project,
                    environment,
                    environmentType,
                    action: 'updated',
                });
            },
        );
    }

    // Project lifecycle events are counted under their own event name.
    for (const action of [
        PROJECT_CREATED,
        PROJECT_ARCHIVED,
        PROJECT_REVIVED,
        PROJECT_DELETED,
    ]) {
        eventStore.on(action, () => {
            projectActionsCounter.increment({ action });
        });
    }

    eventBus.on(CLIENT_METRICS, (metrics: IClientMetricsEnv[]) => {
        try {
            for (const metric of metrics) {
                featureFlagUsageTotal.increment(
                    {
                        toggle: metric.featureName,
                        active: 'true',
                        appName: metric.appName,
                    },
                    metric.yes,
                );
                featureFlagUsageTotal.increment(
                    {
                        toggle: metric.featureName,
                        active: 'false',
                        appName: metric.appName,
                    },
                    metric.no,
                );
            }
        } catch (e) {
            logger.warn('Metrics registration failed', e);
        }
    });

    eventStore.on(CLIENT_REGISTER, (heartbeatEvent: ISdkHeartbeat) => {
        if (!heartbeatEvent.sdkName || !heartbeatEvent.sdkVersion) {
            return;
        }
        // Platform details are only exported when 'extendedMetrics' is on;
        // otherwise every optional label is reported as 'not-set'.
        const metadata = flagResolver.isEnabled('extendedMetrics')
            ? heartbeatEvent.metadata
            : undefined;
        clientSdkVersionUsage.increment({
            sdk_name: heartbeatEvent.sdkName,
            sdk_version: heartbeatEvent.sdkVersion,
            platform_name: metadata?.platformName ?? 'not-set',
            platform_version: metadata?.platformVersion ?? 'not-set',
            yggdrasil_version: metadata?.yggdrasilVersion ?? 'not-set',
            spec_version: metadata?.specVersion ?? 'not-set',
        });
    });

    eventStore.on(PROJECT_ENVIRONMENT_REMOVED, ({ project }) => {
        projectEnvironmentsDisabled.increment({ project_id: project });
    });

    return {
        collectAggDbMetrics: dbMetrics.refreshMetrics,
        collectStaticCounters: async () => {
            try {
                replaceGaugeValue(
                    featureTogglesArchivedTotal,
                    await instanceStatsService.getArchivedToggleCount(),
                );
                replaceGaugeValue(
                    usersTotal,
                    await instanceStatsService.getRegisteredUsers(),
                );
                replaceGaugeValue(
                    serviceAccounts,
                    await instanceStatsService.countServiceAccounts(),
                );
                replaceGaugeValue(
                    trafficTotal,
                    await instanceStatsService.getCurrentTrafficData(),
                );

                const tokenCountsByType =
                    await instanceStatsService.countApiTokensByType();
                apiTokens.reset();
                for (const [type, value] of tokenCountsByType) {
                    apiTokens.labels({ type }).set(value);
                }

                const deprecatedTokens =
                    await stores.apiTokenStore.countDeprecatedTokens();
                replaceGaugeValue(
                    orphanedTokensTotal,
                    deprecatedTokens.orphanedTokens,
                );
                replaceGaugeValue(
                    orphanedTokensActive,
                    deprecatedTokens.activeOrphanedTokens,
                );
                replaceGaugeValue(
                    legacyTokensTotal,
                    deprecatedTokens.legacyTokens,
                );
                replaceGaugeValue(
                    legacyTokensActive,
                    deprecatedTokens.activeLegacyTokens,
                );

                const previousDayMetricsBucketsCount =
                    await instanceStatsService.countPreviousDayHourlyMetricsBuckets();
                replaceGaugeValue(
                    enabledMetricsBucketsPreviousDay,
                    previousDayMetricsBucketsCount.enabledCount,
                );
                replaceGaugeValue(
                    variantMetricsBucketsPreviousDay,
                    previousDayMetricsBucketsCount.variantCount,
                );

                const activeUsers = await instanceStatsService.getActiveUsers();
                replaceGaugeValue(usersActive7days, activeUsers.last7);
                replaceGaugeValue(usersActive30days, activeUsers.last30);
                replaceGaugeValue(usersActive60days, activeUsers.last60);
                replaceGaugeValue(usersActive90days, activeUsers.last90);

                replaceGaugeValue(
                    licensedUsers,
                    await instanceStatsService.getLicencedUsers(),
                );

                const productionChanges =
                    await instanceStatsService.getProductionChanges();
                replaceGaugeValue(productionChanges30, productionChanges.last30);
                replaceGaugeValue(productionChanges60, productionChanges.last60);
                replaceGaugeValue(productionChanges90, productionChanges.last90);
            } catch (e) {
                // Best effort: a failed collection must not reject, but it
                // should be visible. Gauges not reached keep their last value.
                logger.warn('Collecting static counters failed', e);
            }
        },
    };
}
/**
 * Entry point that switches on Prometheus metric collection for the server.
 */
export default class MetricsMonitor {
    constructor() {}

    /**
     * Registers all metrics and schedules their periodic collection.
     *
     * Does nothing when `config.server.serverMetrics` is falsy. Otherwise it
     * enables prom-client's default metrics, registers the application and
     * Postgres metrics, and schedules two jobs: one that refreshes the static
     * counters together with the DB-backed gauges (2 hour interval) and one
     * that publishes connection-pool statistics (1 minute interval).
     *
     * @param config Server configuration; `server.serverMetrics` gates everything.
     * @param stores Stores handed to the metric registration; `settingStore`
     *   also supplies the Postgres version.
     * @param version Version string forwarded to `registerPrometheusMetrics`.
     * @param eventBus Emitter the metrics listen on and pool updates are sent to.
     * @param instanceStatsService Statistics source forwarded to the registration.
     * @param schedulerService Scheduler that runs the two collection jobs.
     * @param db Knex handle whose `client.pool` is sampled.
     */
    async startMonitoring(
        config: IUnleashConfig,
        stores: IUnleashStores,
        version: string,
        eventBus: EventEmitter,
        instanceStatsService: InstanceStatsService,
        schedulerService: SchedulerService,
        db: Knex,
    ): Promise<void> {
        if (!config.server.serverMetrics) {
            return;
        }

        collectDefaultMetrics();

        const { collectStaticCounters, collectAggDbMetrics } =
            registerPrometheusMetrics(
                config,
                stores,
                version,
                eventBus,
                instanceStatsService,
            );

        const postgresVersion = await stores.settingStore.postgresVersion();
        registerPrometheusPostgresMetrics(db, eventBus, postgresVersion);

        await schedulerService.schedule(
            async () =>
                Promise.all([collectStaticCounters(), collectAggDbMetrics()]),
            hoursToMilliseconds(2),
            'collectStaticCounters',
        );

        await schedulerService.schedule(
            // The job must call the collector. Returning a bound function
            // (`.bind(...)`) would leave the pool gauges without any updates.
            async () => this.registerPoolMetrics(db.client.pool, eventBus),
            minutesToMilliseconds(1),
            'registerPoolMetrics',
        );
    }

    /**
     * Samples the connection pool and emits the numbers as a
     * `DB_POOL_UPDATE` event on `eventBus`.
     *
     * Errors are deliberately ignored so that a pool without the expected
     * `num*` methods cannot break the caller.
     *
     * @param pool Pool object exposing `numUsed`, `numFree`,
     *   `numPendingCreates` and `numPendingAcquires`.
     * @param eventBus Emitter that receives the `DB_POOL_UPDATE` event.
     */
    // eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
    registerPoolMetrics(pool: any, eventBus: EventEmitter) {
        try {
            eventBus.emit(DB_POOL_UPDATE, {
                used: pool.numUsed(),
                free: pool.numFree(),
                pendingCreates: pool.numPendingCreates(),
                pendingAcquires: pool.numPendingAcquires(),
            });
            // eslint-disable-next-line no-empty
        } catch (e) {}
    }
}
/**
 * Factory for the metrics monitor.
 *
 * @returns A newly constructed `MetricsMonitor`; every call creates a
 *   separate instance.
 */
export function createMetricsMonitor(): MetricsMonitor {
    const monitor = new MetricsMonitor();
    return monitor;
}