From 4234020b8d34fa40bd3845f621d1ea72951518cf Mon Sep 17 00:00:00 2001 From: Jaanus Sellin Date: Wed, 20 Nov 2024 09:10:07 +0200 Subject: [PATCH] feat: backfill licensed users (#8791) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **This migration introduces a query that calculates the licensed user counts and inserts them into the licensed_users table.** **The logic ensures that:** 1. All users created up to a specific date are included as active users until they are explicitly deleted. 2. Deleted users are excluded after their deletion date, except when their deletion date falls within the last 30 days or before their creation date. 3. The migration avoids duplicating data by ensuring records are only inserted if they don’t already exist in the licensed_users table. **Logic Breakdown:** **Identify User Events (user_events):** Extracts email addresses from user-related events (user-created and user-deleted) and tracks the type and timestamp of the event. This step ensures the ability to differentiate between user creation and deletion activities. **Generate a Date Range (dates):** Creates a continuous range of dates spanning from the earliest recorded event up to the current date. This ensures we analyze every date, even those without events. **Determine Active Users (active_emails):** Links dates with user events to calculate the status of each email address (active or deleted) on a given day. This step handles: - The user's creation date. - The user's deletion date (if applicable). **Calculate Daily Active User Counts (result):** For each date, counts the distinct email addresses that are active based on the conditions: - The user has no deletion date. - The user's deletion date is within the last 30 days relative to the current date. - The user's creation date is before the deletion date. --- .../20241119105837-licensed-users-backfill.js | 85 ++++++++ src/test/e2e/licensed-users-backfill.test.ts | 204 ++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 src/migrations/20241119105837-licensed-users-backfill.js create mode 100644 src/test/e2e/licensed-users-backfill.test.ts diff --git a/src/migrations/20241119105837-licensed-users-backfill.js b/src/migrations/20241119105837-licensed-users-backfill.js new file mode 100644 index 0000000000..93f2fa62a2 --- /dev/null +++ b/src/migrations/20241119105837-licensed-users-backfill.js @@ -0,0 +1,85 @@ +exports.up = (db, cb) => { + db.runSql(` + WITH user_events AS ( + SELECT + DISTINCT CASE WHEN type = 'user-deleted' THEN pre_data ->> 'email' ELSE data ->> 'email' END AS email, + type, + created_at AS event_date + FROM + events + WHERE + type IN ('user-created', 'user-deleted') + ), + dates AS ( + WITH RECURSIVE generated_dates AS ( + SELECT + MIN(event_date):: timestamp AS date + FROM + user_events + UNION ALL + SELECT + date + INTERVAL '1 day' + FROM + generated_dates + WHERE + date + INTERVAL '1 day' <= CURRENT_DATE + ) + SELECT + date :: date + FROM + generated_dates + ), + active_emails AS ( + SELECT + d.date, + ue.email, + MAX( + CASE WHEN ue.type = 'user-created' THEN ue.event_date ELSE NULL END + ) AS created_date, + MAX( + CASE WHEN ue.type = 'user-deleted' THEN ue.event_date ELSE NULL END + ) AS deleted_date + FROM + dates d + LEFT JOIN user_events ue ON ue.event_date <= d.date + GROUP BY + d.date, + ue.email + ), + result AS ( + SELECT + d.date, + COALESCE( + COUNT( + DISTINCT CASE + WHEN ae.deleted_date IS NULL + OR ae.deleted_date >= d.date - INTERVAL '30 days' + OR ae.deleted_date < ae.created_date + THEN ae.email + ELSE NULL + END + ), + 0 + ) AS active_emails_count + FROM + dates d + LEFT JOIN active_emails ae ON d.date = ae.date + GROUP BY + d.date + ORDER BY + d.date + ) INSERT INTO licensed_users (date, count) + SELECT date, active_emails_count + FROM result + WHERE EXISTS ( + SELECT 1 FROM user_events + ) + ON CONFLICT (date) DO NOTHING; + `, cb); + +}; + +exports.down = (db, cb) => { + db.runSql(``, cb); +}; + diff --git a/src/test/e2e/licensed-users-backfill.test.ts b/src/test/e2e/licensed-users-backfill.test.ts new file mode 100644 index 0000000000..43db20302e --- /dev/null +++ b/src/test/e2e/licensed-users-backfill.test.ts @@ -0,0 +1,204 @@ +import { getDbConfig } from './helpers/database-config'; +import { createTestConfig } from '../config/test-config'; +import { getInstance } from 'db-migrate'; +import { Client } from 'pg'; + +async function initSchema(db) { + const client = new Client(db); + await client.connect(); + await client.query(`DROP SCHEMA IF EXISTS ${db.schema} CASCADE`); + await client.query(`CREATE SCHEMA IF NOT EXISTS ${db.schema}`); + await client.end(); +} + +async function insertEvents(client, events) { + const values = events + .map( + (e) => + `('${e.type}', '${JSON.stringify(e.data || {})}', '${JSON.stringify( + e.pre_data || {}, + )}', '${e.created_at}', '${e.created_by}')`, + ) + .join(','); + + await client.query(` + INSERT INTO events (type, data, pre_data, created_at, created_by) + VALUES ${values}; + `); +} + +describe('licensed_users backfill', () => { + jest.setTimeout(15000); + + let client: any; + let dbm: any; + const config = createTestConfig({ + db: { + ...getDbConfig(), + pool: { min: 1, max: 4 }, + schema: 'licensed_users_test', + ssl: false, + }, + }); + + beforeAll(async () => { + await initSchema(config.db); + + dbm = getInstance(true, { + cwd: `${__dirname}/../../`, + config: { e2e: { ...config.db, connectionTimeoutMillis: 2000 } }, + env: 'e2e', + }); + + await dbm.up('20241114103646-licensed-users.js'); + client = new Client(config.db); + await client.connect(); + await client.query(`SET search_path = 'licensed_users_test';`); + }); + + afterAll(async () => { + await client.end(); + await dbm.reset(); + }); + + beforeEach(async () => { + await client.query('delete from events;'); + await client.query('delete from licensed_users;'); + await client.query( + "DELETE FROM migrations WHERE name = '/20241119105837-licensed-users-backfill';", + ); + }); + + test('Counts users from their creation date until their deletion date and 30 days', async () => { + await insertEvents(client, [ + { + type: 'user-created', + data: { email: 'user1@test.com' }, + created_at: '2024-10-01', + created_by: 'test', + }, + { + type: 'user-deleted', + pre_data: { email: 'user1@test.com' }, + created_at: '2024-10-05', + created_by: 'test', + }, + ]); + + await dbm.up('20241119105837-licensed-users-backfill.js'); + + const { rows } = await client.query( + "SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;", + ); + expect(rows.find((row) => row.date === '2024-10-01').count).toBe(1); + expect(rows.find((row) => row.date === '2024-10-02').count).toBe(1); + expect(rows.find((row) => row.date === '2024-10-05').count).toBe(1); + expect(rows.find((row) => row.date === '2024-11-04').count).toBe(1); + expect(rows.find((row) => row.date === '2024-11-05').count).toBe(0); // 30 days has passed + }); + + test('Counts multiple users correctly over their active periods, including 30-day retention from deletion', async () => { + await insertEvents(client, [ + { + type: 'user-created', + data: { email: 'user1@test.com' }, + created_at: '2024-09-01', + created_by: 'test', + }, + { + type: 'user-created', + data: { email: 'user2@test.com' }, + created_at: '2024-10-01', + created_by: 'test', + }, + { + type: 'user-deleted', + pre_data: { email: 'user1@test.com' }, + created_at: '2024-10-05', + created_by: 'test', + }, + ]); + + await dbm.up('20241119105837-licensed-users-backfill.js'); + + const { rows } = await client.query( + "SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;", + ); + expect(rows.find((row) => row.date === '2024-09-01').count).toBe(1); // user1 created + expect(rows.find((row) => row.date === '2024-10-01').count).toBe(2); // user1 active, user2 created + expect(rows.find((row) => row.date === '2024-10-05').count).toBe(2); // user1 within retention, user2 active + expect(rows.find((row) => row.date === '2024-11-19').count).toBe(1); // Only user2 active, user1's retention has ended + }); + + test('Handles users created but not deleted', async () => { + await insertEvents(client, [ + { + type: 'user-created', + data: { email: 'user1@test.com' }, + created_at: '2024-11-01', + created_by: 'test', + }, + ]); + + await dbm.up('20241119105837-licensed-users-backfill.js'); + + const { rows } = await client.query( + "SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;", + ); + expect(rows.find((row) => row.date === '2024-11-01').count).toBe(1); // user1 created + expect(rows.find((row) => row.date === '2024-11-19').count).toBe(1); // user1 still active + }); + + test('Handles overlapping creation and deletion periods with multiple events for the same email (one month earlier)', async () => { + await insertEvents(client, [ + { + type: 'user-created', + data: { email: 'user1@test.com' }, + created_at: '2024-10-01 00:00:00+00', + created_by: 'test', + }, + { + type: 'user-deleted', + pre_data: { email: 'user1@test.com' }, + created_at: '2024-10-01 00:01:00+00', + created_by: 'test', + }, + { + type: 'user-created', + data: { email: 'user1@test.com' }, + created_at: '2024-10-03', + created_by: 'test', + }, + { + type: 'user-deleted', + pre_data: { email: 'user1@test.com' }, + created_at: '2024-10-07', + created_by: 'test', + }, + { + type: 'user-created', + data: { email: 'user2@test.com' }, + created_at: '2024-10-05', + created_by: 'test', + }, + { + type: 'user-deleted', + pre_data: { email: 'user2@test.com' }, + created_at: '2024-10-10', + created_by: 'test', + }, + ]); + + await dbm.up('20241119105837-licensed-users-backfill.js'); + + const { rows } = await client.query( + "SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;", + ); + expect(rows.find((row) => row.date === '2024-10-01').count).toBe(1); // user1 created and deleted on the same day + expect(rows.find((row) => row.date === '2024-10-03').count).toBe(1); // user1 re-created + expect(rows.find((row) => row.date === '2024-10-05').count).toBe(2); // user1 within retention, user2 created + expect(rows.find((row) => row.date === '2024-10-07').count).toBe(2); // user1 within retention, user2 active + expect(rows.find((row) => row.date === '2024-11-07').count).toBe(1); // user2 within retention, user1 expired + expect(rows.find((row) => row.date === '2024-11-10').count).toBe(0); // Both users expired + }); +});