diff --git a/docs/immersion-tracking.md b/docs/immersion-tracking.md index 0573eab..69ae1c2 100644 --- a/docs/immersion-tracking.md +++ b/docs/immersion-tracking.md @@ -10,7 +10,7 @@ SubMiner stores immersion analytics in local SQLite (`immersion.sqlite`) by defa - Flush policy defaults to `25` writes or `500ms` max delay. - SQLite pragmas: `journal_mode=WAL`, `synchronous=NORMAL`, `foreign_keys=ON`, `busy_timeout=2500`. -## Schema (v1) +## Schema (v2) Schema versioning table: @@ -18,15 +18,15 @@ Schema versioning table: Core entities: -- `imm_videos`: video key/title/source metadata + optional media metadata fields -- `imm_sessions`: session UUID, video reference, timing/status fields -- `imm_session_telemetry`: high-frequency session aggregates over time -- `imm_session_events`: event stream with compact numeric event types +- `imm_videos`: video key/title/source metadata + optional media metadata fields, `CREATED_DATE`/`LAST_UPDATE_DATE` +- `imm_sessions`: session UUID, video reference, timing/status fields, `CREATED_DATE`/`LAST_UPDATE_DATE` +- `imm_session_telemetry`: high-frequency session aggregates over time, `CREATED_DATE`/`LAST_UPDATE_DATE` +- `imm_session_events`: event stream with compact numeric event types, `CREATED_DATE`/`LAST_UPDATE_DATE` Rollups: -- `imm_daily_rollups` -- `imm_monthly_rollups` +- `imm_daily_rollups`: includes `CREATED_DATE`/`LAST_UPDATE_DATE` +- `imm_monthly_rollups`: includes `CREATED_DATE`/`LAST_UPDATE_DATE` Primary index coverage: diff --git a/src/core/services/immersion-tracker-service.test.ts b/src/core/services/immersion-tracker-service.test.ts index 9e98128..2ba06d8 100644 --- a/src/core/services/immersion-tracker-service.test.ts +++ b/src/core/services/immersion-tracker-service.test.ts @@ -286,8 +286,8 @@ testIfSqlite('monthly rollups are grouped by calendar month', async () => { canonical_title, source_type, duration_ms, - created_at_ms, - updated_at_ms + CREATED_DATE, + LAST_UPDATE_DATE ) VALUES ( 1, 'local:/tmp/video.mkv', @@ -306,8 +306,8 @@ testIfSqlite('monthly rollups are grouped by calendar month', async () => { video_id, started_at_ms, status, - created_at_ms, - updated_at_ms, + CREATED_DATE, + LAST_UPDATE_DATE, ended_at_ms ) VALUES ( 1, @@ -363,8 +363,8 @@ testIfSqlite('monthly rollups are grouped by calendar month', async () => { video_id, started_at_ms, status, - created_at_ms, - updated_at_ms, + CREATED_DATE, + LAST_UPDATE_DATE, ended_at_ms ) VALUES ( 2, @@ -479,8 +479,8 @@ testIfSqlite('flushSingle reuses cached prepared statements', async () => { canonical_title, source_type, duration_ms, - created_at_ms, - updated_at_ms + CREATED_DATE, + LAST_UPDATE_DATE ) VALUES ( 1, 'local:/tmp/prepared.mkv', @@ -499,8 +499,8 @@ testIfSqlite('flushSingle reuses cached prepared statements', async () => { video_id, started_at_ms, status, - created_at_ms, - updated_at_ms, + CREATED_DATE, + LAST_UPDATE_DATE, ended_at_ms ) VALUES ( 1, diff --git a/src/core/services/immersion-tracker/maintenance.ts b/src/core/services/immersion-tracker/maintenance.ts index 406e7ca..17c449d 100644 --- a/src/core/services/immersion-tracker/maintenance.ts +++ b/src/core/services/immersion-tracker/maintenance.ts @@ -32,11 +32,13 @@ export function pruneRetention( } export function runRollupMaintenance(db: DatabaseSync): void { + const rollupNowMs = Date.now(); + db.exec(` - INSERT OR REPLACE INTO imm_daily_rollups ( + INSERT INTO imm_daily_rollups ( rollup_day, video_id, total_sessions, total_active_min, total_lines_seen, total_words_seen, total_tokens_seen, total_cards, cards_per_hour, - words_per_min, lookup_hit_rate + words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE ) SELECT CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day, @@ -61,17 +63,31 @@ export function runRollupMaintenance(db: DatabaseSync): void { WHEN COALESCE(SUM(t.lookup_count), 0) > 0 THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL) ELSE NULL - END AS lookup_hit_rate + END AS lookup_hit_rate, + ${rollupNowMs} AS CREATED_DATE, + ${rollupNowMs} AS LAST_UPDATE_DATE FROM imm_sessions s JOIN imm_session_telemetry t ON t.session_id = s.session_id GROUP BY rollup_day, s.video_id + ON CONFLICT (rollup_day, video_id) DO UPDATE SET + total_sessions = excluded.total_sessions, + total_active_min = excluded.total_active_min, + total_lines_seen = excluded.total_lines_seen, + total_words_seen = excluded.total_words_seen, + total_tokens_seen = excluded.total_tokens_seen, + total_cards = excluded.total_cards, + cards_per_hour = excluded.cards_per_hour, + words_per_min = excluded.words_per_min, + lookup_hit_rate = excluded.lookup_hit_rate, + CREATED_DATE = COALESCE(imm_daily_rollups.CREATED_DATE, excluded.CREATED_DATE), + LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE `); db.exec(` - INSERT OR REPLACE INTO imm_monthly_rollups ( + INSERT INTO imm_monthly_rollups ( rollup_month, video_id, total_sessions, total_active_min, total_lines_seen, - total_words_seen, total_tokens_seen, total_cards + total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE ) SELECT CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month, @@ -81,10 +97,21 @@ export function runRollupMaintenance(db: DatabaseSync): void { COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen, COALESCE(SUM(t.words_seen), 0) AS total_words_seen, COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen, - COALESCE(SUM(t.cards_mined), 0) AS total_cards + COALESCE(SUM(t.cards_mined), 0) AS total_cards, + ${rollupNowMs} AS CREATED_DATE, + ${rollupNowMs} AS LAST_UPDATE_DATE FROM imm_sessions s JOIN imm_session_telemetry t ON t.session_id = s.session_id GROUP BY rollup_month, s.video_id + ON CONFLICT (rollup_month, video_id) DO UPDATE SET + total_sessions = excluded.total_sessions, + total_active_min = excluded.total_active_min, + total_lines_seen = excluded.total_lines_seen, + total_words_seen = excluded.total_words_seen, + total_tokens_seen = excluded.total_tokens_seen, + total_cards = excluded.total_cards, + CREATED_DATE = COALESCE(imm_monthly_rollups.CREATED_DATE, excluded.CREATED_DATE), + LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE `); } diff --git a/src/core/services/immersion-tracker/session.ts b/src/core/services/immersion-tracker/session.ts index 2c39de1..4d717b1 100644 --- a/src/core/services/immersion-tracker/session.ts +++ b/src/core/services/immersion-tracker/session.ts @@ -10,15 +10,24 @@ export function startSessionRecord( startedAtMs = Date.now(), ): { sessionId: number; state: SessionState } { const sessionUuid = crypto.randomUUID(); + const nowMs = Date.now(); const result = db .prepare( ` INSERT INTO imm_sessions ( - session_uuid, video_id, started_at_ms, status, created_at_ms, updated_at_ms - ) VALUES (?, ?, ?, ?, ?, ?) - `, - ) - .run(sessionUuid, videoId, startedAtMs, SESSION_STATUS_ACTIVE, startedAtMs, startedAtMs); + session_uuid, video_id, started_at_ms, status, + CREATED_DATE, LAST_UPDATE_DATE + ) VALUES (?, ?, ?, ?, ?, ?) + `, + ) + .run( + sessionUuid, + videoId, + startedAtMs, + SESSION_STATUS_ACTIVE, + startedAtMs, + nowMs, + ); const sessionId = Number(result.lastInsertRowid); return { sessionId, @@ -32,6 +41,13 @@ export function finalizeSessionRecord( endedAtMs = Date.now(), ): void { db.prepare( - 'UPDATE imm_sessions SET ended_at_ms = ?, status = ?, updated_at_ms = ? WHERE session_id = ?', + ` + UPDATE imm_sessions + SET + ended_at_ms = ?, + status = ?, + LAST_UPDATE_DATE = ? + WHERE session_id = ? + `, ).run(endedAtMs, SESSION_STATUS_ENDED, Date.now(), sessionState.sessionId); } diff --git a/src/core/services/immersion-tracker/storage.ts b/src/core/services/immersion-tracker/storage.ts index bd40c69..1e42078 100644 --- a/src/core/services/immersion-tracker/storage.ts +++ b/src/core/services/immersion-tracker/storage.ts @@ -7,6 +7,25 @@ export interface TrackerPreparedStatements { eventInsertStmt: ReturnType; } +function hasColumn(db: DatabaseSync, tableName: string, columnName: string): boolean { + return db + .prepare(`PRAGMA table_info(${tableName})`) + .all() + .some((row) => (row as { name: string }).name === columnName); +} + +function addColumnIfMissing(db: DatabaseSync, tableName: string, columnName: string): void { + if (!hasColumn(db, tableName, columnName)) { + db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} INTEGER`); + } +} + +function dropColumnIfExists(db: DatabaseSync, tableName: string, columnName: string): void { + if (hasColumn(db, tableName, columnName)) { + db.exec(`ALTER TABLE ${tableName} DROP COLUMN ${columnName}`); + } +} + export function applyPragmas(db: DatabaseSync): void { db.exec('PRAGMA journal_mode = WAL'); db.exec('PRAGMA synchronous = NORMAL'); @@ -44,7 +63,8 @@ export function ensureSchema(db: DatabaseSync): void { bitrate_kbps INTEGER, audio_codec_id INTEGER, hash_sha256 TEXT, screenshot_path TEXT, metadata_json TEXT, - created_at_ms INTEGER NOT NULL, updated_at_ms INTEGER NOT NULL + CREATED_DATE INTEGER, + LAST_UPDATE_DATE INTEGER ); `); db.exec(` @@ -56,7 +76,8 @@ export function ensureSchema(db: DatabaseSync): void { status INTEGER NOT NULL, locale_id INTEGER, target_lang_id INTEGER, difficulty_tier INTEGER, subtitle_mode INTEGER, - created_at_ms INTEGER NOT NULL, updated_at_ms INTEGER NOT NULL, + CREATED_DATE INTEGER, + LAST_UPDATE_DATE INTEGER, FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ); `); @@ -78,6 +99,8 @@ export function ensureSchema(db: DatabaseSync): void { seek_forward_count INTEGER NOT NULL DEFAULT 0, seek_backward_count INTEGER NOT NULL DEFAULT 0, media_buffer_events INTEGER NOT NULL DEFAULT 0, + CREATED_DATE INTEGER, + LAST_UPDATE_DATE INTEGER, FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE ); `); @@ -93,6 +116,8 @@ export function ensureSchema(db: DatabaseSync): void { words_delta INTEGER NOT NULL DEFAULT 0, cards_delta INTEGER NOT NULL DEFAULT 0, payload_json TEXT, + CREATED_DATE INTEGER, + LAST_UPDATE_DATE INTEGER, FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE ); `); @@ -109,6 +134,8 @@ export function ensureSchema(db: DatabaseSync): void { cards_per_hour REAL, words_per_min REAL, lookup_hit_rate REAL, + CREATED_DATE INTEGER, + LAST_UPDATE_DATE INTEGER, PRIMARY KEY (rollup_day, video_id) ); `); @@ -122,6 +149,8 @@ export function ensureSchema(db: DatabaseSync): void { total_words_seen INTEGER NOT NULL DEFAULT 0, total_tokens_seen INTEGER NOT NULL DEFAULT 0, total_cards INTEGER NOT NULL DEFAULT 0, + CREATED_DATE INTEGER, + LAST_UPDATE_DATE INTEGER, PRIMARY KEY (rollup_month, video_id) ); `); @@ -155,6 +184,78 @@ export function ensureSchema(db: DatabaseSync): void { ON imm_monthly_rollups(rollup_month, video_id) `); + if (currentVersion?.schema_version === 1) { + addColumnIfMissing(db, 'imm_videos', 'CREATED_DATE'); + addColumnIfMissing(db, 'imm_videos', 'LAST_UPDATE_DATE'); + addColumnIfMissing(db, 'imm_sessions', 'CREATED_DATE'); + addColumnIfMissing(db, 'imm_sessions', 'LAST_UPDATE_DATE'); + addColumnIfMissing(db, 'imm_session_telemetry', 'CREATED_DATE'); + addColumnIfMissing(db, 'imm_session_telemetry', 'LAST_UPDATE_DATE'); + addColumnIfMissing(db, 'imm_session_events', 'CREATED_DATE'); + addColumnIfMissing(db, 'imm_session_events', 'LAST_UPDATE_DATE'); + addColumnIfMissing(db, 'imm_daily_rollups', 'CREATED_DATE'); + addColumnIfMissing(db, 'imm_daily_rollups', 'LAST_UPDATE_DATE'); + addColumnIfMissing(db, 'imm_monthly_rollups', 'CREATED_DATE'); + addColumnIfMissing(db, 'imm_monthly_rollups', 'LAST_UPDATE_DATE'); + + const nowMs = Date.now(); + db.prepare( + ` + UPDATE imm_videos + SET + CREATED_DATE = COALESCE(CREATED_DATE, created_at_ms), + LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, created_at_ms) + `, + ).run(); + db.prepare( + ` + UPDATE imm_sessions + SET + CREATED_DATE = COALESCE(CREATED_DATE, started_at_ms), + LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, created_at_ms) + `, + ).run(); + db.prepare( + ` + UPDATE imm_session_telemetry + SET + CREATED_DATE = COALESCE(CREATED_DATE, sample_ms), + LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, sample_ms) + `, + ).run(); + db.prepare( + ` + UPDATE imm_session_events + SET + CREATED_DATE = COALESCE(CREATED_DATE, ts_ms), + LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ts_ms) + `, + ).run(); + db.prepare( + ` + UPDATE imm_daily_rollups + SET + CREATED_DATE = COALESCE(CREATED_DATE, ?), + LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ?) + `, + ).run(nowMs, nowMs); + db.prepare( + ` + UPDATE imm_monthly_rollups + SET + CREATED_DATE = COALESCE(CREATED_DATE, ?), + LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ?) + `, + ).run(nowMs, nowMs); + } + + if (currentVersion?.schema_version === 1 || currentVersion?.schema_version === 2) { + dropColumnIfExists(db, 'imm_videos', 'created_at_ms'); + dropColumnIfExists(db, 'imm_videos', 'updated_at_ms'); + dropColumnIfExists(db, 'imm_sessions', 'created_at_ms'); + dropColumnIfExists(db, 'imm_sessions', 'updated_at_ms'); + } + db.exec(` INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (${SCHEMA_VERSION}, ${Date.now()}) @@ -169,17 +270,17 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar session_id, sample_ms, total_watched_ms, active_watched_ms, lines_seen, words_seen, tokens_seen, cards_mined, lookup_count, lookup_hits, pause_count, pause_ms, seek_forward_count, - seek_backward_count, media_buffer_events + seek_backward_count, media_buffer_events, CREATED_DATE, LAST_UPDATE_DATE ) VALUES ( - ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? ) `), eventInsertStmt: db.prepare(` INSERT INTO imm_session_events ( session_id, ts_ms, event_type, line_index, segment_start_ms, segment_end_ms, - words_delta, cards_delta, payload_json + words_delta, cards_delta, payload_json, CREATED_DATE, LAST_UPDATE_DATE ) VALUES ( - ?, ?, ?, ?, ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, ?, ?, ?, ?, ? ) `), }; @@ -203,6 +304,8 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta write.seekForwardCount!, write.seekBackwardCount!, write.mediaBufferEvents!, + Date.now(), + Date.now(), ); return; } @@ -217,6 +320,8 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta write.wordsDelta ?? 0, write.cardsDelta ?? 0, write.payloadJson ?? null, + Date.now(), + Date.now(), ); } @@ -235,8 +340,18 @@ export function getOrCreateVideoRecord( .get(videoKey) as { video_id: number } | null; if (existing?.video_id) { db.prepare( - 'UPDATE imm_videos SET canonical_title = ?, updated_at_ms = ? WHERE video_id = ?', - ).run(details.canonicalTitle || 'unknown', Date.now(), existing.video_id); + ` + UPDATE imm_videos + SET + canonical_title = ?, + LAST_UPDATE_DATE = ? + WHERE video_id = ? + `, + ).run( + details.canonicalTitle || 'unknown', + Date.now(), + existing.video_id, + ); return existing.video_id; } @@ -246,7 +361,7 @@ export function getOrCreateVideoRecord( video_key, canonical_title, source_type, source_path, source_url, duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px, fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path, - metadata_json, created_at_ms, updated_at_ms + metadata_json, CREATED_DATE, LAST_UPDATE_DATE ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) `); const result = insert.run( @@ -294,7 +409,7 @@ export function updateVideoMetadataRecord( hash_sha256 = ?, screenshot_path = ?, metadata_json = ?, - updated_at_ms = ? + LAST_UPDATE_DATE = ? WHERE video_id = ? `, ).run( @@ -320,9 +435,13 @@ export function updateVideoTitleRecord( videoId: number, canonicalTitle: string, ): void { - db.prepare('UPDATE imm_videos SET canonical_title = ?, updated_at_ms = ? WHERE video_id = ?').run( - canonicalTitle, - Date.now(), - videoId, - ); + db.prepare( + ` + UPDATE imm_videos + SET + canonical_title = ?, + LAST_UPDATE_DATE = ? + WHERE video_id = ? + `, + ).run(canonicalTitle, Date.now(), videoId); } diff --git a/src/core/services/immersion-tracker/types.ts b/src/core/services/immersion-tracker/types.ts index 52a9afc..72e43af 100644 --- a/src/core/services/immersion-tracker/types.ts +++ b/src/core/services/immersion-tracker/types.ts @@ -1,4 +1,4 @@ -export const SCHEMA_VERSION = 1; +export const SCHEMA_VERSION = 2; export const DEFAULT_QUEUE_CAP = 1_000; export const DEFAULT_BATCH_SIZE = 25; export const DEFAULT_FLUSH_INTERVAL_MS = 500;