feat: remove legacy immersion timestamps and add CREATED/LAST_UPDATE dates

This commit is contained in:
2026-02-28 23:54:43 -08:00
parent 30e3e858f6
commit cf9a444e08
6 changed files with 207 additions and 45 deletions

View File

@@ -10,7 +10,7 @@ SubMiner stores immersion analytics in local SQLite (`immersion.sqlite`) by defa
- Flush policy defaults to `25` writes or `500ms` max delay.
- SQLite pragmas: `journal_mode=WAL`, `synchronous=NORMAL`, `foreign_keys=ON`, `busy_timeout=2500`.
## Schema (v1)
## Schema (v2)
Schema versioning table:
@@ -18,15 +18,15 @@ Schema versioning table:
Core entities:
- `imm_videos`: video key/title/source metadata + optional media metadata fields
- `imm_sessions`: session UUID, video reference, timing/status fields
- `imm_session_telemetry`: high-frequency session aggregates over time
- `imm_session_events`: event stream with compact numeric event types
- `imm_videos`: video key/title/source metadata + optional media metadata fields, `CREATED_DATE`/`LAST_UPDATE_DATE`
- `imm_sessions`: session UUID, video reference, timing/status fields, `CREATED_DATE`/`LAST_UPDATE_DATE`
- `imm_session_telemetry`: high-frequency session aggregates over time, `CREATED_DATE`/`LAST_UPDATE_DATE`
- `imm_session_events`: event stream with compact numeric event types, `CREATED_DATE`/`LAST_UPDATE_DATE`
Rollups:
- `imm_daily_rollups`
- `imm_monthly_rollups`
- `imm_daily_rollups`: includes `CREATED_DATE`/`LAST_UPDATE_DATE`
- `imm_monthly_rollups`: includes `CREATED_DATE`/`LAST_UPDATE_DATE`
Primary index coverage:

View File

@@ -286,8 +286,8 @@ testIfSqlite('monthly rollups are grouped by calendar month', async () => {
canonical_title,
source_type,
duration_ms,
created_at_ms,
updated_at_ms
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (
1,
'local:/tmp/video.mkv',
@@ -306,8 +306,8 @@ testIfSqlite('monthly rollups are grouped by calendar month', async () => {
video_id,
started_at_ms,
status,
created_at_ms,
updated_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE,
ended_at_ms
) VALUES (
1,
@@ -363,8 +363,8 @@ testIfSqlite('monthly rollups are grouped by calendar month', async () => {
video_id,
started_at_ms,
status,
created_at_ms,
updated_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE,
ended_at_ms
) VALUES (
2,
@@ -479,8 +479,8 @@ testIfSqlite('flushSingle reuses cached prepared statements', async () => {
canonical_title,
source_type,
duration_ms,
created_at_ms,
updated_at_ms
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (
1,
'local:/tmp/prepared.mkv',
@@ -499,8 +499,8 @@ testIfSqlite('flushSingle reuses cached prepared statements', async () => {
video_id,
started_at_ms,
status,
created_at_ms,
updated_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE,
ended_at_ms
) VALUES (
1,

View File

@@ -32,11 +32,13 @@ export function pruneRetention(
}
export function runRollupMaintenance(db: DatabaseSync): void {
const rollupNowMs = Date.now();
db.exec(`
INSERT OR REPLACE INTO imm_daily_rollups (
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
words_per_min, lookup_hit_rate
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
@@ -61,17 +63,31 @@ export function runRollupMaintenance(db: DatabaseSync): void {
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
ELSE NULL
END AS lookup_hit_rate
END AS lookup_hit_rate,
${rollupNowMs} AS CREATED_DATE,
${rollupNowMs} AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
GROUP BY rollup_day, s.video_id
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
cards_per_hour = excluded.cards_per_hour,
words_per_min = excluded.words_per_min,
lookup_hit_rate = excluded.lookup_hit_rate,
CREATED_DATE = COALESCE(imm_daily_rollups.CREATED_DATE, excluded.CREATED_DATE),
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`);
db.exec(`
INSERT OR REPLACE INTO imm_monthly_rollups (
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
@@ -81,10 +97,21 @@ export function runRollupMaintenance(db: DatabaseSync): void {
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
${rollupNowMs} AS CREATED_DATE,
${rollupNowMs} AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
GROUP BY rollup_month, s.video_id
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
CREATED_DATE = COALESCE(imm_monthly_rollups.CREATED_DATE, excluded.CREATED_DATE),
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`);
}

View File

@@ -10,15 +10,24 @@ export function startSessionRecord(
startedAtMs = Date.now(),
): { sessionId: number; state: SessionState } {
const sessionUuid = crypto.randomUUID();
const nowMs = Date.now();
const result = db
.prepare(
`
INSERT INTO imm_sessions (
session_uuid, video_id, started_at_ms, status, created_at_ms, updated_at_ms
) VALUES (?, ?, ?, ?, ?, ?)
`,
)
.run(sessionUuid, videoId, startedAtMs, SESSION_STATUS_ACTIVE, startedAtMs, startedAtMs);
session_uuid, video_id, started_at_ms, status,
CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?)
`,
)
.run(
sessionUuid,
videoId,
startedAtMs,
SESSION_STATUS_ACTIVE,
startedAtMs,
nowMs,
);
const sessionId = Number(result.lastInsertRowid);
return {
sessionId,
@@ -32,6 +41,13 @@ export function finalizeSessionRecord(
endedAtMs = Date.now(),
): void {
db.prepare(
'UPDATE imm_sessions SET ended_at_ms = ?, status = ?, updated_at_ms = ? WHERE session_id = ?',
`
UPDATE imm_sessions
SET
ended_at_ms = ?,
status = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(endedAtMs, SESSION_STATUS_ENDED, Date.now(), sessionState.sessionId);
}

View File

@@ -7,6 +7,25 @@ export interface TrackerPreparedStatements {
eventInsertStmt: ReturnType<DatabaseSync['prepare']>;
}
function hasColumn(db: DatabaseSync, tableName: string, columnName: string): boolean {
return db
.prepare(`PRAGMA table_info(${tableName})`)
.all()
.some((row) => (row as { name: string }).name === columnName);
}
function addColumnIfMissing(db: DatabaseSync, tableName: string, columnName: string): void {
if (!hasColumn(db, tableName, columnName)) {
db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} INTEGER`);
}
}
function dropColumnIfExists(db: DatabaseSync, tableName: string, columnName: string): void {
if (hasColumn(db, tableName, columnName)) {
db.exec(`ALTER TABLE ${tableName} DROP COLUMN ${columnName}`);
}
}
export function applyPragmas(db: DatabaseSync): void {
db.exec('PRAGMA journal_mode = WAL');
db.exec('PRAGMA synchronous = NORMAL');
@@ -44,7 +63,8 @@ export function ensureSchema(db: DatabaseSync): void {
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
created_at_ms INTEGER NOT NULL, updated_at_ms INTEGER NOT NULL
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
`);
db.exec(`
@@ -56,7 +76,8 @@ export function ensureSchema(db: DatabaseSync): void {
status INTEGER NOT NULL,
locale_id INTEGER, target_lang_id INTEGER,
difficulty_tier INTEGER, subtitle_mode INTEGER,
created_at_ms INTEGER NOT NULL, updated_at_ms INTEGER NOT NULL,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id)
);
`);
@@ -78,6 +99,8 @@ export function ensureSchema(db: DatabaseSync): void {
seek_forward_count INTEGER NOT NULL DEFAULT 0,
seek_backward_count INTEGER NOT NULL DEFAULT 0,
media_buffer_events INTEGER NOT NULL DEFAULT 0,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE
);
`);
@@ -93,6 +116,8 @@ export function ensureSchema(db: DatabaseSync): void {
words_delta INTEGER NOT NULL DEFAULT 0,
cards_delta INTEGER NOT NULL DEFAULT 0,
payload_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE
);
`);
@@ -109,6 +134,8 @@ export function ensureSchema(db: DatabaseSync): void {
cards_per_hour REAL,
words_per_min REAL,
lookup_hit_rate REAL,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
PRIMARY KEY (rollup_day, video_id)
);
`);
@@ -122,6 +149,8 @@ export function ensureSchema(db: DatabaseSync): void {
total_words_seen INTEGER NOT NULL DEFAULT 0,
total_tokens_seen INTEGER NOT NULL DEFAULT 0,
total_cards INTEGER NOT NULL DEFAULT 0,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
PRIMARY KEY (rollup_month, video_id)
);
`);
@@ -155,6 +184,78 @@ export function ensureSchema(db: DatabaseSync): void {
ON imm_monthly_rollups(rollup_month, video_id)
`);
if (currentVersion?.schema_version === 1) {
addColumnIfMissing(db, 'imm_videos', 'CREATED_DATE');
addColumnIfMissing(db, 'imm_videos', 'LAST_UPDATE_DATE');
addColumnIfMissing(db, 'imm_sessions', 'CREATED_DATE');
addColumnIfMissing(db, 'imm_sessions', 'LAST_UPDATE_DATE');
addColumnIfMissing(db, 'imm_session_telemetry', 'CREATED_DATE');
addColumnIfMissing(db, 'imm_session_telemetry', 'LAST_UPDATE_DATE');
addColumnIfMissing(db, 'imm_session_events', 'CREATED_DATE');
addColumnIfMissing(db, 'imm_session_events', 'LAST_UPDATE_DATE');
addColumnIfMissing(db, 'imm_daily_rollups', 'CREATED_DATE');
addColumnIfMissing(db, 'imm_daily_rollups', 'LAST_UPDATE_DATE');
addColumnIfMissing(db, 'imm_monthly_rollups', 'CREATED_DATE');
addColumnIfMissing(db, 'imm_monthly_rollups', 'LAST_UPDATE_DATE');
const nowMs = Date.now();
db.prepare(
`
UPDATE imm_videos
SET
CREATED_DATE = COALESCE(CREATED_DATE, created_at_ms),
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, created_at_ms)
`,
).run();
db.prepare(
`
UPDATE imm_sessions
SET
CREATED_DATE = COALESCE(CREATED_DATE, started_at_ms),
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, created_at_ms)
`,
).run();
db.prepare(
`
UPDATE imm_session_telemetry
SET
CREATED_DATE = COALESCE(CREATED_DATE, sample_ms),
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, sample_ms)
`,
).run();
db.prepare(
`
UPDATE imm_session_events
SET
CREATED_DATE = COALESCE(CREATED_DATE, ts_ms),
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ts_ms)
`,
).run();
db.prepare(
`
UPDATE imm_daily_rollups
SET
CREATED_DATE = COALESCE(CREATED_DATE, ?),
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ?)
`,
).run(nowMs, nowMs);
db.prepare(
`
UPDATE imm_monthly_rollups
SET
CREATED_DATE = COALESCE(CREATED_DATE, ?),
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ?)
`,
).run(nowMs, nowMs);
}
if (currentVersion?.schema_version === 1 || currentVersion?.schema_version === 2) {
dropColumnIfExists(db, 'imm_videos', 'created_at_ms');
dropColumnIfExists(db, 'imm_videos', 'updated_at_ms');
dropColumnIfExists(db, 'imm_sessions', 'created_at_ms');
dropColumnIfExists(db, 'imm_sessions', 'updated_at_ms');
}
db.exec(`
INSERT INTO imm_schema_version(schema_version, applied_at_ms)
VALUES (${SCHEMA_VERSION}, ${Date.now()})
@@ -169,17 +270,17 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar
session_id, sample_ms, total_watched_ms, active_watched_ms,
lines_seen, words_seen, tokens_seen, cards_mined, lookup_count,
lookup_hits, pause_count, pause_ms, seek_forward_count,
seek_backward_count, media_buffer_events
seek_backward_count, media_buffer_events, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
)
`),
eventInsertStmt: db.prepare(`
INSERT INTO imm_session_events (
session_id, ts_ms, event_type, line_index, segment_start_ms, segment_end_ms,
words_delta, cards_delta, payload_json
words_delta, cards_delta, payload_json, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?
?, ?, ?, ?, ?, ?, ?, ?, ?, ?
)
`),
};
@@ -203,6 +304,8 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
write.seekForwardCount!,
write.seekBackwardCount!,
write.mediaBufferEvents!,
Date.now(),
Date.now(),
);
return;
}
@@ -217,6 +320,8 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
write.wordsDelta ?? 0,
write.cardsDelta ?? 0,
write.payloadJson ?? null,
Date.now(),
Date.now(),
);
}
@@ -235,8 +340,18 @@ export function getOrCreateVideoRecord(
.get(videoKey) as { video_id: number } | null;
if (existing?.video_id) {
db.prepare(
'UPDATE imm_videos SET canonical_title = ?, updated_at_ms = ? WHERE video_id = ?',
).run(details.canonicalTitle || 'unknown', Date.now(), existing.video_id);
`
UPDATE imm_videos
SET
canonical_title = ?,
LAST_UPDATE_DATE = ?
WHERE video_id = ?
`,
).run(
details.canonicalTitle || 'unknown',
Date.now(),
existing.video_id,
);
return existing.video_id;
}
@@ -246,7 +361,7 @@ export function getOrCreateVideoRecord(
video_key, canonical_title, source_type, source_path, source_url,
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
metadata_json, created_at_ms, updated_at_ms
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
const result = insert.run(
@@ -294,7 +409,7 @@ export function updateVideoMetadataRecord(
hash_sha256 = ?,
screenshot_path = ?,
metadata_json = ?,
updated_at_ms = ?
LAST_UPDATE_DATE = ?
WHERE video_id = ?
`,
).run(
@@ -320,9 +435,13 @@ export function updateVideoTitleRecord(
videoId: number,
canonicalTitle: string,
): void {
db.prepare('UPDATE imm_videos SET canonical_title = ?, updated_at_ms = ? WHERE video_id = ?').run(
canonicalTitle,
Date.now(),
videoId,
);
db.prepare(
`
UPDATE imm_videos
SET
canonical_title = ?,
LAST_UPDATE_DATE = ?
WHERE video_id = ?
`,
).run(canonicalTitle, Date.now(), videoId);
}

View File

@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 1;
export const SCHEMA_VERSION = 2;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;