mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-07 03:22:17 -08:00
Overlay 2.0 (#12)
This commit is contained in:
@@ -1,5 +1,31 @@
|
||||
import type { DatabaseSync } from 'node:sqlite';
|
||||
|
||||
const ROLLUP_STATE_KEY = 'last_rollup_sample_ms';
|
||||
const DAILY_MS = 86_400_000;
|
||||
const ZERO_ID = 0;
|
||||
|
||||
interface RollupStateRow {
|
||||
state_value: number;
|
||||
}
|
||||
|
||||
interface RollupGroupRow {
|
||||
rollup_day: number;
|
||||
rollup_month: number;
|
||||
video_id: number;
|
||||
}
|
||||
|
||||
interface RollupTelemetryResult {
|
||||
maxSampleMs: number | null;
|
||||
}
|
||||
|
||||
interface RetentionResult {
|
||||
deletedSessionEvents: number;
|
||||
deletedTelemetryRows: number;
|
||||
deletedDailyRows: number;
|
||||
deletedMonthlyRows: number;
|
||||
deletedEndedSessions: number;
|
||||
}
|
||||
|
||||
export function toMonthKey(timestampMs: number): number {
|
||||
const monthDate = new Date(timestampMs);
|
||||
return monthDate.getUTCFullYear() * 100 + monthDate.getUTCMonth() + 1;
|
||||
@@ -14,29 +40,68 @@ export function pruneRetention(
|
||||
dailyRollupRetentionMs: number;
|
||||
monthlyRollupRetentionMs: number;
|
||||
},
|
||||
): void {
|
||||
): RetentionResult {
|
||||
const eventCutoff = nowMs - policy.eventsRetentionMs;
|
||||
const telemetryCutoff = nowMs - policy.telemetryRetentionMs;
|
||||
const dailyCutoff = nowMs - policy.dailyRollupRetentionMs;
|
||||
const monthlyCutoff = nowMs - policy.monthlyRollupRetentionMs;
|
||||
const dayCutoff = Math.floor(dailyCutoff / 86_400_000);
|
||||
const monthCutoff = toMonthKey(monthlyCutoff);
|
||||
const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
|
||||
const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;
|
||||
|
||||
db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff);
|
||||
db.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`).run(telemetryCutoff);
|
||||
db.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`).run(dayCutoff);
|
||||
db.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`).run(monthCutoff);
|
||||
db.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`).run(
|
||||
telemetryCutoff,
|
||||
);
|
||||
const deletedSessionEvents = (db
|
||||
.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`)
|
||||
.run(eventCutoff) as { changes: number }).changes;
|
||||
const deletedTelemetryRows = (db
|
||||
.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`)
|
||||
.run(telemetryCutoff) as { changes: number }).changes;
|
||||
const deletedDailyRows = (db
|
||||
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
|
||||
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }).changes;
|
||||
const deletedMonthlyRows = (db
|
||||
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
|
||||
.run(toMonthKey(monthCutoff)) as { changes: number }).changes;
|
||||
const deletedEndedSessions = (db
|
||||
.prepare(
|
||||
`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`,
|
||||
)
|
||||
.run(telemetryCutoff) as { changes: number }).changes;
|
||||
|
||||
return {
|
||||
deletedSessionEvents,
|
||||
deletedTelemetryRows,
|
||||
deletedDailyRows,
|
||||
deletedMonthlyRows,
|
||||
deletedEndedSessions,
|
||||
};
|
||||
}
|
||||
|
||||
export function runRollupMaintenance(db: DatabaseSync): void {
|
||||
db.exec(`
|
||||
INSERT OR REPLACE INTO imm_daily_rollups (
|
||||
function getLastRollupSampleMs(db: DatabaseSync): number {
|
||||
const row = db
|
||||
.prepare(`SELECT state_value FROM imm_rollup_state WHERE state_key = ? LIMIT 1`)
|
||||
.get(ROLLUP_STATE_KEY) as unknown as RollupStateRow | null;
|
||||
return row ? Number(row.state_value) : ZERO_ID;
|
||||
}
|
||||
|
||||
function setLastRollupSampleMs(db: DatabaseSync, sampleMs: number): void {
|
||||
db.prepare(
|
||||
`INSERT INTO imm_rollup_state (state_key, state_value)
|
||||
VALUES (?, ?)
|
||||
ON CONFLICT(state_key) DO UPDATE SET state_value = excluded.state_value`,
|
||||
).run(ROLLUP_STATE_KEY, sampleMs);
|
||||
}
|
||||
|
||||
function upsertDailyRollupsForGroups(
|
||||
db: DatabaseSync,
|
||||
groups: Array<{ rollupDay: number; videoId: number }>,
|
||||
rollupNowMs: number,
|
||||
): void {
|
||||
if (groups.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const upsertStmt = db.prepare(`
|
||||
INSERT INTO imm_daily_rollups (
|
||||
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
|
||||
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
|
||||
words_per_min, lookup_hit_rate
|
||||
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
|
||||
)
|
||||
SELECT
|
||||
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
|
||||
@@ -61,17 +126,46 @@ export function runRollupMaintenance(db: DatabaseSync): void {
|
||||
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
|
||||
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
|
||||
ELSE NULL
|
||||
END AS lookup_hit_rate
|
||||
END AS lookup_hit_rate,
|
||||
? AS CREATED_DATE,
|
||||
? AS LAST_UPDATE_DATE
|
||||
FROM imm_sessions s
|
||||
JOIN imm_session_telemetry t
|
||||
ON t.session_id = s.session_id
|
||||
WHERE CAST(s.started_at_ms / 86400000 AS INTEGER) = ? AND s.video_id = ?
|
||||
GROUP BY rollup_day, s.video_id
|
||||
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
|
||||
total_sessions = excluded.total_sessions,
|
||||
total_active_min = excluded.total_active_min,
|
||||
total_lines_seen = excluded.total_lines_seen,
|
||||
total_words_seen = excluded.total_words_seen,
|
||||
total_tokens_seen = excluded.total_tokens_seen,
|
||||
total_cards = excluded.total_cards,
|
||||
cards_per_hour = excluded.cards_per_hour,
|
||||
words_per_min = excluded.words_per_min,
|
||||
lookup_hit_rate = excluded.lookup_hit_rate,
|
||||
CREATED_DATE = COALESCE(imm_daily_rollups.CREATED_DATE, excluded.CREATED_DATE),
|
||||
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
|
||||
`);
|
||||
|
||||
db.exec(`
|
||||
INSERT OR REPLACE INTO imm_monthly_rollups (
|
||||
for (const { rollupDay, videoId } of groups) {
|
||||
upsertStmt.run(rollupNowMs, rollupNowMs, rollupDay, videoId);
|
||||
}
|
||||
}
|
||||
|
||||
function upsertMonthlyRollupsForGroups(
|
||||
db: DatabaseSync,
|
||||
groups: Array<{ rollupMonth: number; videoId: number }>,
|
||||
rollupNowMs: number,
|
||||
): void {
|
||||
if (groups.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const upsertStmt = db.prepare(`
|
||||
INSERT INTO imm_monthly_rollups (
|
||||
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
|
||||
total_words_seen, total_tokens_seen, total_cards
|
||||
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
|
||||
)
|
||||
SELECT
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
|
||||
@@ -81,10 +175,112 @@ export function runRollupMaintenance(db: DatabaseSync): void {
|
||||
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
|
||||
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
|
||||
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
|
||||
COALESCE(SUM(t.cards_mined), 0) AS total_cards
|
||||
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
|
||||
? AS CREATED_DATE,
|
||||
? AS LAST_UPDATE_DATE
|
||||
FROM imm_sessions s
|
||||
JOIN imm_session_telemetry t
|
||||
ON t.session_id = s.session_id
|
||||
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) = ? AND s.video_id = ?
|
||||
GROUP BY rollup_month, s.video_id
|
||||
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
|
||||
total_sessions = excluded.total_sessions,
|
||||
total_active_min = excluded.total_active_min,
|
||||
total_lines_seen = excluded.total_lines_seen,
|
||||
total_words_seen = excluded.total_words_seen,
|
||||
total_tokens_seen = excluded.total_tokens_seen,
|
||||
total_cards = excluded.total_cards,
|
||||
CREATED_DATE = COALESCE(imm_monthly_rollups.CREATED_DATE, excluded.CREATED_DATE),
|
||||
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
|
||||
`);
|
||||
|
||||
for (const { rollupMonth, videoId } of groups) {
|
||||
upsertStmt.run(rollupNowMs, rollupNowMs, rollupMonth, videoId);
|
||||
}
|
||||
}
|
||||
|
||||
function getAffectedRollupGroups(
|
||||
db: DatabaseSync,
|
||||
lastRollupSampleMs: number,
|
||||
): Array<{ rollupDay: number; rollupMonth: number; videoId: number }> {
|
||||
return (
|
||||
db
|
||||
.prepare(
|
||||
`
|
||||
SELECT DISTINCT
|
||||
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
|
||||
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
|
||||
s.video_id AS video_id
|
||||
FROM imm_session_telemetry t
|
||||
JOIN imm_sessions s
|
||||
ON s.session_id = t.session_id
|
||||
WHERE t.sample_ms > ?
|
||||
`,
|
||||
)
|
||||
.all(lastRollupSampleMs) as unknown as RollupGroupRow[]
|
||||
).map((row) => ({
|
||||
rollupDay: row.rollup_day,
|
||||
rollupMonth: row.rollup_month,
|
||||
videoId: row.video_id,
|
||||
}));
|
||||
}
|
||||
|
||||
function dedupeGroups<T extends { rollupDay?: number; rollupMonth?: number; videoId: number }>(
|
||||
groups: Array<T>,
|
||||
): Array<T> {
|
||||
const seen = new Set<string>();
|
||||
const result: Array<T> = [];
|
||||
for (const group of groups) {
|
||||
const key = `${group.rollupDay ?? group.rollupMonth}-${group.videoId}`;
|
||||
if (seen.has(key)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(key);
|
||||
result.push(group);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): void {
|
||||
const rollupNowMs = Date.now();
|
||||
const lastRollupSampleMs = forceRebuild ? ZERO_ID : getLastRollupSampleMs(db);
|
||||
|
||||
const maxSampleRow = db
|
||||
.prepare('SELECT MAX(sample_ms) AS maxSampleMs FROM imm_session_telemetry')
|
||||
.get() as unknown as RollupTelemetryResult | null;
|
||||
if (!maxSampleRow?.maxSampleMs) {
|
||||
if (forceRebuild) {
|
||||
setLastRollupSampleMs(db, ZERO_ID);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const affectedGroups = getAffectedRollupGroups(db, lastRollupSampleMs);
|
||||
if (!forceRebuild && affectedGroups.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const dailyGroups = dedupeGroups(
|
||||
affectedGroups.map((group) => ({
|
||||
rollupDay: group.rollupDay,
|
||||
videoId: group.videoId,
|
||||
})),
|
||||
);
|
||||
const monthlyGroups = dedupeGroups(
|
||||
affectedGroups.map((group) => ({
|
||||
rollupMonth: group.rollupMonth,
|
||||
videoId: group.videoId,
|
||||
})),
|
||||
);
|
||||
|
||||
db.exec('BEGIN IMMEDIATE');
|
||||
try {
|
||||
upsertDailyRollupsForGroups(db, dailyGroups, rollupNowMs);
|
||||
upsertMonthlyRollupsForGroups(db, monthlyGroups, rollupNowMs);
|
||||
setLastRollupSampleMs(db, Number(maxSampleRow.maxSampleMs));
|
||||
db.exec('COMMIT');
|
||||
} catch (error) {
|
||||
db.exec('ROLLBACK');
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
22
src/core/services/immersion-tracker/reducer.test.ts
Normal file
22
src/core/services/immersion-tracker/reducer.test.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { extractLineVocabulary, isKanji } from './reducer';
|
||||
|
||||
test('isKanji follows canonical CJK ranges', () => {
|
||||
assert.ok(isKanji('日'));
|
||||
assert.ok(isKanji('𠀀'));
|
||||
assert.ok(!isKanji('あ'));
|
||||
assert.ok(!isKanji('a'));
|
||||
});
|
||||
|
||||
test('extractLineVocabulary returns words and unique kanji', () => {
|
||||
const result = extractLineVocabulary('hello 你好 猫');
|
||||
|
||||
assert.equal(result.words.length, 3);
|
||||
assert.deepEqual(
|
||||
new Set(result.words.map((entry) => `${entry.headword}/${entry.word}`)),
|
||||
new Set(['hello/hello', '你好/你好', '猫/猫']),
|
||||
);
|
||||
assert.equal(result.words.every((entry) => entry.reading === ''), true);
|
||||
assert.deepEqual(new Set(result.kanji), new Set(['你', '好', '猫']));
|
||||
});
|
||||
@@ -76,6 +76,53 @@ export function normalizeText(value: string | null | undefined): string {
|
||||
return value.trim().replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
export interface ExtractedLineVocabulary {
|
||||
words: Array<{ headword: string; word: string; reading: string }>;
|
||||
kanji: string[];
|
||||
}
|
||||
|
||||
export function isKanji(char: string): boolean {
|
||||
if (!char) return false;
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) return false;
|
||||
return (
|
||||
(code >= 0x4e00 && code <= 0x9fff) ||
|
||||
(code >= 0x3400 && code <= 0x4dbf) ||
|
||||
(code >= 0x20000 && code <= 0x2a6df)
|
||||
);
|
||||
}
|
||||
|
||||
export function extractLineVocabulary(value: string): ExtractedLineVocabulary {
|
||||
const cleaned = normalizeText(value);
|
||||
if (!cleaned) return { words: [], kanji: [] };
|
||||
|
||||
const wordSet = new Set<string>();
|
||||
const tokenPattern = /[A-Za-z0-9']+|[\u3040-\u30ff]+|[\u3400-\u4dbf\u4e00-\u9fff\u20000-\u2a6df]+/g;
|
||||
const rawWords = cleaned.match(tokenPattern) ?? [];
|
||||
for (const rawWord of rawWords) {
|
||||
const normalizedWord = normalizeText(rawWord.toLowerCase());
|
||||
if (!normalizedWord) continue;
|
||||
wordSet.add(normalizedWord);
|
||||
}
|
||||
|
||||
const kanji = new Set<string>();
|
||||
for (const char of cleaned) {
|
||||
if (isKanji(char)) {
|
||||
kanji.add(char);
|
||||
}
|
||||
}
|
||||
|
||||
const words = Array.from(wordSet).map((word) => ({
|
||||
headword: word,
|
||||
word,
|
||||
reading: '',
|
||||
}));
|
||||
return {
|
||||
words,
|
||||
kanji: Array.from(kanji),
|
||||
};
|
||||
}
|
||||
|
||||
export function buildVideoKey(mediaPath: string, sourceType: number): string {
|
||||
if (sourceType === SOURCE_TYPE_REMOTE) {
|
||||
return `remote:${mediaPath}`;
|
||||
|
||||
@@ -10,15 +10,24 @@ export function startSessionRecord(
|
||||
startedAtMs = Date.now(),
|
||||
): { sessionId: number; state: SessionState } {
|
||||
const sessionUuid = crypto.randomUUID();
|
||||
const nowMs = Date.now();
|
||||
const result = db
|
||||
.prepare(
|
||||
`
|
||||
INSERT INTO imm_sessions (
|
||||
session_uuid, video_id, started_at_ms, status, created_at_ms, updated_at_ms
|
||||
) VALUES (?, ?, ?, ?, ?, ?)
|
||||
`,
|
||||
)
|
||||
.run(sessionUuid, videoId, startedAtMs, SESSION_STATUS_ACTIVE, startedAtMs, startedAtMs);
|
||||
session_uuid, video_id, started_at_ms, status,
|
||||
CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (?, ?, ?, ?, ?, ?)
|
||||
`,
|
||||
)
|
||||
.run(
|
||||
sessionUuid,
|
||||
videoId,
|
||||
startedAtMs,
|
||||
SESSION_STATUS_ACTIVE,
|
||||
startedAtMs,
|
||||
nowMs,
|
||||
);
|
||||
const sessionId = Number(result.lastInsertRowid);
|
||||
return {
|
||||
sessionId,
|
||||
@@ -32,6 +41,13 @@ export function finalizeSessionRecord(
|
||||
endedAtMs = Date.now(),
|
||||
): void {
|
||||
db.prepare(
|
||||
'UPDATE imm_sessions SET ended_at_ms = ?, status = ?, updated_at_ms = ? WHERE session_id = ?',
|
||||
`
|
||||
UPDATE imm_sessions
|
||||
SET
|
||||
ended_at_ms = ?,
|
||||
status = ?,
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE session_id = ?
|
||||
`,
|
||||
).run(endedAtMs, SESSION_STATUS_ENDED, Date.now(), sessionState.sessionId);
|
||||
}
|
||||
|
||||
@@ -54,6 +54,19 @@ testIfSqlite('ensureSchema creates immersion core tables', () => {
|
||||
assert.ok(tableNames.has('imm_session_events'));
|
||||
assert.ok(tableNames.has('imm_daily_rollups'));
|
||||
assert.ok(tableNames.has('imm_monthly_rollups'));
|
||||
assert.ok(tableNames.has('imm_words'));
|
||||
assert.ok(tableNames.has('imm_kanji'));
|
||||
assert.ok(tableNames.has('imm_rollup_state'));
|
||||
|
||||
const rollupStateRow = db
|
||||
.prepare(
|
||||
'SELECT state_value FROM imm_rollup_state WHERE state_key = ?',
|
||||
)
|
||||
.get('last_rollup_sample_ms') as {
|
||||
state_value: number;
|
||||
} | null;
|
||||
assert.ok(rollupStateRow);
|
||||
assert.equal(rollupStateRow?.state_value, 0);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
@@ -160,3 +173,47 @@ testIfSqlite('executeQueuedWrite inserts event and telemetry rows', () => {
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
testIfSqlite('executeQueuedWrite inserts and upserts word and kanji rows', () => {
|
||||
const dbPath = makeDbPath();
|
||||
const db = new DatabaseSync!(dbPath);
|
||||
|
||||
try {
|
||||
ensureSchema(db);
|
||||
const stmts = createTrackerPreparedStatements(db);
|
||||
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 10.0, 10.0);
|
||||
stmts.wordUpsertStmt.run('猫', '猫', '', 5.0, 15.0);
|
||||
stmts.kanjiUpsertStmt.run('日', 9.0, 9.0);
|
||||
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
|
||||
|
||||
const wordRow = db
|
||||
.prepare('SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?')
|
||||
.get('猫') as {
|
||||
headword: string;
|
||||
frequency: number;
|
||||
first_seen: number;
|
||||
last_seen: number;
|
||||
} | null;
|
||||
const kanjiRow = db
|
||||
.prepare('SELECT kanji, frequency, first_seen, last_seen FROM imm_kanji WHERE kanji = ?')
|
||||
.get('日') as {
|
||||
kanji: string;
|
||||
frequency: number;
|
||||
first_seen: number;
|
||||
last_seen: number;
|
||||
} | null;
|
||||
|
||||
assert.ok(wordRow);
|
||||
assert.ok(kanjiRow);
|
||||
assert.equal(wordRow?.frequency, 2);
|
||||
assert.equal(kanjiRow?.frequency, 2);
|
||||
assert.equal(wordRow?.first_seen, 5);
|
||||
assert.equal(wordRow?.last_seen, 15);
|
||||
assert.equal(kanjiRow?.first_seen, 8);
|
||||
assert.equal(kanjiRow?.last_seen, 11);
|
||||
} finally {
|
||||
db.close();
|
||||
cleanupDbPath(dbPath);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -5,6 +5,27 @@ import type { QueuedWrite, VideoMetadata } from './types';
|
||||
export interface TrackerPreparedStatements {
|
||||
telemetryInsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
eventInsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
wordUpsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
kanjiUpsertStmt: ReturnType<DatabaseSync['prepare']>;
|
||||
}
|
||||
|
||||
function hasColumn(db: DatabaseSync, tableName: string, columnName: string): boolean {
|
||||
return db
|
||||
.prepare(`PRAGMA table_info(${tableName})`)
|
||||
.all()
|
||||
.some((row) => (row as { name: string }).name === columnName);
|
||||
}
|
||||
|
||||
function addColumnIfMissing(db: DatabaseSync, tableName: string, columnName: string): void {
|
||||
if (!hasColumn(db, tableName, columnName)) {
|
||||
db.exec(`ALTER TABLE ${tableName} ADD COLUMN ${columnName} INTEGER`);
|
||||
}
|
||||
}
|
||||
|
||||
function dropColumnIfExists(db: DatabaseSync, tableName: string, columnName: string): void {
|
||||
if (hasColumn(db, tableName, columnName)) {
|
||||
db.exec(`ALTER TABLE ${tableName} DROP COLUMN ${columnName}`);
|
||||
}
|
||||
}
|
||||
|
||||
export function applyPragmas(db: DatabaseSync): void {
|
||||
@@ -21,6 +42,17 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
applied_at_ms INTEGER NOT NULL
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_rollup_state(
|
||||
state_key TEXT PRIMARY KEY,
|
||||
state_value INTEGER NOT NULL
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
INSERT INTO imm_rollup_state(state_key, state_value)
|
||||
VALUES ('last_rollup_sample_ms', 0)
|
||||
ON CONFLICT(state_key) DO NOTHING
|
||||
`);
|
||||
|
||||
const currentVersion = db
|
||||
.prepare('SELECT schema_version FROM imm_schema_version ORDER BY schema_version DESC LIMIT 1')
|
||||
@@ -44,7 +76,8 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
bitrate_kbps INTEGER, audio_codec_id INTEGER,
|
||||
hash_sha256 TEXT, screenshot_path TEXT,
|
||||
metadata_json TEXT,
|
||||
created_at_ms INTEGER NOT NULL, updated_at_ms INTEGER NOT NULL
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
@@ -56,7 +89,8 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
status INTEGER NOT NULL,
|
||||
locale_id INTEGER, target_lang_id INTEGER,
|
||||
difficulty_tier INTEGER, subtitle_mode INTEGER,
|
||||
created_at_ms INTEGER NOT NULL, updated_at_ms INTEGER NOT NULL,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id)
|
||||
);
|
||||
`);
|
||||
@@ -78,6 +112,8 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
seek_forward_count INTEGER NOT NULL DEFAULT 0,
|
||||
seek_backward_count INTEGER NOT NULL DEFAULT 0,
|
||||
media_buffer_events INTEGER NOT NULL DEFAULT 0,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE
|
||||
);
|
||||
`);
|
||||
@@ -93,6 +129,8 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
words_delta INTEGER NOT NULL DEFAULT 0,
|
||||
cards_delta INTEGER NOT NULL DEFAULT 0,
|
||||
payload_json TEXT,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
FOREIGN KEY(session_id) REFERENCES imm_sessions(session_id) ON DELETE CASCADE
|
||||
);
|
||||
`);
|
||||
@@ -109,6 +147,8 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
cards_per_hour REAL,
|
||||
words_per_min REAL,
|
||||
lookup_hit_rate REAL,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
PRIMARY KEY (rollup_day, video_id)
|
||||
);
|
||||
`);
|
||||
@@ -122,9 +162,33 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
total_words_seen INTEGER NOT NULL DEFAULT 0,
|
||||
total_tokens_seen INTEGER NOT NULL DEFAULT 0,
|
||||
total_cards INTEGER NOT NULL DEFAULT 0,
|
||||
CREATED_DATE INTEGER,
|
||||
LAST_UPDATE_DATE INTEGER,
|
||||
PRIMARY KEY (rollup_month, video_id)
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_words(
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
headword TEXT,
|
||||
word TEXT,
|
||||
reading TEXT,
|
||||
first_seen REAL,
|
||||
last_seen REAL,
|
||||
frequency INTEGER,
|
||||
UNIQUE(headword, word, reading)
|
||||
);
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS imm_kanji(
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
kanji TEXT,
|
||||
first_seen REAL,
|
||||
last_seen REAL,
|
||||
frequency INTEGER,
|
||||
UNIQUE(kanji)
|
||||
);
|
||||
`);
|
||||
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_sessions_video_started
|
||||
@@ -154,6 +218,86 @@ export function ensureSchema(db: DatabaseSync): void {
|
||||
CREATE INDEX IF NOT EXISTS idx_rollups_month_video
|
||||
ON imm_monthly_rollups(rollup_month, video_id)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_words_headword_word_reading
|
||||
ON imm_words(headword, word, reading)
|
||||
`);
|
||||
db.exec(`
|
||||
CREATE INDEX IF NOT EXISTS idx_kanji_kanji
|
||||
ON imm_kanji(kanji)
|
||||
`);
|
||||
|
||||
if (currentVersion?.schema_version === 1) {
|
||||
addColumnIfMissing(db, 'imm_videos', 'CREATED_DATE');
|
||||
addColumnIfMissing(db, 'imm_videos', 'LAST_UPDATE_DATE');
|
||||
addColumnIfMissing(db, 'imm_sessions', 'CREATED_DATE');
|
||||
addColumnIfMissing(db, 'imm_sessions', 'LAST_UPDATE_DATE');
|
||||
addColumnIfMissing(db, 'imm_session_telemetry', 'CREATED_DATE');
|
||||
addColumnIfMissing(db, 'imm_session_telemetry', 'LAST_UPDATE_DATE');
|
||||
addColumnIfMissing(db, 'imm_session_events', 'CREATED_DATE');
|
||||
addColumnIfMissing(db, 'imm_session_events', 'LAST_UPDATE_DATE');
|
||||
addColumnIfMissing(db, 'imm_daily_rollups', 'CREATED_DATE');
|
||||
addColumnIfMissing(db, 'imm_daily_rollups', 'LAST_UPDATE_DATE');
|
||||
addColumnIfMissing(db, 'imm_monthly_rollups', 'CREATED_DATE');
|
||||
addColumnIfMissing(db, 'imm_monthly_rollups', 'LAST_UPDATE_DATE');
|
||||
|
||||
const nowMs = Date.now();
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_videos
|
||||
SET
|
||||
CREATED_DATE = COALESCE(CREATED_DATE, created_at_ms),
|
||||
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, created_at_ms)
|
||||
`,
|
||||
).run();
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_sessions
|
||||
SET
|
||||
CREATED_DATE = COALESCE(CREATED_DATE, started_at_ms),
|
||||
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, created_at_ms)
|
||||
`,
|
||||
).run();
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_session_telemetry
|
||||
SET
|
||||
CREATED_DATE = COALESCE(CREATED_DATE, sample_ms),
|
||||
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, sample_ms)
|
||||
`,
|
||||
).run();
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_session_events
|
||||
SET
|
||||
CREATED_DATE = COALESCE(CREATED_DATE, ts_ms),
|
||||
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ts_ms)
|
||||
`,
|
||||
).run();
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_daily_rollups
|
||||
SET
|
||||
CREATED_DATE = COALESCE(CREATED_DATE, ?),
|
||||
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ?)
|
||||
`,
|
||||
).run(nowMs, nowMs);
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_monthly_rollups
|
||||
SET
|
||||
CREATED_DATE = COALESCE(CREATED_DATE, ?),
|
||||
LAST_UPDATE_DATE = COALESCE(LAST_UPDATE_DATE, ?)
|
||||
`,
|
||||
).run(nowMs, nowMs);
|
||||
}
|
||||
|
||||
if (currentVersion?.schema_version === 1 || currentVersion?.schema_version === 2) {
|
||||
dropColumnIfExists(db, 'imm_videos', 'created_at_ms');
|
||||
dropColumnIfExists(db, 'imm_videos', 'updated_at_ms');
|
||||
dropColumnIfExists(db, 'imm_sessions', 'created_at_ms');
|
||||
dropColumnIfExists(db, 'imm_sessions', 'updated_at_ms');
|
||||
}
|
||||
|
||||
db.exec(`
|
||||
INSERT INTO imm_schema_version(schema_version, applied_at_ms)
|
||||
@@ -169,19 +313,41 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar
|
||||
session_id, sample_ms, total_watched_ms, active_watched_ms,
|
||||
lines_seen, words_seen, tokens_seen, cards_mined, lookup_count,
|
||||
lookup_hits, pause_count, pause_ms, seek_forward_count,
|
||||
seek_backward_count, media_buffer_events
|
||||
seek_backward_count, media_buffer_events, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
)
|
||||
`),
|
||||
eventInsertStmt: db.prepare(`
|
||||
INSERT INTO imm_session_events (
|
||||
session_id, ts_ms, event_type, line_index, segment_start_ms, segment_end_ms,
|
||||
words_delta, cards_delta, payload_json
|
||||
words_delta, cards_delta, payload_json, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
?, ?, ?, ?, ?, ?, ?, ?, ?, ?
|
||||
)
|
||||
`),
|
||||
wordUpsertStmt: db.prepare(`
|
||||
INSERT INTO imm_words (
|
||||
headword, word, reading, first_seen, last_seen, frequency
|
||||
) VALUES (
|
||||
?, ?, ?, ?, ?, 1
|
||||
)
|
||||
ON CONFLICT(headword, word, reading) DO UPDATE SET
|
||||
frequency = COALESCE(frequency, 0) + 1,
|
||||
first_seen = MIN(COALESCE(first_seen, excluded.first_seen), excluded.first_seen),
|
||||
last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen)
|
||||
`),
|
||||
kanjiUpsertStmt: db.prepare(`
|
||||
INSERT INTO imm_kanji (
|
||||
kanji, first_seen, last_seen, frequency
|
||||
) VALUES (
|
||||
?, ?, ?, 1
|
||||
)
|
||||
ON CONFLICT(kanji) DO UPDATE SET
|
||||
frequency = COALESCE(frequency, 0) + 1,
|
||||
first_seen = MIN(COALESCE(first_seen, excluded.first_seen), excluded.first_seen),
|
||||
last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen)
|
||||
`),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -203,9 +369,25 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
|
||||
write.seekForwardCount!,
|
||||
write.seekBackwardCount!,
|
||||
write.mediaBufferEvents!,
|
||||
Date.now(),
|
||||
Date.now(),
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (write.kind === 'word') {
|
||||
stmts.wordUpsertStmt.run(
|
||||
write.headword,
|
||||
write.word,
|
||||
write.reading,
|
||||
write.firstSeen,
|
||||
write.lastSeen,
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (write.kind === 'kanji') {
|
||||
stmts.kanjiUpsertStmt.run(write.kanji, write.firstSeen, write.lastSeen);
|
||||
return;
|
||||
}
|
||||
|
||||
stmts.eventInsertStmt.run(
|
||||
write.sessionId,
|
||||
@@ -217,6 +399,8 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta
|
||||
write.wordsDelta ?? 0,
|
||||
write.cardsDelta ?? 0,
|
||||
write.payloadJson ?? null,
|
||||
Date.now(),
|
||||
Date.now(),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -235,8 +419,18 @@ export function getOrCreateVideoRecord(
|
||||
.get(videoKey) as { video_id: number } | null;
|
||||
if (existing?.video_id) {
|
||||
db.prepare(
|
||||
'UPDATE imm_videos SET canonical_title = ?, updated_at_ms = ? WHERE video_id = ?',
|
||||
).run(details.canonicalTitle || 'unknown', Date.now(), existing.video_id);
|
||||
`
|
||||
UPDATE imm_videos
|
||||
SET
|
||||
canonical_title = ?,
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE video_id = ?
|
||||
`,
|
||||
).run(
|
||||
details.canonicalTitle || 'unknown',
|
||||
Date.now(),
|
||||
existing.video_id,
|
||||
);
|
||||
return existing.video_id;
|
||||
}
|
||||
|
||||
@@ -246,7 +440,7 @@ export function getOrCreateVideoRecord(
|
||||
video_key, canonical_title, source_type, source_path, source_url,
|
||||
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
|
||||
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
|
||||
metadata_json, created_at_ms, updated_at_ms
|
||||
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`);
|
||||
const result = insert.run(
|
||||
@@ -294,7 +488,7 @@ export function updateVideoMetadataRecord(
|
||||
hash_sha256 = ?,
|
||||
screenshot_path = ?,
|
||||
metadata_json = ?,
|
||||
updated_at_ms = ?
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE video_id = ?
|
||||
`,
|
||||
).run(
|
||||
@@ -320,9 +514,13 @@ export function updateVideoTitleRecord(
|
||||
videoId: number,
|
||||
canonicalTitle: string,
|
||||
): void {
|
||||
db.prepare('UPDATE imm_videos SET canonical_title = ?, updated_at_ms = ? WHERE video_id = ?').run(
|
||||
canonicalTitle,
|
||||
Date.now(),
|
||||
videoId,
|
||||
);
|
||||
db.prepare(
|
||||
`
|
||||
UPDATE imm_videos
|
||||
SET
|
||||
canonical_title = ?,
|
||||
LAST_UPDATE_DATE = ?
|
||||
WHERE video_id = ?
|
||||
`,
|
||||
).run(canonicalTitle, Date.now(), videoId);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export const SCHEMA_VERSION = 1;
|
||||
export const SCHEMA_VERSION = 3;
|
||||
export const DEFAULT_QUEUE_CAP = 1_000;
|
||||
export const DEFAULT_BATCH_SIZE = 25;
|
||||
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
|
||||
@@ -74,8 +74,8 @@ export interface SessionState extends TelemetryAccumulator {
|
||||
pendingTelemetry: boolean;
|
||||
}
|
||||
|
||||
export interface QueuedWrite {
|
||||
kind: 'telemetry' | 'event';
|
||||
interface QueuedTelemetryWrite {
|
||||
kind: 'telemetry';
|
||||
sessionId: number;
|
||||
sampleMs?: number;
|
||||
totalWatchedMs?: number;
|
||||
@@ -100,6 +100,37 @@ export interface QueuedWrite {
|
||||
payloadJson?: string | null;
|
||||
}
|
||||
|
||||
interface QueuedEventWrite {
|
||||
kind: 'event';
|
||||
sessionId: number;
|
||||
sampleMs?: number;
|
||||
eventType?: number;
|
||||
lineIndex?: number | null;
|
||||
segmentStartMs?: number | null;
|
||||
segmentEndMs?: number | null;
|
||||
wordsDelta?: number;
|
||||
cardsDelta?: number;
|
||||
payloadJson?: string | null;
|
||||
}
|
||||
|
||||
interface QueuedWordWrite {
|
||||
kind: 'word';
|
||||
headword: string;
|
||||
word: string;
|
||||
reading: string;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
interface QueuedKanjiWrite {
|
||||
kind: 'kanji';
|
||||
kanji: string;
|
||||
firstSeen: number;
|
||||
lastSeen: number;
|
||||
}
|
||||
|
||||
export type QueuedWrite = QueuedTelemetryWrite | QueuedEventWrite | QueuedWordWrite | QueuedKanjiWrite;
|
||||
|
||||
export interface VideoMetadata {
|
||||
sourceType: number;
|
||||
canonicalTitle: string;
|
||||
|
||||
Reference in New Issue
Block a user