feat(stats): add v1 immersion stats dashboard (#19)

2026-03-20 02:43:28 -07:00
committed by GitHub
parent 42abdd1268
commit 6749ff843c
555 changed files with 46356 additions and 2553 deletions

File diff suppressed because it is too large

View File

@@ -0,0 +1,71 @@
import type { Token } from '../../../types';
import type { LegacyVocabularyPosResolution } from './types';
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
function normalizeLookupText(value: string | null | undefined): string {
return typeof value === 'string' ? value.trim() : '';
}
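// Katakana (U+30A1 to U+30F6) sits exactly 0x60 codepoints above the matching
// hiragana block, so folding is a per-character subtraction
// (e.g. カ U+30AB -> か U+304B).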
function katakanaToHiragana(text: string): string {
let normalized = '';
for (const char of text) {
const code = char.codePointAt(0);
if (code === undefined) {
continue;
}
if (code >= KATAKANA_CODEPOINT_START && code <= KATAKANA_CODEPOINT_END) {
normalized += String.fromCodePoint(code - KATAKANA_TO_HIRAGANA_OFFSET);
continue;
}
normalized += char;
}
return normalized;
}
function toResolution(token: Token): LegacyVocabularyPosResolution {
return {
headword: normalizeLookupText(token.headword) || normalizeLookupText(token.word),
reading: katakanaToHiragana(normalizeLookupText(token.katakanaReading)),
partOfSpeech: deriveStoredPartOfSpeech({
partOfSpeech: token.partOfSpeech,
pos1: token.pos1,
}),
pos1: normalizeLookupText(token.pos1),
pos2: normalizeLookupText(token.pos2),
pos3: normalizeLookupText(token.pos3),
};
}
export function resolveLegacyVocabularyPosFromTokens(
lookupText: string,
tokens: Token[] | null,
): LegacyVocabularyPosResolution | null {
const normalizedLookup = normalizeLookupText(lookupText);
if (!normalizedLookup || !tokens || tokens.length === 0) {
return null;
}
const exactSurfaceMatches = tokens.filter(
(token) => normalizeLookupText(token.word) === normalizedLookup,
);
if (exactSurfaceMatches.length === 1) {
return toResolution(exactSurfaceMatches[0]!);
}
const exactHeadwordMatches = tokens.filter(
(token) => normalizeLookupText(token.headword) === normalizedLookup,
);
if (exactHeadwordMatches.length === 1) {
return toResolution(exactHeadwordMatches[0]!);
}
if (tokens.length === 1) {
return toResolution(tokens[0]!);
}
return null;
}
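For reference, a minimal usage sketch of the resolver; the token literal below is a hypothetical stand-in cast to the project's Token type, not its full shape:

const tokens = [
  {
    word: '食べた',
    headword: '食べる',
    katakanaReading: 'タベル',
    partOfSpeech: 'verb',
    pos1: '動詞',
    pos2: '自立',
    pos3: '',
  },
] as unknown as Token[];
// Exactly one exact-surface match for '食べた', so the resolver returns that
// token's POS fields with the reading folded from katakana to hiragana.
const resolution = resolveLegacyVocabularyPosFromTokens('食べた', tokens);
// resolution?.headword === '食べる'; resolution?.reading === 'たべる'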

View File

@@ -0,0 +1,569 @@
import type { DatabaseSync } from './sqlite';
import { finalizeSessionRecord } from './session';
import type { LifetimeRebuildSummary, SessionState } from './types';
interface TelemetryRow {
active_watched_ms: number | null;
cards_mined: number | null;
lines_seen: number | null;
tokens_seen: number | null;
}
interface VideoRow {
anime_id: number | null;
watched: number;
}
interface AnimeRow {
episodes_total: number | null;
}
function asPositiveNumber(value: number | null, fallback: number): number {
if (value === null || !Number.isFinite(value)) {
return fallback;
}
return Math.max(0, Math.floor(value));
}
interface ExistenceRow {
count: number;
}
interface LifetimeMediaStateRow {
completed: number;
}
interface LifetimeAnimeStateRow {
episodes_completed: number;
}
interface RetainedSessionRow {
sessionId: number;
videoId: number;
startedAtMs: number;
endedAtMs: number;
lastMediaMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
seekBackwardCount: number;
mediaBufferEvents: number;
}
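// "Prior" sessions are ordered by (started_at_ms, session_id) so that two
// sessions starting on the same millisecond still compare deterministically.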
function hasRetainedPriorSession(
db: DatabaseSync,
videoId: number,
startedAtMs: number,
currentSessionId: number,
): boolean {
return (
Number(
(
db
.prepare(
`
SELECT COUNT(*) AS count
FROM imm_sessions
WHERE video_id = ?
AND (
started_at_ms < ?
OR (started_at_ms = ? AND session_id < ?)
)
`,
)
.get(videoId, startedAtMs, startedAtMs, currentSessionId) as ExistenceRow | null
)?.count ?? 0,
) > 0
);
}
function isFirstSessionForLocalDay(
db: DatabaseSync,
currentSessionId: number,
startedAtMs: number,
): boolean {
return (
(
db
.prepare(
`
SELECT COUNT(*) AS count
FROM imm_sessions
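-- Convert epoch ms to local-time epoch seconds, then integer-divide by 86400
-- to get a local calendar-day number; equal numbers mean the same local day.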
WHERE CAST(strftime('%s', started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
= CAST(strftime('%s', ? / 1000, 'unixepoch', 'localtime') AS INTEGER) / 86400
AND (
started_at_ms < ?
OR (started_at_ms = ? AND session_id < ?)
)
`,
)
.get(startedAtMs, startedAtMs, startedAtMs, currentSessionId) as ExistenceRow | null
)?.count === 0
);
}
function resetLifetimeSummaries(db: DatabaseSync, nowMs: number): void {
db.exec(`
DELETE FROM imm_lifetime_anime;
DELETE FROM imm_lifetime_media;
DELETE FROM imm_lifetime_applied_sessions;
`);
db.prepare(
`
UPDATE imm_lifetime_global
SET
total_sessions = 0,
total_active_ms = 0,
total_cards = 0,
active_days = 0,
episodes_started = 0,
episodes_completed = 0,
anime_completed = 0,
last_rebuilt_ms = ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(nowMs, nowMs);
}
function toRebuildSessionState(row: RetainedSessionRow): SessionState {
return {
sessionId: row.sessionId,
videoId: row.videoId,
startedAtMs: row.startedAtMs,
currentLineIndex: 0,
lastWallClockMs: row.endedAtMs,
lastMediaMs: row.lastMediaMs,
lastPauseStartMs: null,
isPaused: false,
pendingTelemetry: false,
markedWatched: false,
totalWatchedMs: Math.max(0, row.totalWatchedMs),
activeWatchedMs: Math.max(0, row.activeWatchedMs),
linesSeen: Math.max(0, row.linesSeen),
tokensSeen: Math.max(0, row.tokensSeen),
cardsMined: Math.max(0, row.cardsMined),
lookupCount: Math.max(0, row.lookupCount),
lookupHits: Math.max(0, row.lookupHits),
yomitanLookupCount: Math.max(0, row.yomitanLookupCount),
pauseCount: Math.max(0, row.pauseCount),
pauseMs: Math.max(0, row.pauseMs),
seekForwardCount: Math.max(0, row.seekForwardCount),
seekBackwardCount: Math.max(0, row.seekBackwardCount),
mediaBufferEvents: Math.max(0, row.mediaBufferEvents),
};
}
function getRetainedStaleActiveSessions(db: DatabaseSync): RetainedSessionRow[] {
return db
.prepare(
`
SELECT
s.session_id AS sessionId,
s.video_id AS videoId,
s.started_at_ms AS startedAtMs,
COALESCE(t.sample_ms, s.LAST_UPDATE_DATE, s.started_at_ms) AS endedAtMs,
s.ended_media_ms AS lastMediaMs,
COALESCE(t.total_watched_ms, s.total_watched_ms, 0) AS totalWatchedMs,
COALESCE(t.active_watched_ms, s.active_watched_ms, 0) AS activeWatchedMs,
COALESCE(t.lines_seen, s.lines_seen, 0) AS linesSeen,
COALESCE(t.tokens_seen, s.tokens_seen, 0) AS tokensSeen,
COALESCE(t.cards_mined, s.cards_mined, 0) AS cardsMined,
COALESCE(t.lookup_count, s.lookup_count, 0) AS lookupCount,
COALESCE(t.lookup_hits, s.lookup_hits, 0) AS lookupHits,
COALESCE(t.yomitan_lookup_count, s.yomitan_lookup_count, 0) AS yomitanLookupCount,
COALESCE(t.pause_count, s.pause_count, 0) AS pauseCount,
COALESCE(t.pause_ms, s.pause_ms, 0) AS pauseMs,
COALESCE(t.seek_forward_count, s.seek_forward_count, 0) AS seekForwardCount,
COALESCE(t.seek_backward_count, s.seek_backward_count, 0) AS seekBackwardCount,
COALESCE(t.media_buffer_events, s.media_buffer_events, 0) AS mediaBufferEvents
FROM imm_sessions s
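-- Attach each session's most recent telemetry sample (if any) so a stale
-- session can be finalized from the last counters actually observed.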
LEFT JOIN imm_session_telemetry t
ON t.telemetry_id = (
SELECT telemetry_id
FROM imm_session_telemetry
WHERE session_id = s.session_id
ORDER BY sample_ms DESC, telemetry_id DESC
LIMIT 1
)
WHERE s.ended_at_ms IS NULL
ORDER BY s.started_at_ms ASC, s.session_id ASC
`,
)
.all() as RetainedSessionRow[];
}
function upsertLifetimeMedia(
db: DatabaseSync,
videoId: number,
nowMs: number,
activeMs: number,
cardsMined: number,
linesSeen: number,
tokensSeen: number,
completed: number,
startedAtMs: number,
endedAtMs: number,
): void {
db.prepare(
`
INSERT INTO imm_lifetime_media(
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(video_id) DO UPDATE SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + excluded.total_active_ms,
total_cards = total_cards + excluded.total_cards,
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
completed = MAX(completed, excluded.completed),
first_watched_ms = CASE
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
ELSE first_watched_ms
END,
last_watched_ms = CASE
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
ELSE last_watched_ms
END,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`,
).run(
videoId,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
completed,
startedAtMs,
endedAtMs,
nowMs,
nowMs,
);
}
function upsertLifetimeAnime(
db: DatabaseSync,
animeId: number,
nowMs: number,
activeMs: number,
cardsMined: number,
linesSeen: number,
tokensSeen: number,
episodesStartedDelta: number,
episodesCompletedDelta: number,
startedAtMs: number,
endedAtMs: number,
): void {
db.prepare(
`
INSERT INTO imm_lifetime_anime(
anime_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
episodes_started,
episodes_completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
)
VALUES (?, 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(anime_id) DO UPDATE SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + excluded.total_active_ms,
total_cards = total_cards + excluded.total_cards,
total_lines_seen = total_lines_seen + excluded.total_lines_seen,
total_tokens_seen = total_tokens_seen + excluded.total_tokens_seen,
episodes_started = episodes_started + excluded.episodes_started,
episodes_completed = episodes_completed + excluded.episodes_completed,
first_watched_ms = CASE
WHEN excluded.first_watched_ms IS NULL THEN first_watched_ms
WHEN first_watched_ms IS NULL THEN excluded.first_watched_ms
WHEN excluded.first_watched_ms < first_watched_ms THEN excluded.first_watched_ms
ELSE first_watched_ms
END,
last_watched_ms = CASE
WHEN excluded.last_watched_ms IS NULL THEN last_watched_ms
WHEN last_watched_ms IS NULL THEN excluded.last_watched_ms
WHEN excluded.last_watched_ms > last_watched_ms THEN excluded.last_watched_ms
ELSE last_watched_ms
END,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`,
).run(
animeId,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
episodesStartedDelta,
episodesCompletedDelta,
startedAtMs,
endedAtMs,
nowMs,
nowMs,
);
}
export function applySessionLifetimeSummary(
db: DatabaseSync,
session: SessionState,
endedAtMs: number,
): void {
const applyResult = db
.prepare(
`
INSERT INTO imm_lifetime_applied_sessions (
session_id,
applied_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (
?, ?, ?, ?
)
ON CONFLICT(session_id) DO NOTHING
`,
)
.run(session.sessionId, endedAtMs, Date.now(), Date.now());
if ((applyResult.changes ?? 0) <= 0) {
return;
}
const telemetry = db
.prepare(
`
SELECT
active_watched_ms,
cards_mined,
lines_seen,
tokens_seen
FROM imm_session_telemetry
WHERE session_id = ?
ORDER BY sample_ms DESC, telemetry_id DESC
LIMIT 1
`,
)
.get(session.sessionId) as TelemetryRow | null;
const video = db
.prepare('SELECT anime_id, watched FROM imm_videos WHERE video_id = ?')
.get(session.videoId) as VideoRow | null;
const mediaLifetime =
(db
.prepare('SELECT completed FROM imm_lifetime_media WHERE video_id = ?')
.get(session.videoId) as LifetimeMediaStateRow | null | undefined) ?? null;
const animeLifetime = video?.anime_id
? ((db
.prepare('SELECT episodes_completed FROM imm_lifetime_anime WHERE anime_id = ?')
.get(video.anime_id) as LifetimeAnimeStateRow | null | undefined) ?? null)
: null;
const anime = video?.anime_id
? ((db
.prepare('SELECT episodes_total FROM imm_anime WHERE anime_id = ?')
.get(video.anime_id) as AnimeRow | null | undefined) ?? null)
: null;
const activeMs = telemetry
? asPositiveNumber(telemetry.active_watched_ms, session.activeWatchedMs)
: session.activeWatchedMs;
const cardsMined = telemetry
? asPositiveNumber(telemetry.cards_mined, session.cardsMined)
: session.cardsMined;
const linesSeen = telemetry
? asPositiveNumber(telemetry.lines_seen, session.linesSeen)
: session.linesSeen;
const tokensSeen = telemetry
? asPositiveNumber(telemetry.tokens_seen, session.tokensSeen)
: session.tokensSeen;
const watched = video?.watched ?? 0;
const isFirstSessionForVideoRun =
mediaLifetime === null &&
!hasRetainedPriorSession(db, session.videoId, session.startedAtMs, session.sessionId);
const isFirstCompletedSessionForVideoRun =
watched > 0 && Number(mediaLifetime?.completed ?? 0) <= 0;
const isFirstSessionForDay = isFirstSessionForLocalDay(
db,
session.sessionId,
session.startedAtMs,
);
const episodesCompletedBefore = Number(animeLifetime?.episodes_completed ?? 0);
const animeEpisodesTotal = anime?.episodes_total ?? null;
const animeCompletedDelta =
watched > 0 &&
isFirstCompletedSessionForVideoRun &&
animeEpisodesTotal !== null &&
animeEpisodesTotal > 0 &&
episodesCompletedBefore < animeEpisodesTotal &&
episodesCompletedBefore + 1 >= animeEpisodesTotal
? 1
: 0;
const nowMs = Date.now();
db.prepare(
`
UPDATE imm_lifetime_global
SET
total_sessions = total_sessions + 1,
total_active_ms = total_active_ms + ?,
total_cards = total_cards + ?,
active_days = active_days + ?,
episodes_started = episodes_started + ?,
episodes_completed = episodes_completed + ?,
anime_completed = anime_completed + ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(
activeMs,
cardsMined,
isFirstSessionForDay ? 1 : 0,
isFirstSessionForVideoRun ? 1 : 0,
isFirstCompletedSessionForVideoRun ? 1 : 0,
animeCompletedDelta,
nowMs,
);
upsertLifetimeMedia(
db,
session.videoId,
nowMs,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
watched > 0 ? 1 : 0,
session.startedAtMs,
endedAtMs,
);
if (video?.anime_id) {
upsertLifetimeAnime(
db,
video.anime_id,
nowMs,
activeMs,
cardsMined,
linesSeen,
tokensSeen,
isFirstSessionForVideoRun ? 1 : 0,
isFirstCompletedSessionForVideoRun ? 1 : 0,
session.startedAtMs,
endedAtMs,
);
}
}
export function rebuildLifetimeSummaries(db: DatabaseSync): LifetimeRebuildSummary {
const rebuiltAtMs = Date.now();
const sessions = db
.prepare(
`
SELECT
session_id AS sessionId,
video_id AS videoId,
started_at_ms AS startedAtMs,
ended_at_ms AS endedAtMs,
ended_media_ms AS lastMediaMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined,
lookup_count AS lookupCount,
lookup_hits AS lookupHits,
yomitan_lookup_count AS yomitanLookupCount,
pause_count AS pauseCount,
pause_ms AS pauseMs,
seek_forward_count AS seekForwardCount,
seek_backward_count AS seekBackwardCount,
media_buffer_events AS mediaBufferEvents
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
ORDER BY started_at_ms ASC, session_id ASC
`,
)
.all() as RetainedSessionRow[];
db.exec('BEGIN');
try {
resetLifetimeSummaries(db, rebuiltAtMs);
for (const session of sessions) {
applySessionLifetimeSummary(db, toRebuildSessionState(session), session.endedAtMs);
}
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
throw error;
}
return {
appliedSessions: sessions.length,
rebuiltAtMs,
};
}
export function reconcileStaleActiveSessions(db: DatabaseSync): number {
const sessions = getRetainedStaleActiveSessions(db);
if (sessions.length === 0) {
return 0;
}
db.exec('BEGIN');
try {
for (const session of sessions) {
const state = toRebuildSessionState(session);
finalizeSessionRecord(db, state, session.endedAtMs);
applySessionLifetimeSummary(db, state, session.endedAtMs);
}
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
throw error;
}
return sessions.length;
}
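// Backfill is warranted when ended sessions are still retained but lifetime
// summaries were never applied to them (e.g. the first run after an upgrade)
// or the global counters were reset.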
export function shouldBackfillLifetimeSummaries(db: DatabaseSync): boolean {
const globalRow = db
.prepare('SELECT total_sessions AS totalSessions FROM imm_lifetime_global WHERE global_id = 1')
.get() as { totalSessions: number } | null;
const appliedRow = db
.prepare('SELECT COUNT(*) AS count FROM imm_lifetime_applied_sessions')
.get() as ExistenceRow | null;
const endedRow = db
.prepare('SELECT COUNT(*) AS count FROM imm_sessions WHERE ended_at_ms IS NOT NULL')
.get() as ExistenceRow | null;
const totalSessions = Number(globalRow?.totalSessions ?? 0);
const appliedSessions = Number(appliedRow?.count ?? 0);
const retainedEndedSessions = Number(endedRow?.count ?? 0);
return retainedEndedSessions > 0 && (appliedSessions === 0 || totalSessions === 0);
}
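To make the completion bookkeeping in applySessionLifetimeSummary easier to follow, here is the anime_completed delta rule restated as a standalone sketch (the function name and parameters are local to this example):

// Increment the global anime_completed counter only when this is the first
// completed session for the video AND that episode is the one that reaches
// the anime's known episode total.
function animeCompletedDeltaSketch(
  watched: number,
  isFirstCompletedSessionForVideo: boolean,
  episodesCompletedBefore: number,
  episodesTotal: number | null,
): 0 | 1 {
  return watched > 0 &&
    isFirstCompletedSessionForVideo &&
    episodesTotal !== null &&
    episodesTotal > 0 &&
    episodesCompletedBefore < episodesTotal &&
    episodesCompletedBefore + 1 >= episodesTotal
    ? 1
    : 0;
}
// A 12-episode show whose 12th episode just finished for the first time:
// animeCompletedDeltaSketch(1, true, 11, 12) === 1
// Rewatching an episode that was already counted as completed:
// animeCompletedDeltaSketch(1, false, 11, 12) === 0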

View File

@@ -0,0 +1,200 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { Database } from './sqlite';
import {
pruneRawRetention,
pruneRollupRetention,
runOptimizeMaintenance,
toMonthKey,
} from './maintenance';
import { ensureSchema } from './storage';
function makeDbPath(): string {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-maintenance-test-'));
return path.join(dir, 'tracker.db');
}
function cleanupDbPath(dbPath: string): void {
try {
fs.rmSync(path.dirname(dbPath), { recursive: true, force: true });
} catch {
// best effort
}
}
test('pruneRawRetention uses session retention separately from telemetry retention', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const nowMs = 90 * 86_400_000;
const staleEndedAtMs = nowMs - 40 * 86_400_000;
const keptEndedAtMs = nowMs - 5 * 86_400_000;
db.exec(`
INSERT INTO imm_videos (
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_sessions (
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
) VALUES
(1, 'session-1', 1, ${staleEndedAtMs - 1_000}, ${staleEndedAtMs}, 2, ${staleEndedAtMs}, ${staleEndedAtMs}),
(2, 'session-2', 1, ${keptEndedAtMs - 1_000}, ${keptEndedAtMs}, 2, ${keptEndedAtMs}, ${keptEndedAtMs});
INSERT INTO imm_session_telemetry (
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES
(1, ${nowMs - 2 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}),
(2, ${nowMs - 12 * 60 * 60 * 1000}, 0, 0, ${nowMs}, ${nowMs});
`);
const result = pruneRawRetention(db, nowMs, {
eventsRetentionMs: 7 * 86_400_000,
telemetryRetentionMs: 1 * 86_400_000,
sessionsRetentionMs: 30 * 86_400_000,
});
const remainingSessions = db
.prepare('SELECT session_id FROM imm_sessions ORDER BY session_id')
.all() as Array<{ session_id: number }>;
const remainingTelemetry = db
.prepare('SELECT session_id FROM imm_session_telemetry ORDER BY session_id')
.all() as Array<{ session_id: number }>;
assert.equal(result.deletedTelemetryRows, 1);
assert.equal(result.deletedEndedSessions, 1);
assert.deepEqual(
remainingSessions.map((row) => row.session_id),
[2],
);
assert.deepEqual(
remainingTelemetry.map((row) => row.session_id),
[2],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('raw retention keeps rollups and rollup retention prunes them separately', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const nowMs = Date.UTC(2026, 2, 16, 12, 0, 0, 0);
const oldDay = Math.floor((nowMs - 90 * 86_400_000) / 86_400_000);
const oldMonth = toMonthKey(nowMs - 400 * 86_400_000);
db.exec(`
INSERT INTO imm_videos (
video_id, video_key, canonical_title, source_type, duration_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'local:/tmp/video.mkv', 'Video', 1, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_sessions (
session_id, session_uuid, video_id, started_at_ms, ended_at_ms, status, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, 'session-1', 1, ${nowMs - 90 * 86_400_000}, ${nowMs - 90 * 86_400_000 + 1_000}, 2, ${nowMs}, ${nowMs}
);
INSERT INTO imm_session_telemetry (
session_id, sample_ms, total_watched_ms, active_watched_ms, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
1, ${nowMs - 90 * 86_400_000}, 0, 0, ${nowMs}, ${nowMs}
);
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards
) VALUES (
${oldDay}, 1, 1, 10, 1, 1, 1
);
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (
${oldMonth}, 1, 1, 10, 1, 1, 1, ${nowMs}, ${nowMs}
);
`);
pruneRawRetention(db, nowMs, {
eventsRetentionMs: 7 * 86_400_000,
telemetryRetentionMs: 30 * 86_400_000,
sessionsRetentionMs: 30 * 86_400_000,
});
const rollupsAfterRawPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
.get() as { total: number } | null;
const monthlyAfterRawPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
.get() as { total: number } | null;
assert.equal(rollupsAfterRawPrune?.total, 1);
assert.equal(monthlyAfterRawPrune?.total, 1);
const rollupPrune = pruneRollupRetention(db, nowMs, {
dailyRollupRetentionMs: 30 * 86_400_000,
monthlyRollupRetentionMs: 365 * 86_400_000,
});
const rollupsAfterRollupPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_daily_rollups')
.get() as { total: number } | null;
const monthlyAfterRollupPrune = db
.prepare('SELECT COUNT(*) AS total FROM imm_monthly_rollups')
.get() as { total: number } | null;
assert.equal(rollupPrune.deletedDailyRows, 1);
assert.equal(rollupPrune.deletedMonthlyRows, 1);
assert.equal(rollupsAfterRollupPrune?.total, 0);
assert.equal(monthlyAfterRollupPrune?.total, 0);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema adds sample_ms index for telemetry rollup scans', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const indexes = db.prepare("PRAGMA index_list('imm_session_telemetry')").all() as Array<{
name: string;
}>;
const hasSampleMsIndex = indexes.some((row) => row.name === 'idx_telemetry_sample_ms');
assert.equal(hasSampleMsIndex, true);
const indexColumns = db.prepare("PRAGMA index_info('idx_telemetry_sample_ms')").all() as Array<{
name: string;
}>;
assert.deepEqual(
indexColumns.map((column) => column.name),
['sample_ms'],
);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('runOptimizeMaintenance executes PRAGMA optimize', () => {
const executedSql: string[] = [];
const db = {
exec(source: string) {
executedSql.push(source);
return this;
},
} as unknown as Parameters<typeof runOptimizeMaintenance>[0];
runOptimizeMaintenance(db);
assert.deepEqual(executedSql, ['PRAGMA optimize']);
});
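For orientation, a sketch of wiring the three maintenance entry points together with the default retention constants; this composition (and the './constants' import path) is an assumption for illustration, not code from this commit:

import type { DatabaseSync } from './sqlite';
import { pruneRawRetention, pruneRollupRetention, runOptimizeMaintenance } from './maintenance';
import {
  DEFAULT_EVENTS_RETENTION_MS,
  DEFAULT_TELEMETRY_RETENTION_MS,
  DEFAULT_SESSIONS_RETENTION_MS,
  DEFAULT_DAILY_ROLLUP_RETENTION_MS,
  DEFAULT_MONTHLY_ROLLUP_RETENTION_MS,
} from './constants';

function runRetentionMaintenance(db: DatabaseSync, nowMs = Date.now()): void {
  // Raw rows (events, telemetry, ended sessions) age out on short horizons.
  pruneRawRetention(db, nowMs, {
    eventsRetentionMs: DEFAULT_EVENTS_RETENTION_MS,
    telemetryRetentionMs: DEFAULT_TELEMETRY_RETENTION_MS,
    sessionsRetentionMs: DEFAULT_SESSIONS_RETENTION_MS,
  });
  // Rollups persist for years and are pruned on their own schedule.
  pruneRollupRetention(db, nowMs, {
    dailyRollupRetentionMs: DEFAULT_DAILY_ROLLUP_RETENTION_MS,
    monthlyRollupRetentionMs: DEFAULT_MONTHLY_ROLLUP_RETENTION_MS,
  });
  // Let SQLite refresh its query-planner statistics afterwards.
  runOptimizeMaintenance(db);
}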

View File

@@ -18,11 +18,9 @@ interface RollupTelemetryResult {
maxSampleMs: number | null;
}
interface RetentionResult {
interface RawRetentionResult {
deletedSessionEvents: number;
deletedTelemetryRows: number;
deletedDailyRows: number;
deletedMonthlyRows: number;
deletedEndedSessions: number;
}
@@ -31,20 +29,18 @@ export function toMonthKey(timestampMs: number): number {
return monthDate.getUTCFullYear() * 100 + monthDate.getUTCMonth() + 1;
}
export function pruneRetention(
export function pruneRawRetention(
db: DatabaseSync,
nowMs: number,
policy: {
eventsRetentionMs: number;
telemetryRetentionMs: number;
dailyRollupRetentionMs: number;
monthlyRollupRetentionMs: number;
sessionsRetentionMs: number;
},
): RetentionResult {
): RawRetentionResult {
const eventCutoff = nowMs - policy.eventsRetentionMs;
const telemetryCutoff = nowMs - policy.telemetryRetentionMs;
const dayCutoff = nowMs - policy.dailyRollupRetentionMs;
const monthCutoff = nowMs - policy.monthlyRollupRetentionMs;
const sessionsCutoff = nowMs - policy.sessionsRetentionMs;
const deletedSessionEvents = (
db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff) as {
@@ -56,28 +52,49 @@ export function pruneRetention(
changes: number;
}
).changes;
const deletedDailyRows = (
db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor(dayCutoff / DAILY_MS)) as { changes: number }
).changes;
const deletedMonthlyRows = (
db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(monthCutoff)) as { changes: number }
).changes;
const deletedEndedSessions = (
db
.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
.run(telemetryCutoff) as { changes: number }
.run(sessionsCutoff) as { changes: number }
).changes;
return {
deletedSessionEvents,
deletedTelemetryRows,
deletedEndedSessions,
};
}
export function pruneRollupRetention(
db: DatabaseSync,
nowMs: number,
policy: {
dailyRollupRetentionMs: number;
monthlyRollupRetentionMs: number;
},
): { deletedDailyRows: number; deletedMonthlyRows: number } {
const deletedDailyRows = Number.isFinite(policy.dailyRollupRetentionMs)
? (
db
.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`)
.run(Math.floor((nowMs - policy.dailyRollupRetentionMs) / DAILY_MS)) as {
changes: number;
}
).changes
: 0;
const deletedMonthlyRows = Number.isFinite(policy.monthlyRollupRetentionMs)
? (
db
.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`)
.run(toMonthKey(nowMs - policy.monthlyRollupRetentionMs)) as {
changes: number;
}
).changes
: 0;
return {
deletedDailyRows,
deletedMonthlyRows,
deletedEndedSessions,
};
}
@@ -108,49 +125,57 @@ function upsertDailyRollupsForGroups(
const upsertStmt = db.prepare(`
INSERT INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
words_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
total_tokens_seen, total_cards, cards_per_hour,
tokens_per_min, lookup_hit_rate, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN (COALESCE(SUM(sm.max_cards), 0) * 60.0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS cards_per_hour,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
WHEN COALESCE(SUM(sm.max_active_ms), 0) > 0
THEN COALESCE(SUM(sm.max_tokens), 0) / (COALESCE(SUM(sm.max_active_ms), 0) / 60000.0)
ELSE NULL
END AS words_per_min,
END AS tokens_per_min,
CASE
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
WHEN COALESCE(SUM(sm.max_lookups), 0) > 0
THEN CAST(COALESCE(SUM(sm.max_hits), 0) AS REAL) / CAST(SUM(sm.max_lookups) AS REAL)
ELSE NULL
END AS lookup_hit_rate,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(s.started_at_ms / 86400000 AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards,
MAX(t.lookup_count) AS max_lookups,
MAX(t.lookup_hits) AS max_hits
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_day, s.video_id
ON CONFLICT (rollup_day, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
cards_per_hour = excluded.cards_per_hour,
words_per_min = excluded.words_per_min,
tokens_per_min = excluded.tokens_per_min,
lookup_hit_rate = excluded.lookup_hit_rate,
CREATED_DATE = COALESCE(imm_daily_rollups.CREATED_DATE, excluded.CREATED_DATE),
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
@@ -173,29 +198,35 @@ function upsertMonthlyRollupsForGroups(
const upsertStmt = db.prepare(`
INSERT INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
total_tokens_seen, total_cards, CREATED_DATE, LAST_UPDATE_DATE
)
SELECT
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
COALESCE(SUM(sm.max_active_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(sm.max_lines), 0) AS total_lines_seen,
COALESCE(SUM(sm.max_tokens), 0) AS total_tokens_seen,
COALESCE(SUM(sm.max_cards), 0) AS total_cards,
? AS CREATED_DATE,
? AS LAST_UPDATE_DATE
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) = ? AND s.video_id = ?
JOIN (
SELECT
t.session_id,
MAX(t.active_watched_ms) AS max_active_ms,
MAX(t.lines_seen) AS max_lines,
MAX(t.tokens_seen) AS max_tokens,
MAX(t.cards_mined) AS max_cards
FROM imm_session_telemetry t
GROUP BY t.session_id
) sm ON s.session_id = sm.session_id
WHERE CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) = ? AND s.video_id = ?
GROUP BY rollup_month, s.video_id
ON CONFLICT (rollup_month, video_id) DO UPDATE SET
total_sessions = excluded.total_sessions,
total_active_min = excluded.total_active_min,
total_lines_seen = excluded.total_lines_seen,
total_words_seen = excluded.total_words_seen,
total_tokens_seen = excluded.total_tokens_seen,
total_cards = excluded.total_cards,
CREATED_DATE = COALESCE(imm_monthly_rollups.CREATED_DATE, excluded.CREATED_DATE),
@@ -216,8 +247,8 @@ function getAffectedRollupGroups(
.prepare(
`
SELECT DISTINCT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
CAST(julianday(s.started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER) AS rollup_day,
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch', 'localtime') AS INTEGER) AS rollup_month,
s.video_id AS video_id
FROM imm_session_telemetry t
JOIN imm_sessions s
@@ -292,3 +323,7 @@ export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): vo
throw error;
}
}
export function runOptimizeMaintenance(db: DatabaseSync): void {
db.exec('PRAGMA optimize');
}
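A brief worked check of the rollup keys, assuming toMonthKey wraps its argument in a Date (as the getUTC* calls above imply):

import assert from 'node:assert/strict';
import { toMonthKey } from './maintenance';

// Month key is year * 100 + one-based month, computed in UTC:
assert.equal(toMonthKey(Date.UTC(2026, 2, 16)), 202603);
// The daily key mirrors the SQL expression
//   CAST(julianday(started_at_ms / 1000, 'unixepoch', 'localtime') - 2440587.5 AS INTEGER)
// 2440587.5 is the Julian day number of 1970-01-01T00:00Z, so the CAST yields
// whole days since the Unix epoch, measured in local time.
// Note that passing Infinity for either rollup retention skips that DELETE:
// the Number.isFinite guards in pruneRollupRetention short-circuit to 0.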

View File

@@ -4,7 +4,7 @@ import { EventEmitter } from 'node:events';
import test from 'node:test';
import type { spawn as spawnFn } from 'node:child_process';
import { SOURCE_TYPE_LOCAL } from './types';
import { getLocalVideoMetadata, runFfprobe } from './metadata';
import { getLocalVideoMetadata, guessAnimeVideoMetadata, runFfprobe } from './metadata';
type Spawn = typeof spawnFn;
@@ -146,3 +146,83 @@ test('getLocalVideoMetadata derives title and falls back to null hash on read er
assert.equal(hashFallbackMetadata.canonicalTitle, 'Episode 02');
assert.equal(hashFallbackMetadata.hashSha256, null);
});
test('guessAnimeVideoMetadata uses guessit basename output first when available', async () => {
const seenTargets: string[] = [];
const parsed = await guessAnimeVideoMetadata(
'/tmp/Little Witch Academia S02E05.mkv',
'Episode 5',
{
runGuessit: async (target) => {
seenTargets.push(target);
return JSON.stringify({
title: 'Little Witch Academia',
season: 2,
episode: 5,
});
},
},
);
assert.deepEqual(seenTargets, ['Little Witch Academia S02E05.mkv']);
assert.deepEqual(parsed, {
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'guessit',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
filename: 'Little Witch Academia S02E05.mkv',
source: 'guessit',
}),
});
});
test('guessAnimeVideoMetadata falls back to parser when guessit throws', async () => {
const parsed = await guessAnimeVideoMetadata(
'/tmp/Little Witch Academia S02E05.mkv',
'Episode 5',
{
runGuessit: async () => {
throw new Error('guessit unavailable');
},
},
);
assert.deepEqual(parsed, {
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
confidence: 'high',
filename: 'Little Witch Academia S02E05.mkv',
rawTitle: 'Little Witch Academia S02E05',
source: 'fallback',
}),
});
});
test('guessAnimeVideoMetadata falls back when guessit output is incomplete', async () => {
const parsed = await guessAnimeVideoMetadata('/tmp/[SubsPlease] Frieren - 03 (1080p).mkv', null, {
runGuessit: async () => JSON.stringify({ episode: 3 }),
});
assert.deepEqual(parsed, {
parsedBasename: '[SubsPlease] Frieren - 03 (1080p).mkv',
parsedTitle: 'Frieren - 03 (1080p)',
parsedSeason: null,
parsedEpisode: null,
parserSource: 'fallback',
parserConfidence: 0.2,
parseMetadataJson: JSON.stringify({
confidence: 'low',
filename: '[SubsPlease] Frieren - 03 (1080p).mkv',
rawTitle: 'Frieren - 03 (1080p)',
source: 'fallback',
}),
});
});

View File

@@ -1,6 +1,13 @@
import crypto from 'node:crypto';
import { spawn as nodeSpawn } from 'node:child_process';
import * as fs from 'node:fs';
import path from 'node:path';
import { parseMediaInfo } from '../../../jimaku/utils';
import {
guessAnilistMediaInfo,
runGuessit,
type GuessAnilistMediaInfoDeps,
} from '../anilist/anilist-updater';
import {
deriveCanonicalTitle,
emptyMetadata,
@@ -8,7 +15,12 @@ import {
parseFps,
toNullableInt,
} from './reducer';
import { SOURCE_TYPE_LOCAL, type ProbeMetadata, type VideoMetadata } from './types';
import {
SOURCE_TYPE_LOCAL,
type ParsedAnimeVideoGuess,
type ProbeMetadata,
type VideoMetadata,
} from './types';
type SpawnFn = typeof nodeSpawn;
@@ -24,6 +36,21 @@ interface MetadataDeps {
fs?: FsDeps;
}
interface GuessAnimeVideoMetadataDeps {
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
function mapParserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
switch (confidence) {
case 'high':
return 1;
case 'medium':
return 0.6;
default:
return 0.2;
}
}
export async function computeSha256(
mediaPath: string,
deps: MetadataDeps = {},
@@ -151,3 +178,48 @@ export async function getLocalVideoMetadata(
metadataJson: null,
};
}
export async function guessAnimeVideoMetadata(
mediaPath: string | null,
mediaTitle: string | null,
deps: GuessAnimeVideoMetadataDeps = {},
): Promise<ParsedAnimeVideoGuess | null> {
const parsed = await guessAnilistMediaInfo(mediaPath, mediaTitle, {
runGuessit: deps.runGuessit ?? runGuessit,
});
if (!parsed) {
return null;
}
const parsedBasename = mediaPath ? path.basename(mediaPath) : null;
if (parsed.source === 'guessit') {
return {
parsedBasename,
parsedTitle: parsed.title,
parsedSeason: parsed.season,
parsedEpisode: parsed.episode,
parserSource: 'guessit',
parserConfidence: 1,
parseMetadataJson: JSON.stringify({
filename: parsedBasename,
source: 'guessit',
}),
};
}
const fallbackInfo = parseMediaInfo(mediaPath ?? mediaTitle);
return {
parsedBasename: parsedBasename ?? fallbackInfo.filename ?? null,
parsedTitle: parsed.title,
parsedSeason: parsed.season,
parsedEpisode: parsed.episode,
parserSource: 'fallback',
parserConfidence: mapParserConfidenceToScore(fallbackInfo.confidence),
parseMetadataJson: JSON.stringify({
confidence: fallbackInfo.confidence,
filename: fallbackInfo.filename,
rawTitle: fallbackInfo.rawTitle,
source: 'fallback',
}),
};
}

File diff suppressed because it is too large

View File

@@ -15,11 +15,11 @@ export function createInitialSessionState(
totalWatchedMs: 0,
activeWatchedMs: 0,
linesSeen: 0,
wordsSeen: 0,
tokensSeen: 0,
cardsMined: 0,
lookupCount: 0,
lookupHits: 0,
yomitanLookupCount: 0,
pauseCount: 0,
pauseMs: 0,
seekForwardCount: 0,
@@ -30,6 +30,7 @@ export function createInitialSessionState(
lastPauseStartMs: null,
isPaused: false,
pendingTelemetry: true,
markedWatched: false,
};
}
@@ -50,16 +51,6 @@ export function sanitizePayload(payload: Record<string, unknown>, maxPayloadByte
return json.length <= maxPayloadBytes ? json : JSON.stringify({ truncated: true });
}
export function calculateTextMetrics(value: string): {
words: number;
tokens: number;
} {
const words = value.split(/\s+/).filter(Boolean).length;
const cjkCount = value.match(/[\u3040-\u30ff\u4e00-\u9fff]/g)?.length ?? 0;
const tokens = Math.max(words, cjkCount);
return { words, tokens };
}
export function secToMs(seconds: number): number {
const coerced = Number(seconds);
if (!Number.isFinite(coerced)) return 0;

View File

@@ -39,8 +39,41 @@ export function finalizeSessionRecord(
SET
ended_at_ms = ?,
status = ?,
ended_media_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
lines_seen = ?,
tokens_seen = ?,
cards_mined = ?,
lookup_count = ?,
lookup_hits = ?,
yomitan_lookup_count = ?,
pause_count = ?,
pause_ms = ?,
seek_forward_count = ?,
seek_backward_count = ?,
media_buffer_events = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(endedAtMs, SESSION_STATUS_ENDED, Date.now(), sessionState.sessionId);
).run(
endedAtMs,
SESSION_STATUS_ENDED,
sessionState.lastMediaMs,
sessionState.totalWatchedMs,
sessionState.activeWatchedMs,
sessionState.linesSeen,
sessionState.tokensSeen,
sessionState.cardsMined,
sessionState.lookupCount,
sessionState.lookupHits,
sessionState.yomitanLookupCount,
sessionState.pauseCount,
sessionState.pauseMs,
sessionState.seekForwardCount,
sessionState.seekBackwardCount,
sessionState.mediaBufferEvents,
Date.now(),
sessionState.sessionId,
);
}

View File

@@ -6,10 +6,15 @@ import test from 'node:test';
import { Database } from './sqlite';
import { finalizeSessionRecord, startSessionRecord } from './session';
import {
applyPragmas,
createTrackerPreparedStatements,
ensureSchema,
executeQueuedWrite,
normalizeCoverBlobBytes,
parseCoverBlobReference,
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
} from './storage';
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
@@ -46,6 +51,34 @@ function cleanupDbPath(dbPath: string): void {
// libsql keeps Windows file handles alive after close when prepared statements were used.
}
test('applyPragmas sets the SQLite tuning defaults used by immersion tracking', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
applyPragmas(db);
const journalModeRow = db.prepare('PRAGMA journal_mode').get() as {
journal_mode: string;
};
const synchronousRow = db.prepare('PRAGMA synchronous').get() as { synchronous: number };
const foreignKeysRow = db.prepare('PRAGMA foreign_keys').get() as { foreign_keys: number };
const busyTimeoutRow = db.prepare('PRAGMA busy_timeout').get() as { timeout: number };
const journalSizeLimitRow = db.prepare('PRAGMA journal_size_limit').get() as {
journal_size_limit: number;
};
assert.equal(journalModeRow.journal_mode, 'wal');
assert.equal(synchronousRow.synchronous, 1);
assert.equal(foreignKeysRow.foreign_keys, 1);
assert.equal(busyTimeoutRow.timeout, 2500);
assert.equal(journalSizeLimitRow.journal_size_limit, 67_108_864);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema creates immersion core tables', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -60,6 +93,7 @@ test('ensureSchema creates immersion core tables', () => {
const tableNames = new Set(rows.map((row) => row.name));
assert.ok(tableNames.has('imm_videos'));
assert.ok(tableNames.has('imm_anime'));
assert.ok(tableNames.has('imm_sessions'));
assert.ok(tableNames.has('imm_session_telemetry'));
assert.ok(tableNames.has('imm_session_events'));
@@ -67,7 +101,37 @@ test('ensureSchema creates immersion core tables', () => {
assert.ok(tableNames.has('imm_monthly_rollups'));
assert.ok(tableNames.has('imm_words'));
assert.ok(tableNames.has('imm_kanji'));
assert.ok(tableNames.has('imm_subtitle_lines'));
assert.ok(tableNames.has('imm_word_line_occurrences'));
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
assert.ok(tableNames.has('imm_rollup_state'));
assert.ok(tableNames.has('imm_cover_art_blobs'));
const videoColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(videoColumns.has('anime_id'));
assert.ok(videoColumns.has('parsed_basename'));
assert.ok(videoColumns.has('parsed_title'));
assert.ok(videoColumns.has('parsed_season'));
assert.ok(videoColumns.has('parsed_episode'));
assert.ok(videoColumns.has('parser_source'));
assert.ok(videoColumns.has('parser_confidence'));
assert.ok(videoColumns.has('parse_metadata_json'));
const mediaArtColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_media_art)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(mediaArtColumns.has('cover_blob_hash'));
const rollupStateRow = db
.prepare('SELECT state_value FROM imm_rollup_state WHERE state_key = ?')
@@ -82,6 +146,566 @@ test('ensureSchema creates immersion core tables', () => {
}
});
test('ensureSchema creates large-history performance indexes', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const indexNames = new Set(
(
db
.prepare(`SELECT name FROM sqlite_master WHERE type = 'index' AND name LIKE 'idx_%'`)
.all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(indexNames.has('idx_telemetry_sample_ms'));
assert.ok(indexNames.has('idx_sessions_started_at'));
assert.ok(indexNames.has('idx_sessions_ended_at'));
assert.ok(indexNames.has('idx_words_frequency'));
assert.ok(indexNames.has('idx_kanji_frequency'));
assert.ok(indexNames.has('idx_media_art_anilist_id'));
assert.ok(indexNames.has('idx_media_art_cover_url'));
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema migrates legacy videos and backfills anime metadata from filenames', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (4, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
`);
const insertLegacyVideo = db.prepare(`
INSERT INTO imm_videos (
video_key, canonical_title, source_type, source_path, source_url,
duration_ms, file_size_bytes, codec_id, container_id, width_px, height_px,
fps_x100, bitrate_kbps, audio_codec_id, hash_sha256, screenshot_path,
metadata_json, CREATED_DATE, LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
insertLegacyVideo.run(
'local:/library/Little Witch Academia S02E05.mkv',
'Episode 5',
SOURCE_TYPE_LOCAL,
'/library/Little Witch Academia S02E05.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
insertLegacyVideo.run(
'local:/library/Little Witch Academia S02E06.mkv',
'Episode 6',
SOURCE_TYPE_LOCAL,
'/library/Little Witch Academia S02E06.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
insertLegacyVideo.run(
'local:/library/[SubsPlease] Frieren - 03 - Departure.mkv',
'Episode 3',
SOURCE_TYPE_LOCAL,
'/library/[SubsPlease] Frieren - 03 - Departure.mkv',
null,
0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1,
1,
);
ensureSchema(db);
const videoColumns = new Set(
(
db.prepare('PRAGMA table_info(imm_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(videoColumns.has('anime_id'));
assert.ok(videoColumns.has('parsed_basename'));
assert.ok(videoColumns.has('parsed_title'));
assert.ok(videoColumns.has('parsed_season'));
assert.ok(videoColumns.has('parsed_episode'));
assert.ok(videoColumns.has('parser_source'));
assert.ok(videoColumns.has('parser_confidence'));
assert.ok(videoColumns.has('parse_metadata_json'));
const animeRows = db
.prepare('SELECT canonical_title FROM imm_anime ORDER BY canonical_title')
.all() as Array<{ canonical_title: string }>;
assert.deepEqual(
animeRows.map((row) => row.canonical_title),
['Frieren', 'Little Witch Academia'],
);
const littleWitchRows = db
.prepare(
`
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_basename,
v.parsed_season,
v.parsed_episode,
v.parser_source,
v.parser_confidence
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key LIKE 'local:/library/Little Witch Academia%'
ORDER BY v.video_key
`,
)
.all() as Array<{
anime_title: string;
parsed_title: string | null;
parsed_basename: string | null;
parsed_season: number | null;
parsed_episode: number | null;
parser_source: string | null;
parser_confidence: number | null;
}>;
assert.equal(littleWitchRows.length, 2);
assert.deepEqual(
littleWitchRows.map((row) => ({
animeTitle: row.anime_title,
parsedTitle: row.parsed_title,
parsedBasename: row.parsed_basename,
parsedSeason: row.parsed_season,
parsedEpisode: row.parsed_episode,
parserSource: row.parser_source,
})),
[
{
animeTitle: 'Little Witch Academia',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
},
{
animeTitle: 'Little Witch Academia',
parsedTitle: 'Little Witch Academia',
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedSeason: 2,
parsedEpisode: 6,
parserSource: 'fallback',
},
],
);
assert.ok(
littleWitchRows.every(
(row) => typeof row.parser_confidence === 'number' && row.parser_confidence > 0,
),
);
const frierenRow = db
.prepare(
`
SELECT
a.canonical_title AS anime_title,
v.parsed_title,
v.parsed_episode,
v.parser_source
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_key = ?
`,
)
.get('local:/library/[SubsPlease] Frieren - 03 - Departure.mkv') as {
anime_title: string;
parsed_title: string | null;
parsed_episode: number | null;
parser_source: string | null;
} | null;
assert.ok(frierenRow);
assert.equal(frierenRow?.anime_title, 'Frieren');
assert.equal(frierenRow?.parsed_title, 'Frieren');
assert.equal(frierenRow?.parsed_episode, 3);
assert.equal(frierenRow?.parser_source, 'fallback');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema adds subtitle-line occurrence tables to schema version 6 databases', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (6, 1);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
anime_id INTEGER,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
parsed_basename TEXT,
parsed_title TEXT,
parsed_season INTEGER,
parsed_episode INTEGER,
parser_source TEXT,
parser_confidence REAL,
parse_metadata_json TEXT,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_sessions(
session_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_uuid TEXT NOT NULL UNIQUE,
video_id INTEGER NOT NULL,
started_at_ms INTEGER NOT NULL,
ended_at_ms INTEGER,
status INTEGER NOT NULL,
locale_id INTEGER,
target_lang_id INTEGER,
difficulty_tier INTEGER,
subtitle_mode INTEGER,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_session_events(
event_id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id INTEGER NOT NULL,
ts_ms INTEGER NOT NULL,
event_type INTEGER NOT NULL,
line_index INTEGER,
segment_start_ms INTEGER,
segment_end_ms INTEGER,
words_delta INTEGER NOT NULL DEFAULT 0,
cards_delta INTEGER NOT NULL DEFAULT 0,
payload_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_words(
id INTEGER PRIMARY KEY AUTOINCREMENT,
headword TEXT,
word TEXT,
reading TEXT,
part_of_speech TEXT,
pos1 TEXT,
pos2 TEXT,
pos3 TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(headword, word, reading)
);
CREATE TABLE imm_kanji(
id INTEGER PRIMARY KEY AUTOINCREMENT,
kanji TEXT,
first_seen REAL,
last_seen REAL,
frequency INTEGER,
UNIQUE(kanji)
);
CREATE TABLE imm_rollup_state(
state_key TEXT PRIMARY KEY,
state_value INTEGER NOT NULL
);
`);
ensureSchema(db);
const tableNames = new Set(
(
db
.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`)
.all() as Array<{ name: string }>
).map((row) => row.name),
);
assert.ok(tableNames.has('imm_subtitle_lines'));
assert.ok(tableNames.has('imm_word_line_occurrences'));
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema migrates legacy cover art blobs into the shared blob store', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
db.prepare('UPDATE imm_schema_version SET schema_version = 12').run();
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/legacy-cover-art.mkv', {
canonicalTitle: 'Legacy Cover Art',
sourcePath: '/tmp/legacy-cover-art.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const legacyBlob = Uint8Array.from([0xde, 0xad, 0xbe, 0xef]);
db.prepare(
`
INSERT INTO imm_media_art (
video_id,
anilist_id,
cover_url,
cover_blob,
cover_blob_hash,
title_romaji,
title_english,
episodes_total,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(videoId, null, null, legacyBlob, null, null, null, null, 1, 1, 1);
assert.doesNotThrow(() => ensureSchema(db));
const mediaArtRow = db
.prepare(
'SELECT cover_blob AS coverBlob, cover_blob_hash AS coverBlobHash FROM imm_media_art',
)
.get() as {
coverBlob: ArrayBuffer | Uint8Array | Buffer | null;
coverBlobHash: string | null;
} | null;
assert.ok(mediaArtRow);
assert.ok(mediaArtRow?.coverBlobHash);
assert.equal(
parseCoverBlobReference(normalizeCoverBlobBytes(mediaArtRow?.coverBlob)),
mediaArtRow?.coverBlobHash,
);
const sharedBlobRow = db
.prepare('SELECT cover_blob AS coverBlob FROM imm_cover_art_blobs WHERE blob_hash = ?')
.get(mediaArtRow?.coverBlobHash) as {
coverBlob: ArrayBuffer | Uint8Array | Buffer;
} | null;
assert.ok(sharedBlobRow);
assert.equal(normalizeCoverBlobBytes(sharedBlobRow?.coverBlob)?.toString('hex'), 'deadbeef');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('anime rows are reused by normalized parsed title and upgraded with AniList metadata', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const firstVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e05.mkv', {
canonicalTitle: 'Episode 5',
sourcePath: '/tmp/Little Witch Academia S02E05.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const secondVideoId = getOrCreateVideoRecord(db, 'local:/tmp/lwa-s02e06.mkv', {
canonicalTitle: 'Episode 6',
sourcePath: '/tmp/Little Witch Academia S02E06.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const provisionalAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"parsed"}',
});
linkVideoToAnimeRecord(db, firstVideoId, {
animeId: provisionalAnimeId,
parsedBasename: 'Little Witch Academia S02E05.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 5,
parserSource: 'fallback',
parserConfidence: 0.6,
parseMetadataJson: '{"source":"parsed","episode":5}',
});
const reusedAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: ' little witch academia ',
canonicalTitle: 'Little Witch Academia',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: '{"source":"parsed"}',
});
linkVideoToAnimeRecord(db, secondVideoId, {
animeId: reusedAnimeId,
parsedBasename: 'Little Witch Academia S02E06.mkv',
parsedTitle: 'Little Witch Academia',
parsedSeason: 2,
parsedEpisode: 6,
parserSource: 'fallback',
parserConfidence: 0.6,
parseMetadataJson: '{"source":"parsed","episode":6}',
});
assert.equal(reusedAnimeId, provisionalAnimeId);
const upgradedAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Little Witch Academia',
canonicalTitle: 'Little Witch Academia TV',
anilistId: 33_435,
titleRomaji: 'Little Witch Academia',
titleEnglish: 'Little Witch Academia',
titleNative: 'リトルウィッチアカデミア',
metadataJson: '{"source":"anilist"}',
});
assert.equal(upgradedAnimeId, provisionalAnimeId);
const animeRows = db.prepare('SELECT * FROM imm_anime').all() as Array<{
anime_id: number;
normalized_title_key: string;
canonical_title: string;
anilist_id: number | null;
title_romaji: string | null;
title_english: string | null;
title_native: string | null;
metadata_json: string | null;
}>;
assert.equal(animeRows.length, 1);
assert.equal(animeRows[0]?.anime_id, provisionalAnimeId);
assert.equal(animeRows[0]?.normalized_title_key, 'little witch academia');
assert.equal(animeRows[0]?.canonical_title, 'Little Witch Academia TV');
assert.equal(animeRows[0]?.anilist_id, 33_435);
assert.equal(animeRows[0]?.title_romaji, 'Little Witch Academia');
assert.equal(animeRows[0]?.title_english, 'Little Witch Academia');
assert.equal(animeRows[0]?.title_native, 'リトルウィッチアカデミア');
assert.equal(animeRows[0]?.metadata_json, '{"source":"anilist"}');
const linkedVideos = db
.prepare(
`
SELECT anime_id, parsed_title, parsed_season, parsed_episode
FROM imm_videos
WHERE video_id IN (?, ?)
ORDER BY video_id
`,
)
.all(firstVideoId, secondVideoId) as Array<{
anime_id: number | null;
parsed_title: string | null;
parsed_season: number | null;
parsed_episode: number | null;
}>;
assert.deepEqual(linkedVideos, [
{
anime_id: provisionalAnimeId,
parsed_title: 'Little Witch Academia',
parsed_season: 2,
parsed_episode: 5,
},
{
anime_id: provisionalAnimeId,
parsed_title: 'Little Witch Academia',
parsed_season: 2,
parsed_episode: 6,
},
]);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('start/finalize session updates ended_at and status', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -116,6 +740,39 @@ test('start/finalize session updates ended_at and status', () => {
}
});
test('finalize session persists ended media position', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/slice-a-ended-media.mkv', {
canonicalTitle: 'Slice A Ended Media',
sourcePath: '/tmp/slice-a-ended-media.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
const startedAtMs = 1_234_567_000;
const endedAtMs = startedAtMs + 8_500;
const { sessionId, state } = startSessionRecord(db, videoId, startedAtMs);
state.lastMediaMs = 91_000;
finalizeSessionRecord(db, state, endedAtMs);
const row = db
.prepare('SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?')
.get(sessionId) as {
ended_media_ms: number | null;
} | null;
assert.ok(row);
assert.equal(row?.ended_media_ms, 91_000);
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
test('executeQueuedWrite inserts event and telemetry rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -139,11 +796,11 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
totalWatchedMs: 1_000,
activeWatchedMs: 900,
linesSeen: 3,
- wordsSeen: 6,
+ tokensSeen: 6,
cardsMined: 1,
lookupCount: 2,
lookupHits: 1,
yomitanLookupCount: 0,
pauseCount: 1,
pauseMs: 50,
seekForwardCount: 0,
@@ -161,7 +818,7 @@ test('executeQueuedWrite inserts event and telemetry rows', () => {
lineIndex: 1,
segmentStartMs: 0,
segmentEndMs: 800,
- wordsDelta: 2,
+ tokensDelta: 2,
cardsDelta: 0,
payloadJson: '{"event":"subtitle-line"}',
},
@@ -191,18 +848,22 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
- stmts.wordUpsertStmt.run('猫', '猫', '', 10.0, 10.0);
- stmts.wordUpsertStmt.run('猫', '猫', '', 5.0, 15.0);
+ stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 10.0, 10.0);
+ stmts.wordUpsertStmt.run('猫', '猫', '', 'noun', '名詞', '一般', '', 5.0, 15.0);
stmts.kanjiUpsertStmt.run('日', 9.0, 9.0);
stmts.kanjiUpsertStmt.run('日', 8.0, 11.0);
const wordRow = db
.prepare(
- 'SELECT headword, frequency, first_seen, last_seen FROM imm_words WHERE headword = ?',
+ `SELECT headword, frequency, part_of_speech, pos1, pos2, first_seen, last_seen
+ FROM imm_words WHERE headword = ?`,
)
.get('猫') as {
headword: string;
frequency: number;
part_of_speech: string;
pos1: string;
pos2: string;
first_seen: number;
last_seen: number;
} | null;
@@ -218,6 +879,9 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
assert.ok(wordRow);
assert.ok(kanjiRow);
assert.equal(wordRow?.frequency, 2);
assert.equal(wordRow?.part_of_speech, 'noun');
assert.equal(wordRow?.pos1, '名詞');
assert.equal(wordRow?.pos2, '一般');
assert.equal(kanjiRow?.frequency, 2);
assert.equal(wordRow?.first_seen, 5);
assert.equal(wordRow?.last_seen, 15);
@@ -228,3 +892,54 @@ test('executeQueuedWrite inserts and upserts word and kanji rows', () => {
cleanupDbPath(dbPath);
}
});
test('word upsert replaces legacy other part_of_speech when better POS metadata arrives later', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
stmts.wordUpsertStmt.run(
'知っている',
'知っている',
'しっている',
'other',
'動詞',
'自立',
'',
10,
10,
);
stmts.wordUpsertStmt.run(
'知っている',
'知っている',
'しっている',
'verb',
'動詞',
'自立',
'',
11,
12,
);
const row = db
.prepare('SELECT frequency, part_of_speech, pos1, pos2 FROM imm_words WHERE headword = ?')
.get('知っている') as {
frequency: number;
part_of_speech: string;
pos1: string;
pos2: string;
} | null;
assert.ok(row);
assert.equal(row?.frequency, 2);
assert.equal(row?.part_of_speech, 'verb');
assert.equal(row?.pos1, '動詞');
assert.equal(row?.pos2, '自立');
} finally {
db.close();
cleanupDbPath(dbPath);
}
});
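The behavior these assertions pin down (frequency accumulates, first_seen/last_seen widen, and a legacy 'other' part_of_speech yields to real POS metadata) could be expressed with an upsert along these lines. A minimal sketch only — the conflict target and column names are assumptions, not the statement shipped in this diff:

const wordUpsertSqlSketch = `
  INSERT INTO imm_words
    (headword, word, reading, part_of_speech, pos1, pos2, pos3, frequency, first_seen, last_seen)
  VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
  ON CONFLICT (headword) DO UPDATE SET
    frequency = imm_words.frequency + 1,
    -- keep the stored POS unless it is the legacy 'other' placeholder
    part_of_speech = CASE
      WHEN imm_words.part_of_speech = 'other' AND excluded.part_of_speech <> 'other'
        THEN excluded.part_of_speech
      ELSE imm_words.part_of_speech
    END,
    first_seen = MIN(imm_words.first_seen, excluded.first_seen),
    last_seen = MAX(imm_words.last_seen, excluded.last_seen)
`;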

File diff suppressed because it is too large

@@ -1,4 +1,4 @@
- export const SCHEMA_VERSION = 3;
+ export const SCHEMA_VERSION = 15;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -7,6 +7,7 @@ const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
export const DEFAULT_EVENTS_RETENTION_MS = ONE_WEEK_MS;
export const DEFAULT_VACUUM_INTERVAL_MS = ONE_WEEK_MS;
export const DEFAULT_TELEMETRY_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
export const DEFAULT_SESSIONS_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
export const DEFAULT_DAILY_ROLLUP_RETENTION_MS = 365 * 24 * 60 * 60 * 1000;
export const DEFAULT_MONTHLY_ROLLUP_RETENTION_MS = 5 * 365 * 24 * 60 * 60 * 1000;
export const DEFAULT_MAX_PAYLOAD_BYTES = 256;
@@ -25,10 +26,14 @@ export const EVENT_SEEK_FORWARD = 5;
export const EVENT_SEEK_BACKWARD = 6;
export const EVENT_PAUSE_START = 7;
export const EVENT_PAUSE_END = 8;
export const EVENT_YOMITAN_LOOKUP = 9;
export interface ImmersionTrackerOptions {
dbPath: string;
policy?: ImmersionTrackerPolicy;
resolveLegacyVocabularyPos?: (
row: LegacyVocabularyPosRow,
) => Promise<LegacyVocabularyPosResolution | null>;
}
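The optional resolveLegacyVocabularyPos hook gives callers a way to backfill POS columns for rows written before schema 15. A minimal sketch of wiring it, assuming a hypothetical retokenize() helper that re-runs the tokenizer over the stored surface form and feeding the result to the token-based resolver added in this commit:

const trackerOptions: ImmersionTrackerOptions = {
  dbPath: '/tmp/immersion-stats.db',
  resolveLegacyVocabularyPos: async (row: LegacyVocabularyPosRow) => {
    const tokens = await retokenize(row.word); // hypothetical tokenizer call
    return resolveLegacyVocabularyPosFromTokens(row.word, tokens);
  },
};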
export interface ImmersionTrackerPolicy {
@@ -40,6 +45,7 @@ export interface ImmersionTrackerPolicy {
retention?: {
eventsDays?: number;
telemetryDays?: number;
sessionsDays?: number;
dailyRollupsDays?: number;
monthlyRollupsDays?: number;
vacuumIntervalDays?: number;
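A minimal sketch of a retention override, assuming the day counts are converted to millisecond cutoffs the same way the DEFAULT_*_MS constants above are built (days * 24 * 60 * 60 * 1000) and that the policy's other fields are optional:

const policy: ImmersionTrackerPolicy = {
  retention: {
    eventsDays: 7,          // mirrors DEFAULT_EVENTS_RETENTION_MS
    telemetryDays: 30,      // mirrors DEFAULT_TELEMETRY_RETENTION_MS
    sessionsDays: 30,       // mirrors DEFAULT_SESSIONS_RETENTION_MS
    dailyRollupsDays: 365,
    monthlyRollupsDays: 5 * 365,
    vacuumIntervalDays: 7,
  },
};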
@@ -50,11 +56,11 @@ export interface TelemetryAccumulator {
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
- wordsSeen: number;
+ tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
@@ -72,20 +78,22 @@ export interface SessionState extends TelemetryAccumulator {
lastPauseStartMs: number | null;
isPaused: boolean;
pendingTelemetry: boolean;
markedWatched: boolean;
}
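A minimal sketch of the zeroed counters a new session would start from. Only fields visible in the hunks above are listed (hence Partial); any accumulator fields the diff elides would presumably start at zero the same way:

const freshCounters: Partial<SessionState> = {
  totalWatchedMs: 0,
  activeWatchedMs: 0,
  linesSeen: 0,
  tokensSeen: 0,
  cardsMined: 0,
  lookupCount: 0,
  lookupHits: 0,
  yomitanLookupCount: 0,
  pauseCount: 0,
  pauseMs: 0,
  seekForwardCount: 0,
  lastPauseStartMs: null,
  isPaused: false,
  pendingTelemetry: false,
  markedWatched: false,
};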
interface QueuedTelemetryWrite {
kind: 'telemetry';
sessionId: number;
sampleMs?: number;
lastMediaMs?: number | null;
totalWatchedMs?: number;
activeWatchedMs?: number;
linesSeen?: number;
- wordsSeen?: number;
+ tokensSeen?: number;
cardsMined?: number;
lookupCount?: number;
lookupHits?: number;
yomitanLookupCount?: number;
pauseCount?: number;
pauseMs?: number;
seekForwardCount?: number;
@@ -95,7 +103,7 @@ interface QueuedTelemetryWrite {
lineIndex?: number | null;
segmentStartMs?: number | null;
segmentEndMs?: number | null;
- wordsDelta?: number;
+ tokensDelta?: number;
cardsDelta?: number;
payloadJson?: string | null;
}
@@ -108,7 +116,7 @@ interface QueuedEventWrite {
lineIndex?: number | null;
segmentStartMs?: number | null;
segmentEndMs?: number | null;
- wordsDelta?: number;
+ tokensDelta?: number;
cardsDelta?: number;
payloadJson?: string | null;
}
@@ -118,8 +126,13 @@ interface QueuedWordWrite {
headword: string;
word: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
firstSeen: number;
lastSeen: number;
frequencyRank: number | null;
}
interface QueuedKanjiWrite {
@@ -129,11 +142,44 @@ interface QueuedKanjiWrite {
lastSeen: number;
}
export interface CountedWordOccurrence {
headword: string;
word: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
occurrenceCount: number;
frequencyRank: number | null;
}
export interface CountedKanjiOccurrence {
kanji: string;
occurrenceCount: number;
}
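CountedWordOccurrence carries an occurrenceCount so repeats within one subtitle line can be recorded as a single row instead of once per token. A minimal sketch of that tally; the grouping key is an assumption:

function countWordOccurrences(
  hits: Array<Omit<CountedWordOccurrence, 'occurrenceCount'>>,
): CountedWordOccurrence[] {
  const byKey = new Map<string, CountedWordOccurrence>();
  for (const hit of hits) {
    // NUL-joined key keeps headword/reading/POS from colliding across fields
    const key = `${hit.headword}\u0000${hit.reading}\u0000${hit.partOfSpeech}`;
    const counted = byKey.get(key);
    if (counted) {
      counted.occurrenceCount += 1;
    } else {
      byKey.set(key, { ...hit, occurrenceCount: 1 });
    }
  }
  return [...byKey.values()];
}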
interface QueuedSubtitleLineWrite {
kind: 'subtitleLine';
sessionId: number;
videoId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
secondaryText?: string | null;
wordOccurrences: CountedWordOccurrence[];
kanjiOccurrences: CountedKanjiOccurrence[];
firstSeen: number;
lastSeen: number;
}
export type QueuedWrite =
| QueuedTelemetryWrite
| QueuedEventWrite
| QueuedWordWrite
- | QueuedKanjiWrite;
+ | QueuedKanjiWrite
+ | QueuedSubtitleLineWrite;
export interface VideoMetadata {
sourceType: number;
@@ -152,18 +198,173 @@ export interface VideoMetadata {
metadataJson: string | null;
}
export interface ParsedAnimeVideoMetadata {
animeId: number | null;
parsedBasename: string | null;
parsedTitle: string | null;
parsedSeason: number | null;
parsedEpisode: number | null;
parserSource: string | null;
parserConfidence: number | null;
parseMetadataJson: string | null;
}
export interface ParsedAnimeVideoGuess {
parsedBasename: string | null;
parsedTitle: string;
parsedSeason: number | null;
parsedEpisode: number | null;
parserSource: 'guessit' | 'fallback';
parserConfidence: number;
parseMetadataJson: string;
}
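ParsedAnimeVideoGuess is what the filename parsers return before an anime row exists. A minimal sketch of the fallback path the tests above exercise ("Little Witch Academia S02E06.mkv" → season 2, episode 6); the regex is an assumption, and the 0.6 confidence mirrors the test fixture rather than the shipped parser:

function fallbackGuess(basename: string): ParsedAnimeVideoGuess | null {
  const match = /^(.+?)[ ._-]+S(\d{1,2})E(\d{1,3})\b/i.exec(basename);
  if (!match) {
    return null;
  }
  const episode = Number(match[3]);
  return {
    parsedBasename: basename,
    parsedTitle: match[1]!.replace(/[._]+/g, ' ').trim(),
    parsedSeason: Number(match[2]),
    parsedEpisode: episode,
    parserSource: 'fallback',
    parserConfidence: 0.6,
    parseMetadataJson: JSON.stringify({ source: 'parsed', episode }),
  };
}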
export interface SessionSummaryQueryRow {
sessionId: number;
videoId: number | null;
canonicalTitle: string | null;
animeId: number | null;
animeTitle: string | null;
startedAtMs: number;
endedAtMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
- wordsSeen: number;
+ tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
yomitanLookupCount: number;
knownWordsSeen?: number;
knownWordRate?: number;
}
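knownWordsSeen/knownWordRate are optional, presumably because they join against a known-words source that may be absent. A minimal sketch of the relationship, assuming the rate is known tokens over tokens seen:

function withKnownWordRate(row: SessionSummaryQueryRow): SessionSummaryQueryRow {
  if (row.knownWordsSeen === undefined || row.tokensSeen <= 0) {
    return row; // no known-words data, or nothing seen: leave the rate unset
  }
  return { ...row, knownWordRate: row.knownWordsSeen / row.tokensSeen };
}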
export interface LifetimeGlobalRow {
totalSessions: number;
totalActiveMs: number;
totalCards: number;
activeDays: number;
episodesStarted: number;
episodesCompleted: number;
animeCompleted: number;
lastRebuiltMs: number | null;
}
export interface LifetimeAnimeRow {
animeId: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalLinesSeen: number;
totalTokensSeen: number;
episodesStarted: number;
episodesCompleted: number;
firstWatchedMs: number | null;
lastWatchedMs: number | null;
}
export interface LifetimeMediaRow {
videoId: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalLinesSeen: number;
totalTokensSeen: number;
completed: number;
firstWatchedMs: number | null;
lastWatchedMs: number | null;
}
export interface AppliedSessionRow {
sessionId: number;
appliedAtMs: number;
}
export interface LifetimeRebuildSummary {
appliedSessions: number;
rebuiltAtMs: number;
}
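LifetimeRebuildSummary and AppliedSessionRow together imply a replay-style rebuild: fold each finalized session into the lifetime aggregates and record how many were applied. A minimal sketch of that shape; the fold body is elided and the function name is hypothetical:

function rebuildLifetime(
  sessions: AppliedSessionRow[],
  nowMs: number,
): LifetimeRebuildSummary {
  let appliedSessions = 0;
  for (const _session of sessions) {
    // fold this session's telemetry into the lifetime global/anime/media rows
    appliedSessions += 1;
  }
  return { appliedSessions, rebuiltAtMs: nowMs };
}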
export interface VocabularyStatsRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
pos1: string | null;
pos2: string | null;
pos3: string | null;
frequency: number;
frequencyRank: number | null;
animeCount: number;
firstSeen: number;
lastSeen: number;
}
export interface VocabularyCleanupSummary {
scanned: number;
kept: number;
deleted: number;
repaired: number;
}
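A minimal sketch of a cleanup pass that would produce these counters, assuming empty headwords get deleted and rows with no usable POS get repaired via the legacy resolver; the classification rules here are illustrative only:

function summarizeCleanup(rows: VocabularyStatsRow[]): VocabularyCleanupSummary {
  const summary: VocabularyCleanupSummary = { scanned: 0, kept: 0, deleted: 0, repaired: 0 };
  for (const row of rows) {
    summary.scanned += 1;
    if (row.headword.trim() === '') {
      summary.deleted += 1; // junk row with no headword
    } else if (row.partOfSpeech === null) {
      summary.repaired += 1; // e.g. backfilled via resolveLegacyVocabularyPos
    } else {
      summary.kept += 1;
    }
  }
  return summary;
}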
export interface LegacyVocabularyPosRow {
headword: string;
word: string;
reading: string | null;
}
export interface LegacyVocabularyPosResolution {
headword: string;
reading: string;
partOfSpeech: string;
pos1: string;
pos2: string;
pos3: string;
}
export interface KanjiStatsRow {
kanjiId: number;
kanji: string;
frequency: number;
firstSeen: number;
lastSeen: number;
}
export interface WordOccurrenceRow {
animeId: number | null;
animeTitle: string | null;
videoId: number;
videoTitle: string;
sourcePath: string | null;
secondaryText: string | null;
sessionId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
occurrenceCount: number;
}
export interface KanjiOccurrenceRow {
animeId: number | null;
animeTitle: string | null;
videoId: number;
videoTitle: string;
sourcePath: string | null;
secondaryText: string | null;
sessionId: number;
lineIndex: number;
segmentStartMs: number | null;
segmentEndMs: number | null;
text: string;
occurrenceCount: number;
}
export interface SessionEventRow {
eventType: number;
tsMs: number;
payload: string | null;
}
export interface SessionTimelineRow {
@@ -171,7 +372,6 @@ export interface SessionTimelineRow {
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
- wordsSeen: number;
+ tokensSeen: number;
cardsMined: number;
}
@@ -182,11 +382,10 @@ export interface ImmersionSessionRollupRow {
totalSessions: number;
totalActiveMin: number;
totalLinesSeen: number;
- totalWordsSeen: number;
+ totalTokensSeen: number;
totalCards: number;
cardsPerHour: number | null;
- wordsPerMin: number | null;
+ tokensPerMin: number | null;
lookupHitRate: number | null;
}
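The nullable rate columns read as derived values guarded against empty denominators. A minimal sketch of the arithmetic, assuming cardsPerHour and tokensPerMin come from the totals in the same row (the lookup totals behind lookupHitRate are elided by the hunk):

function deriveRollupRates(row: ImmersionSessionRollupRow): ImmersionSessionRollupRow {
  const cardsPerHour =
    row.totalActiveMin > 0 ? row.totalCards / (row.totalActiveMin / 60) : null;
  const tokensPerMin =
    row.totalActiveMin > 0 ? row.totalTokensSeen / row.totalActiveMin : null;
  return { ...row, cardsPerHour, tokensPerMin };
}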
@@ -200,3 +399,186 @@ export interface ProbeMetadata {
bitrateKbps: number | null;
audioCodecId: number | null;
}
export interface MediaArtRow {
videoId: number;
anilistId: number | null;
coverUrl: string | null;
coverBlob: Buffer | null;
titleRomaji: string | null;
titleEnglish: string | null;
episodesTotal: number | null;
fetchedAtMs: number;
}
export interface MediaLibraryRow {
videoId: number;
canonicalTitle: string;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
lastWatchedMs: number;
hasCoverArt: number;
}
export interface MediaDetailRow {
videoId: number;
canonicalTitle: string;
animeId: number | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalLinesSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
}
export interface AnimeLibraryRow {
animeId: number;
canonicalTitle: string;
anilistId: number | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
episodeCount: number;
episodesTotal: number | null;
lastWatchedMs: number;
}
export interface AnimeDetailRow {
animeId: number;
canonicalTitle: string;
anilistId: number | null;
titleRomaji: string | null;
titleEnglish: string | null;
titleNative: string | null;
description: string | null;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalLinesSeen: number;
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
episodeCount: number;
lastWatchedMs: number;
}
export interface AnimeAnilistEntryRow {
anilistId: number;
titleRomaji: string | null;
titleEnglish: string | null;
season: number | null;
}
export interface AnimeEpisodeRow {
animeId: number;
videoId: number;
canonicalTitle: string;
parsedTitle: string | null;
season: number | null;
episode: number | null;
durationMs: number;
endedMediaMs: number | null;
watched: number;
totalSessions: number;
totalActiveMs: number;
totalCards: number;
totalTokensSeen: number;
totalYomitanLookupCount: number;
lastWatchedMs: number;
}
export interface StreakCalendarRow {
epochDay: number;
totalActiveMin: number;
}
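StreakCalendarRow keys days as an integer epochDay. A minimal sketch of that convention, assuming days are counted from the Unix epoch; whether the day boundary is UTC or local time is not visible in this diff:

const MS_PER_DAY = 24 * 60 * 60 * 1000;

function toEpochDay(tsMs: number): number {
  return Math.floor(tsMs / MS_PER_DAY); // UTC day index; local days would need an offset
}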
export interface AnimeWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
frequency: number;
}
export interface EpisodesPerDayRow {
epochDay: number;
episodeCount: number;
}
export interface NewAnimePerDayRow {
epochDay: number;
newAnimeCount: number;
}
export interface WatchTimePerAnimeRow {
epochDay: number;
animeId: number;
animeTitle: string;
totalActiveMin: number;
}
export interface WordDetailRow {
wordId: number;
headword: string;
word: string;
reading: string;
partOfSpeech: string | null;
pos1: string | null;
pos2: string | null;
pos3: string | null;
frequency: number;
firstSeen: number;
lastSeen: number;
}
export interface WordAnimeAppearanceRow {
animeId: number;
animeTitle: string;
occurrenceCount: number;
}
export interface SimilarWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
frequency: number;
}
export interface KanjiDetailRow {
kanjiId: number;
kanji: string;
frequency: number;
firstSeen: number;
lastSeen: number;
}
export interface KanjiAnimeAppearanceRow {
animeId: number;
animeTitle: string;
occurrenceCount: number;
}
export interface KanjiWordRow {
wordId: number;
headword: string;
word: string;
reading: string;
frequency: number;
}
export interface EpisodeCardEventRow {
eventId: number;
sessionId: number;
tsMs: number;
cardsDelta: number;
noteIds: number[];
}
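EpisodeCardEventRow exposes noteIds as a parsed array even though events store a JSON payload. A minimal sketch of a defensive decode, assuming a {"noteIds":[...]} payload shape:

function parseNoteIds(payloadJson: string | null): number[] {
  if (!payloadJson) {
    return [];
  }
  try {
    const parsed = JSON.parse(payloadJson) as { noteIds?: unknown };
    return Array.isArray(parsed.noteIds)
      ? parsed.noteIds.filter((id): id is number => typeof id === 'number')
      : [];
  } catch {
    return []; // malformed payloads yield no note ids rather than throwing
  }
}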