feat: add app-owned YouTube subtitle flow with absPlayer-style parsing (#31)

* fix: harden preload argv parsing for popup windows

* fix: align youtube playback with shared overlay startup

* fix: unwrap mpv youtube streams for anki media mining

* docs: update docs for youtube subtitle and mining flow

* refactor: unify cli and runtime wiring for startup and youtube flow

* feat: update subtitle sidebar overlay behavior

* chore: add shared log-file source for diagnostics

* fix(ci): add changelog fragment for immersion changes

* fix: address CodeRabbit review feedback

* fix: persist canonical title from youtube metadata

* style: format stats library tab

* fix: address latest review feedback

* style: format stats library files

* test: stub launcher youtube deps in CI

* test: isolate launcher youtube flow deps

* test: stub launcher youtube deps in failing case

* test: force x11 backend in launcher ci harness

* test: address latest review feedback

* fix(launcher): preserve user YouTube ytdl raw options

* docs(backlog): update task tracking notes

* fix(immersion): special-case youtube media paths in runtime and tracking

* feat(stats): improve YouTube media metadata and picker key handling

* fix(ci): format stats media library hook

* fix: address latest CodeRabbit review items

* docs: update youtube release notes and docs

* feat: auto-load youtube subtitles before manual picker

* fix: restore app-owned youtube subtitle flow

* docs: update youtube playback docs and config copy

* refactor: remove legacy youtube launcher mode plumbing

* fix: refine youtube subtitle startup binding

* docs: clarify youtube subtitle startup behavior

* fix: address PR #31 latest review follow-ups

* fix: address PR #31 follow-up review comments

* test: harden youtube picker test harness

* update backlog

* fix: add timeout to youtube metadata probe

* docs: refresh youtube and stats docs

* update backlog

* update backlog

* chore: release v0.9.0
This commit is contained in:
2026-03-24 00:01:24 -07:00
committed by GitHub
parent c17f0a4080
commit 5feed360ca
219 changed files with 12778 additions and 1052 deletions
@@ -39,6 +39,7 @@ import {
} from '../query.js';
import {
SOURCE_TYPE_LOCAL,
SOURCE_TYPE_REMOTE,
EVENT_CARD_MINED,
EVENT_SUBTITLE_LINE,
EVENT_YOMITAN_LOOKUP,
@@ -279,6 +280,78 @@ test('getAnimeEpisodes falls back to the latest subtitle segment end when sessio
}
});
// Regression test: a finished session whose stored ended_media_ms is 0 must not
// be reported as the episode position. The assertions expect 185_000 (taken
// from the subtitle event seeded below) and the video's 600_000 ms duration.
test('getAnimeEpisodes ignores zero-valued session checkpoints and falls back to subtitle progress', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
// Remote (YouTube URL) video linked to an anime row as season 1, episode 1.
const videoId = getOrCreateVideoRecord(db, 'remote:https://www.youtube.com/watch?v=zero123', {
canonicalTitle: 'Zero Checkpoint Stream',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=zero123',
sourceType: SOURCE_TYPE_REMOTE,
});
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Zero Checkpoint Anime',
canonicalTitle: 'Zero Checkpoint Anime',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: null,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename: 'watch?v=zero123',
parsedTitle: 'Zero Checkpoint Anime',
parsedSeason: 1,
parsedEpisode: 1,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":1}',
});
db.prepare('UPDATE imm_videos SET duration_ms = ? WHERE video_id = ?').run(600_000, videoId);
const startedAtMs = 1_200_000;
const sessionId = startSessionRecord(db, videoId, startedAtMs).sessionId;
// End the session with the media checkpoint deliberately written as 0.
db.prepare(
`
UPDATE imm_sessions
SET
ended_at_ms = ?,
status = 2,
ended_media_ms = 0,
active_watched_ms = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(startedAtMs + 30_000, 180_000, startedAtMs + 30_000, sessionId);
// One subtitle-line event. 170_000 / 185_000 are positional arguments —
// presumably the segment start/end in media ms; confirm against the column
// order of eventInsertStmt.
stmts.eventInsertStmt.run(
sessionId,
startedAtMs + 29_000,
EVENT_SUBTITLE_LINE,
1,
170_000,
185_000,
4,
0,
'{"line":"stream progress"}',
startedAtMs + 29_000,
startedAtMs + 29_000,
);
const [episode] = getAnimeEpisodes(db, animeId);
assert.ok(episode);
// The zero checkpoint is ignored in favour of the subtitle-derived position.
assert.equal(episode?.endedMediaMs, 185_000);
assert.equal(episode?.durationMs, 600_000);
} finally {
// Always release the handle and remove the temporary database.
db.close();
cleanupDbPath(dbPath);
}
});
test('getSessionTimeline returns the full session when no limit is provided', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -1956,6 +2029,100 @@ test('media library and detail queries read lifetime totals', () => {
}
});
// Verifies that getMediaLibrary and getMediaDetail surface the columns of a
// matching imm_youtube_videos row alongside the lifetime totals.
test('media library and detail queries include joined youtube metadata when present', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const mediaOne = getOrCreateVideoRecord(db, 'yt:https://www.youtube.com/watch?v=abc123', {
canonicalTitle: 'Local Fallback Title',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=abc123',
sourceType: SOURCE_TYPE_REMOTE,
});
// Seed a lifetime row directly; values are bound positionally in column order.
db.prepare(
`
INSERT INTO imm_lifetime_media (
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(mediaOne, 2, 6_000, 1, 5, 80, 0, 1_000, 9_000, 9_000, 9_000);
// Seed the YouTube metadata row for the same video_id (16 positional values).
db.prepare(
`
INSERT INTO imm_youtube_videos (
video_id,
youtube_video_id,
video_url,
video_title,
video_thumbnail_url,
channel_id,
channel_name,
channel_url,
channel_thumbnail_url,
uploader_id,
uploader_url,
description,
metadata_json,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(
mediaOne,
'abc123',
'https://www.youtube.com/watch?v=abc123',
'Tracked Video Title',
'https://i.ytimg.com/vi/abc123/hqdefault.jpg',
'UCcreator123',
'Creator Name',
'https://www.youtube.com/channel/UCcreator123',
'https://yt3.googleusercontent.com/channel-avatar=s88',
'@creator',
'https://www.youtube.com/@creator',
'Video description',
'{"source":"test"}',
10_000,
10_000,
10_000,
);
const library = getMediaLibrary(db);
const detail = getMediaDetail(db, mediaOne);
// Library listing: exactly one row, carrying the joined video/channel fields.
assert.equal(library.length, 1);
assert.equal(library[0]?.youtubeVideoId, 'abc123');
assert.equal(library[0]?.videoTitle, 'Tracked Video Title');
assert.equal(library[0]?.channelId, 'UCcreator123');
assert.equal(library[0]?.channelName, 'Creator Name');
assert.equal(library[0]?.channelUrl, 'https://www.youtube.com/channel/UCcreator123');
// Detail view: the remaining joined fields (URLs, uploader, description).
assert.equal(detail?.youtubeVideoId, 'abc123');
assert.equal(detail?.videoUrl, 'https://www.youtube.com/watch?v=abc123');
assert.equal(detail?.videoThumbnailUrl, 'https://i.ytimg.com/vi/abc123/hqdefault.jpg');
assert.equal(detail?.channelThumbnailUrl, 'https://yt3.googleusercontent.com/channel-avatar=s88');
assert.equal(detail?.uploaderId, '@creator');
assert.equal(detail?.uploaderUrl, 'https://www.youtube.com/@creator');
assert.equal(detail?.description, 'Video description');
} finally {
// Always release the handle and remove the temporary database.
db.close();
cleanupDbPath(dbPath);
}
});
test('cover art queries reuse a shared blob across duplicate anime art rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -2679,3 +2846,200 @@ test('deleteSession rebuilds word and kanji aggregates from retained subtitle li
cleanupDbPath(dbPath);
}
});
// Verifies that deleting a video's only session clears every derived
// aggregate: library/detail queries, trends, daily/monthly rollups, and the
// lifetime media/anime/applied-session rows seeded below.
test('deleteSession removes zero-session media from library and trends', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Delete Me Anime',
canonicalTitle: 'Delete Me Anime',
anilistId: 404_404,
titleRomaji: 'Delete Me Anime',
titleEnglish: 'Delete Me Anime',
titleNative: 'Delete Me Anime',
metadataJson: null,
});
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/delete-last-session.mkv', {
canonicalTitle: 'Delete Last Session',
sourcePath: '/tmp/delete-last-session.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename: 'Delete Last Session',
parsedTitle: 'Delete Me Anime',
parsedSeason: 1,
parsedEpisode: 1,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":1}',
});
const startedAtMs = 9_000_000;
const endedAtMs = startedAtMs + 120_000;
// 9_000_000 ms is within the first epoch day, so rollupDay evaluates to 0.
const rollupDay = Math.floor(startedAtMs / 86_400_000);
// NOTE(review): 197001 presumably encodes YYYYMM for January 1970 — confirm
// against the rollup month format used by the maintenance code.
const rollupMonth = 197001;
const { sessionId } = startSessionRecord(db, videoId, startedAtMs);
// Mark the session as ended with two minutes of watch time and some activity.
db.prepare(
`
UPDATE imm_sessions
SET
ended_at_ms = ?,
ended_media_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
lines_seen = ?,
tokens_seen = ?,
cards_mined = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(endedAtMs, 120000, 120000, 120000, 12, 120, 3, endedAtMs, sessionId);
// Seed the aggregate rows by hand so the test can prove deleteSession
// removes them, starting with the applied-session marker.
db.prepare(
`
INSERT INTO imm_lifetime_applied_sessions (
session_id,
applied_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?)
`,
).run(sessionId, endedAtMs, endedAtMs, endedAtMs);
// Per-video lifetime totals.
db.prepare(
`
INSERT INTO imm_lifetime_media (
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(videoId, 1, 120_000, 3, 12, 120, 0, startedAtMs, endedAtMs, endedAtMs, endedAtMs);
// Per-anime lifetime totals.
db.prepare(
`
INSERT INTO imm_lifetime_anime (
anime_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
episodes_started,
episodes_completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(animeId, 1, 120000, 3, 12, 120, 1, 0, startedAtMs, endedAtMs, endedAtMs, endedAtMs);
// Global lifetime row (global_id = 1) is updated rather than inserted.
db.prepare(
`
UPDATE imm_lifetime_global
SET
total_sessions = 1,
total_active_ms = 120000,
total_cards = 3,
active_days = 1,
episodes_started = 1,
episodes_completed = 0,
anime_completed = 0,
last_rebuilt_ms = ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(endedAtMs, endedAtMs);
// Daily rollup for the session's day.
db.prepare(
`
INSERT INTO imm_daily_rollups (
rollup_day,
video_id,
total_sessions,
total_active_min,
total_lines_seen,
total_tokens_seen,
total_cards,
cards_per_hour,
tokens_per_min,
lookup_hit_rate,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(rollupDay, videoId, 1, 2, 12, 120, 3, 90, 60, null, endedAtMs, endedAtMs);
// Monthly rollup for the session's month.
db.prepare(
`
INSERT INTO imm_monthly_rollups (
rollup_month,
video_id,
total_sessions,
total_active_min,
total_lines_seen,
total_tokens_seen,
total_cards,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(rollupMonth, videoId, 1, 2, 12, 120, 3, endedAtMs, endedAtMs);
// Act: remove the only session for this video.
deleteSession(db, sessionId);
// Query-level expectations: nothing left in library, detail, or trends.
assert.deepEqual(getMediaLibrary(db), []);
assert.equal(getMediaDetail(db, videoId) ?? null, null);
assert.deepEqual(getAnimeLibrary(db), []);
assert.equal(getAnimeDetail(db, animeId) ?? null, null);
const trends = getTrendsDashboard(db, 'all', 'day');
assert.deepEqual(trends.activity.watchTime, []);
assert.deepEqual(trends.activity.sessions, []);
const dailyRollups = getDailyRollups(db, 30);
const monthlyRollups = getMonthlyRollups(db, 30);
assert.deepEqual(dailyRollups, []);
assert.deepEqual(monthlyRollups, []);
// Table-level expectations: the seeded aggregate rows are gone.
const lifetimeMediaCount = Number(
(
db.prepare('SELECT COUNT(*) AS total FROM imm_lifetime_media WHERE video_id = ?').get(
videoId,
) as { total: number }
).total,
);
const lifetimeAnimeCount = Number(
(
db.prepare('SELECT COUNT(*) AS total FROM imm_lifetime_anime WHERE anime_id = ?').get(
animeId,
) as { total: number }
).total,
);
const appliedSessionCount = Number(
(
db
.prepare('SELECT COUNT(*) AS total FROM imm_lifetime_applied_sessions WHERE session_id = ?')
.get(sessionId) as { total: number }
).total,
);
assert.equal(lifetimeMediaCount, 0);
assert.equal(lifetimeAnimeCount, 0);
assert.equal(appliedSessionCount, 0);
} finally {
// Always release the handle and remove the temporary database.
db.close();
cleanupDbPath(dbPath);
}
});
+51 -36
View File
@@ -134,6 +134,49 @@ function resetLifetimeSummaries(db: DatabaseSync, nowMs: number): void {
).run(nowMs, nowMs);
}
/**
 * Recomputes the lifetime summaries from the ended sessions stored in
 * `imm_sessions`.
 *
 * Ended sessions (those with a non-null `ended_at_ms`) are read in
 * chronological order, the existing summaries are reset, and each session is
 * then re-applied. This function issues no BEGIN/COMMIT of its own.
 *
 * @param db Database handle to read sessions from and write summaries to.
 * @param rebuiltAtMs Timestamp passed to the reset step and echoed in the result.
 * @returns The number of sessions re-applied together with `rebuiltAtMs`.
 */
function rebuildLifetimeSummariesInternal(
  db: DatabaseSync,
  rebuiltAtMs: number,
): LifetimeRebuildSummary {
  const endedSessionsSql = `
SELECT
session_id AS sessionId,
video_id AS videoId,
started_at_ms AS startedAtMs,
ended_at_ms AS endedAtMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined,
lookup_count AS lookupCount,
lookup_hits AS lookupHits,
yomitan_lookup_count AS yomitanLookupCount,
pause_count AS pauseCount,
pause_ms AS pauseMs,
seek_forward_count AS seekForwardCount,
seek_backward_count AS seekBackwardCount,
media_buffer_events AS mediaBufferEvents
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
ORDER BY started_at_ms ASC, session_id ASC
`;
  // Snapshot the retained sessions before the summaries are wiped.
  const retainedRows = db.prepare(endedSessionsSql).all() as RetainedSessionRow[];

  resetLifetimeSummaries(db, rebuiltAtMs);

  // Replay oldest-first, matching the ORDER BY of the query above.
  retainedRows.forEach((row) => {
    applySessionLifetimeSummary(db, toRebuildSessionState(row), row.endedAtMs);
  });

  const appliedSessions = retainedRows.length;
  return { appliedSessions, rebuiltAtMs };
}
function toRebuildSessionState(row: RetainedSessionRow): SessionState {
return {
sessionId: row.sessionId,
@@ -482,50 +525,22 @@ export function applySessionLifetimeSummary(
export function rebuildLifetimeSummaries(db: DatabaseSync): LifetimeRebuildSummary {
const rebuiltAtMs = Date.now();
const sessions = db
.prepare(
`
SELECT
session_id AS sessionId,
video_id AS videoId,
started_at_ms AS startedAtMs,
ended_at_ms AS endedAtMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined,
lookup_count AS lookupCount,
lookup_hits AS lookupHits,
yomitan_lookup_count AS yomitanLookupCount,
pause_count AS pauseCount,
pause_ms AS pauseMs,
seek_forward_count AS seekForwardCount,
seek_backward_count AS seekBackwardCount,
media_buffer_events AS mediaBufferEvents
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
ORDER BY started_at_ms ASC, session_id ASC
`,
)
.all() as RetainedSessionRow[];
db.exec('BEGIN');
try {
resetLifetimeSummaries(db, rebuiltAtMs);
for (const session of sessions) {
applySessionLifetimeSummary(db, toRebuildSessionState(session), session.endedAtMs);
}
const summary = rebuildLifetimeSummariesInTransaction(db, rebuiltAtMs);
db.exec('COMMIT');
return summary;
} catch (error) {
db.exec('ROLLBACK');
throw error;
}
}
return {
appliedSessions: sessions.length,
rebuiltAtMs,
};
/**
 * Rebuilds the lifetime summaries without issuing BEGIN/COMMIT itself, so it
 * can be called from code that already manages a transaction on `db`.
 *
 * @param db Database handle to rebuild against.
 * @param rebuiltAtMs Timestamp recorded for the rebuild; defaults to the current time.
 * @returns Summary of how many sessions were re-applied and when.
 */
export function rebuildLifetimeSummariesInTransaction(
  db: DatabaseSync,
  rebuiltAtMs = Date.now(),
): LifetimeRebuildSummary {
  const summary = rebuildLifetimeSummariesInternal(db, rebuiltAtMs);
  return summary;
}
export function reconcileStaleActiveSessions(db: DatabaseSync): number {
@@ -113,6 +113,14 @@ function setLastRollupSampleMs(db: DatabaseSync, sampleMs: number): void {
).run(ROLLUP_STATE_KEY, sampleMs);
}
/**
 * Deletes every daily and monthly rollup row and resets the stored
 * last-rollup sample marker to `ZERO_ID`.
 *
 * @param db Database handle whose rollup tables are cleared.
 */
function resetRollups(db: DatabaseSync): void {
  const purgeRollupsSql = `
DELETE FROM imm_daily_rollups;
DELETE FROM imm_monthly_rollups;
`;
  db.exec(purgeRollupsSql);
  setLastRollupSampleMs(db, ZERO_ID);
}
function upsertDailyRollupsForGroups(
db: DatabaseSync,
groups: Array<{ rollupDay: number; videoId: number }>,
@@ -281,8 +289,20 @@ function dedupeGroups<T extends { rollupDay?: number; rollupMonth?: number; vide
}
export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): void {
if (forceRebuild) {
db.exec('BEGIN IMMEDIATE');
try {
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
throw error;
}
return;
}
const rollupNowMs = Date.now();
const lastRollupSampleMs = forceRebuild ? ZERO_ID : getLastRollupSampleMs(db);
const lastRollupSampleMs = getLastRollupSampleMs(db);
const maxSampleRow = db
.prepare('SELECT MAX(sample_ms) AS maxSampleMs FROM imm_session_telemetry')
@@ -324,6 +344,41 @@ export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): vo
}
}
/**
 * Discards all stored daily/monthly rollups and regenerates them from
 * scratch, then records the newest telemetry sample as the rollup marker.
 *
 * This function issues no BEGIN/COMMIT of its own. When the telemetry table
 * has no (or a zero) maximum sample, the rollups are simply left empty with
 * the marker reset.
 *
 * @param db Database handle to rebuild rollups in.
 */
export function rebuildRollupsInTransaction(db: DatabaseSync): void {
  const rollupNowMs = Date.now();
  // Read the newest telemetry sample before anything is wiped.
  const latestTelemetry = db
    .prepare('SELECT MAX(sample_ms) AS maxSampleMs FROM imm_session_telemetry')
    .get() as unknown as RollupTelemetryResult | null;

  resetRollups(db);

  const rawMaxSampleMs = latestTelemetry?.maxSampleMs;
  if (!rawMaxSampleMs) {
    return;
  }

  // ZERO_ID as the lower bound selects every group, not just recent ones.
  const groups = getAffectedRollupGroups(db, ZERO_ID);
  if (groups.length > 0) {
    const dailyGroups = dedupeGroups(
      groups.map(({ rollupDay, videoId }) => ({ rollupDay, videoId })),
    );
    const monthlyGroups = dedupeGroups(
      groups.map(({ rollupMonth, videoId }) => ({ rollupMonth, videoId })),
    );
    upsertDailyRollupsForGroups(db, dailyGroups, rollupNowMs);
    upsertMonthlyRollupsForGroups(db, monthlyGroups, rollupNowMs);
  }

  // The marker advances whether or not any groups needed rebuilding.
  setLastRollupSampleMs(db, Number(rawMaxSampleMs));
}
/**
 * Runs SQLite's `PRAGMA optimize` on the given database.
 *
 * @param db Database handle to run the pragma on.
 */
export function runOptimizeMaintenance(db: DatabaseSync): void {
  const optimizeStatement = 'PRAGMA optimize';
  db.exec(optimizeStatement);
}
+34 -2
View File
@@ -31,6 +31,8 @@ import type {
VocabularyStatsRow,
} from './types';
import { buildCoverBlobReference, normalizeCoverBlobBytes } from './storage';
import { rebuildLifetimeSummariesInTransaction } from './lifetime';
import { rebuildRollupsInTransaction } from './maintenance';
import { PartOfSpeech, type MergedToken } from '../../../types';
import { shouldExcludeTokenFromVocabularyPersistence } from '../tokenizer/annotation-stage';
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
@@ -1746,7 +1748,7 @@ export function getAnimeEpisodes(db: DatabaseSync, animeId: number): AnimeEpisod
v.duration_ms AS durationMs,
(
SELECT COALESCE(
s_recent.ended_media_ms,
NULLIF(s_recent.ended_media_ms, 0),
(
SELECT MAX(line.segment_end_ms)
FROM imm_subtitle_lines line
@@ -1817,6 +1819,17 @@ export function getMediaLibrary(db: DatabaseSync): MediaLibraryRow[] {
COALESCE(lm.total_cards, 0) AS totalCards,
COALESCE(lm.total_tokens_seen, 0) AS totalTokensSeen,
COALESCE(lm.last_watched_ms, 0) AS lastWatchedMs,
yv.youtube_video_id AS youtubeVideoId,
yv.video_url AS videoUrl,
yv.video_title AS videoTitle,
yv.video_thumbnail_url AS videoThumbnailUrl,
yv.channel_id AS channelId,
yv.channel_name AS channelName,
yv.channel_url AS channelUrl,
yv.channel_thumbnail_url AS channelThumbnailUrl,
yv.uploader_id AS uploaderId,
yv.uploader_url AS uploaderUrl,
yv.description AS description,
CASE
WHEN ma.cover_blob_hash IS NOT NULL OR ma.cover_blob IS NOT NULL THEN 1
ELSE 0
@@ -1824,6 +1837,7 @@ export function getMediaLibrary(db: DatabaseSync): MediaLibraryRow[] {
FROM imm_videos v
JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
LEFT JOIN imm_media_art ma ON ma.video_id = v.video_id
LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
ORDER BY lm.last_watched_ms DESC
`,
)
@@ -1846,9 +1860,21 @@ export function getMediaDetail(db: DatabaseSync, videoId: number): MediaDetailRo
COALESCE(lm.total_lines_seen, 0) AS totalLinesSeen,
COALESCE(SUM(COALESCE(asm.lookupCount, s.lookup_count, 0)), 0) AS totalLookupCount,
COALESCE(SUM(COALESCE(asm.lookupHits, s.lookup_hits, 0)), 0) AS totalLookupHits,
COALESCE(SUM(COALESCE(asm.yomitanLookupCount, s.yomitan_lookup_count, 0)), 0) AS totalYomitanLookupCount
COALESCE(SUM(COALESCE(asm.yomitanLookupCount, s.yomitan_lookup_count, 0)), 0) AS totalYomitanLookupCount,
yv.youtube_video_id AS youtubeVideoId,
yv.video_url AS videoUrl,
yv.video_title AS videoTitle,
yv.video_thumbnail_url AS videoThumbnailUrl,
yv.channel_id AS channelId,
yv.channel_name AS channelName,
yv.channel_url AS channelUrl,
yv.channel_thumbnail_url AS channelThumbnailUrl,
yv.uploader_id AS uploaderId,
yv.uploader_url AS uploaderUrl,
yv.description AS description
FROM imm_videos v
JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
LEFT JOIN imm_sessions s ON s.video_id = v.video_id
LEFT JOIN active_session_metrics asm ON asm.sessionId = s.session_id
WHERE v.video_id = ?
@@ -2443,6 +2469,8 @@ export function deleteSession(db: DatabaseSync, sessionId: number): void {
try {
deleteSessionsByIds(db, sessionIds);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -2459,6 +2487,8 @@ export function deleteSessions(db: DatabaseSync, sessionIds: number[]): void {
try {
deleteSessionsByIds(db, sessionIds);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -2495,6 +2525,8 @@ export function deleteVideo(db: DatabaseSync, videoId: number): void {
cleanupUnusedCoverArtBlobHash(db, artRow?.coverBlobHash ?? null);
db.prepare('DELETE FROM imm_videos WHERE video_id = ?').run(videoId);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -15,8 +15,14 @@ import {
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
linkYoutubeVideoToAnimeRecord,
} from './storage';
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
import {
EVENT_SUBTITLE_LINE,
SESSION_STATUS_ENDED,
SOURCE_TYPE_LOCAL,
SOURCE_TYPE_REMOTE,
} from './types';
function makeDbPath(): string {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-imm-storage-session-'));
@@ -106,6 +112,7 @@ test('ensureSchema creates immersion core tables', () => {
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
assert.ok(tableNames.has('imm_rollup_state'));
assert.ok(tableNames.has('imm_cover_art_blobs'));
assert.ok(tableNames.has('imm_youtube_videos'));
const videoColumns = new Set(
(
@@ -146,6 +153,114 @@ test('ensureSchema creates immersion core tables', () => {
}
});
// Migration test: starting from a hand-built database stamped as schema
// version 15, ensureSchema must create imm_youtube_videos with exactly the
// expected column set.
test('ensureSchema adds youtube metadata table to existing schema version 15 databases', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
// Minimal pre-existing schema: version marker, rollup state, anime, videos.
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (15, 1000);
CREATE TABLE imm_rollup_state(
state_key TEXT PRIMARY KEY,
state_value INTEGER NOT NULL
);
INSERT INTO imm_rollup_state(state_key, state_value) VALUES ('last_rollup_sample_ms', 123);
CREATE TABLE imm_anime(
anime_id INTEGER PRIMARY KEY AUTOINCREMENT,
normalized_title_key TEXT NOT NULL UNIQUE,
canonical_title TEXT NOT NULL,
anilist_id INTEGER UNIQUE,
title_romaji TEXT,
title_english TEXT,
title_native TEXT,
episodes_total INTEGER,
description TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
anime_id INTEGER,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
parsed_basename TEXT,
parsed_title TEXT,
parsed_season INTEGER,
parsed_episode INTEGER,
parser_source TEXT,
parser_confidence REAL,
parse_metadata_json TEXT,
watched INTEGER NOT NULL DEFAULT 0,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
);
`);
// Act: upgrade the legacy database in place.
ensureSchema(db);
const tables = new Set(
(
db.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`).all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(tables.has('imm_youtube_videos'));
const columns = new Set(
(
db.prepare('PRAGMA table_info(imm_youtube_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
// Set comparison: the table must have these columns and no others.
assert.deepEqual(
columns,
new Set([
'video_id',
'youtube_video_id',
'video_url',
'video_title',
'video_thumbnail_url',
'channel_id',
'channel_name',
'channel_url',
'channel_thumbnail_url',
'uploader_id',
'uploader_url',
'description',
'metadata_json',
'fetched_at_ms',
'CREATED_DATE',
'LAST_UPDATE_DATE',
]),
);
} finally {
// Always release the handle and remove the temporary database.
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema creates large-history performance indexes', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -169,6 +284,8 @@ test('ensureSchema creates large-history performance indexes', () => {
assert.ok(indexNames.has('idx_kanji_frequency'));
assert.ok(indexNames.has('idx_media_art_anilist_id'));
assert.ok(indexNames.has('idx_media_art_cover_url'));
assert.ok(indexNames.has('idx_youtube_videos_channel_id'));
assert.ok(indexNames.has('idx_youtube_videos_youtube_video_id'));
} finally {
db.close();
cleanupDbPath(dbPath);
@@ -706,6 +823,123 @@ test('anime rows are reused by normalized parsed title and upgraded with AniList
}
});
// Verifies that two YouTube videos, each first linked to its own fallback
// anime row, end up under one shared anime row titled after the channel once
// linkYoutubeVideoToAnimeRecord is called with the same channel metadata.
test('youtube videos can be regrouped under a shared channel anime identity', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const firstVideoId = getOrCreateVideoRecord(
db,
'remote:https://www.youtube.com/watch?v=video-1',
{
canonicalTitle: 'watch?v video-1',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=video-1',
sourceType: SOURCE_TYPE_REMOTE,
},
);
const secondVideoId = getOrCreateVideoRecord(
db,
'remote:https://www.youtube.com/watch?v=video-2',
{
canonicalTitle: 'watch?v video-2',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=video-2',
sourceType: SOURCE_TYPE_REMOTE,
},
);
// Initial state: each video gets its own low-confidence fallback anime row.
const firstAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'watch?v video-1',
canonicalTitle: 'watch?v video-1',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: null,
});
linkVideoToAnimeRecord(db, firstVideoId, {
animeId: firstAnimeId,
parsedBasename: null,
parsedTitle: 'watch?v video-1',
parsedSeason: null,
parsedEpisode: null,
parserSource: 'fallback',
parserConfidence: 0.2,
parseMetadataJson: '{"source":"fallback"}',
});
const secondAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'watch?v video-2',
canonicalTitle: 'watch?v video-2',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: null,
});
linkVideoToAnimeRecord(db, secondVideoId, {
animeId: secondAnimeId,
parsedBasename: null,
parsedTitle: 'watch?v video-2',
parsedSeason: null,
parsedEpisode: null,
parserSource: 'fallback',
parserConfidence: 0.2,
parseMetadataJson: '{"source":"fallback"}',
});
// Act: relink both videos using metadata that shares channelId 'UC123'.
linkYoutubeVideoToAnimeRecord(db, firstVideoId, {
youtubeVideoId: 'video-1',
videoUrl: 'https://www.youtube.com/watch?v=video-1',
videoTitle: 'Video One',
videoThumbnailUrl: 'https://i.ytimg.com/vi/video-1/hqdefault.jpg',
channelId: 'UC123',
channelName: 'Channel Name',
channelUrl: 'https://www.youtube.com/channel/UC123',
channelThumbnailUrl: 'https://yt3.googleusercontent.com/channel-123=s176-c-k-c0x00ffffff-no-rj',
uploaderId: '@channelname',
uploaderUrl: 'https://www.youtube.com/@channelname',
description: null,
metadataJson: '{"id":"video-1"}',
});
linkYoutubeVideoToAnimeRecord(db, secondVideoId, {
youtubeVideoId: 'video-2',
videoUrl: 'https://www.youtube.com/watch?v=video-2',
videoTitle: 'Video Two',
videoThumbnailUrl: 'https://i.ytimg.com/vi/video-2/hqdefault.jpg',
channelId: 'UC123',
channelName: 'Channel Name',
channelUrl: 'https://www.youtube.com/channel/UC123',
channelThumbnailUrl: 'https://yt3.googleusercontent.com/channel-123=s176-c-k-c0x00ffffff-no-rj',
uploaderId: '@channelname',
uploaderUrl: 'https://www.youtube.com/@channelname',
description: null,
metadataJson: '{"id":"video-2"}',
});
const animeRows = db.prepare('SELECT anime_id, canonical_title FROM imm_anime').all() as Array<{
anime_id: number;
canonical_title: string;
}>;
const videoRows = db
.prepare('SELECT video_id, anime_id, parsed_title FROM imm_videos ORDER BY video_id ASC')
.all() as Array<{ video_id: number; anime_id: number | null; parsed_title: string | null }>;
// Exactly one anime row is titled after the channel, and both videos point
// at it with the channel name as their parsed title.
const channelAnimeRows = animeRows.filter((row) => row.canonical_title === 'Channel Name');
assert.equal(channelAnimeRows.length, 1);
assert.equal(videoRows[0]?.anime_id, channelAnimeRows[0]?.anime_id);
assert.equal(videoRows[1]?.anime_id, channelAnimeRows[0]?.anime_id);
assert.equal(videoRows[0]?.parsed_title, 'Channel Name');
assert.equal(videoRows[1]?.parsed_title, 'Channel Name');
} finally {
// Always release the handle and remove the temporary database.
db.close();
cleanupDbPath(dbPath);
}
});
test('start/finalize session updates ended_at and status', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
+159 -1
View File
@@ -2,7 +2,7 @@ import { createHash } from 'node:crypto';
import { parseMediaInfo } from '../../../jimaku/utils';
import type { DatabaseSync } from './sqlite';
import { SCHEMA_VERSION } from './types';
import type { QueuedWrite, VideoMetadata } from './types';
import type { QueuedWrite, VideoMetadata, YoutubeVideoMetadata } from './types';
export interface TrackerPreparedStatements {
telemetryInsertStmt: ReturnType<DatabaseSync['prepare']>;
@@ -39,6 +39,41 @@ export interface VideoAnimeLinkInput {
parseMetadataJson: string | null;
}
/**
 * Derives a channel-level identity for a YouTube video from its metadata.
 *
 * The identity key (`parsedTitle`) is built from the first non-blank value
 * among channel id, channel URL, and channel name, each with its own prefix.
 * The display title (`canonicalTitle`) prefers channel name, then uploader id,
 * then video title, and finally the key itself. All inputs are trimmed, and
 * blank strings are treated as missing.
 *
 * @param metadata YouTube metadata for a single video.
 * @returns The identity, or `null` when no channel id, URL, or name is available.
 */
function buildYoutubeChannelAnimeIdentity(metadata: YoutubeVideoMetadata): {
  parsedTitle: string;
  canonicalTitle: string;
  metadataJson: string;
} | null {
  // `||` (not `??`) is deliberate: an empty string after trimming counts as absent.
  const normalize = (value: string | null | undefined): string | null => value?.trim() || null;

  const channelId = normalize(metadata.channelId);
  const channelUrl = normalize(metadata.channelUrl);
  const channelName = normalize(metadata.channelName);
  const uploaderId = normalize(metadata.uploaderId);
  const videoTitle = normalize(metadata.videoTitle);

  let parsedTitle: string;
  if (channelId) {
    parsedTitle = `youtube-channel:${channelId}`;
  } else if (channelUrl) {
    parsedTitle = `youtube-channel-url:${channelUrl}`;
  } else if (channelName) {
    parsedTitle = `youtube-channel-name:${channelName}`;
  } else {
    return null;
  }

  const canonicalTitle = channelName || uploaderId || videoTitle || parsedTitle;
  const metadataJson = JSON.stringify({
    source: 'youtube-channel',
    channelId,
    channelUrl,
    channelName,
    uploaderId,
  });

  return { parsedTitle, canonicalTitle, metadataJson };
}
// String prefix for cover-blob reference values.
// NOTE(review): its consumers are not visible in this chunk — confirm usage.
const COVER_BLOB_REFERENCE_PREFIX = '__subminer_cover_blob_ref__:';
// 64 MiB expressed in bytes.
// NOTE(review): presumably applied as the WAL journal size limit — confirm where it is used.
const WAL_JOURNAL_SIZE_LIMIT_BYTES = 64 * 1024 * 1024;
@@ -439,6 +474,38 @@ export function linkVideoToAnimeRecord(
);
}
/**
 * Links a video to the anime record that represents its YouTube channel,
 * creating or reusing that record via `getOrCreateAnimeRecord`.
 *
 * The anime record is keyed by the channel identity derived from `metadata`.
 * The video link stores the identity's display title as its parsed title,
 * with parser source `'youtube'` and confidence 1.
 *
 * @param db Database handle.
 * @param videoId Identifier of the video row to link.
 * @param metadata YouTube metadata used to derive the channel identity.
 * @returns The anime id the video was linked to, or `null` when the metadata
 *   carries no usable channel identity (nothing is written in that case).
 */
export function linkYoutubeVideoToAnimeRecord(
  db: DatabaseSync,
  videoId: number,
  metadata: YoutubeVideoMetadata,
): number | null {
  const channelIdentity = buildYoutubeChannelAnimeIdentity(metadata);
  if (channelIdentity === null) {
    return null;
  }

  const { parsedTitle, canonicalTitle, metadataJson } = channelIdentity;

  const animeId = getOrCreateAnimeRecord(db, {
    parsedTitle,
    canonicalTitle,
    anilistId: null,
    titleRomaji: null,
    titleEnglish: null,
    titleNative: null,
    metadataJson,
  });

  // The video row records the display title, not the prefixed identity key.
  linkVideoToAnimeRecord(db, videoId, {
    animeId,
    parsedBasename: null,
    parsedTitle: canonicalTitle,
    parsedSeason: null,
    parsedEpisode: null,
    parserSource: 'youtube',
    parserConfidence: 1,
    parseMetadataJson: metadataJson,
  });

  return animeId;
}
function migrateLegacyAnimeMetadata(db: DatabaseSync): void {
addColumnIfMissing(db, 'imm_videos', 'anime_id', 'INTEGER REFERENCES imm_anime(anime_id)');
addColumnIfMissing(db, 'imm_videos', 'parsed_basename', 'TEXT');
@@ -743,6 +810,27 @@ export function ensureSchema(db: DatabaseSync): void {
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE
);
`);
db.exec(`
CREATE TABLE IF NOT EXISTS imm_youtube_videos(
video_id INTEGER PRIMARY KEY,
youtube_video_id TEXT NOT NULL,
video_url TEXT NOT NULL,
video_title TEXT,
video_thumbnail_url TEXT,
channel_id TEXT,
channel_name TEXT,
channel_url TEXT,
channel_thumbnail_url TEXT,
uploader_id TEXT,
uploader_url TEXT,
description TEXT,
metadata_json TEXT,
fetched_at_ms INTEGER NOT NULL,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE
);
`);
db.exec(`
CREATE TABLE IF NOT EXISTS imm_cover_art_blobs(
blob_hash TEXT PRIMARY KEY,
@@ -1134,6 +1222,14 @@ export function ensureSchema(db: DatabaseSync): void {
CREATE INDEX IF NOT EXISTS idx_media_art_cover_url
ON imm_media_art(cover_url)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_youtube_videos_channel_id
ON imm_youtube_videos(channel_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_youtube_videos_youtube_video_id
ON imm_youtube_videos(youtube_video_id)
`);
if (currentVersion?.schema_version && currentVersion.schema_version < SCHEMA_VERSION) {
db.exec('DELETE FROM imm_daily_rollups');
@@ -1506,3 +1602,65 @@ export function updateVideoTitleRecord(
`,
).run(canonicalTitle, Date.now(), videoId);
}
/**
 * Inserts the YouTube metadata row for a video, or refreshes it when a row
 * for the same `video_id` already exists.
 *
 * On conflict every metadata column, `fetched_at_ms`, and `LAST_UPDATE_DATE`
 * are overwritten; `CREATED_DATE` is left as originally stored. Optional
 * metadata fields that are `undefined` are stored as `NULL`.
 *
 * @param db Database handle.
 * @param videoId Identifier of the video the metadata belongs to.
 * @param metadata Metadata values to persist.
 */
export function upsertYoutubeVideoMetadata(
  db: DatabaseSync,
  videoId: number,
  metadata: YoutubeVideoMetadata,
): void {
  // One timestamp is shared by fetched_at_ms, CREATED_DATE and LAST_UPDATE_DATE.
  const timestampMs = Date.now();
  const upsertSql = `
INSERT INTO imm_youtube_videos (
video_id,
youtube_video_id,
video_url,
video_title,
video_thumbnail_url,
channel_id,
channel_name,
channel_url,
channel_thumbnail_url,
uploader_id,
uploader_url,
description,
metadata_json,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(video_id) DO UPDATE SET
youtube_video_id = excluded.youtube_video_id,
video_url = excluded.video_url,
video_title = excluded.video_title,
video_thumbnail_url = excluded.video_thumbnail_url,
channel_id = excluded.channel_id,
channel_name = excluded.channel_name,
channel_url = excluded.channel_url,
channel_thumbnail_url = excluded.channel_thumbnail_url,
uploader_id = excluded.uploader_id,
uploader_url = excluded.uploader_url,
description = excluded.description,
metadata_json = excluded.metadata_json,
fetched_at_ms = excluded.fetched_at_ms,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`;
  const statement = db.prepare(upsertSql);

  // Order must match the column list of the INSERT above.
  const boundValues = [
    videoId,
    metadata.youtubeVideoId,
    metadata.videoUrl,
    metadata.videoTitle ?? null,
    metadata.videoThumbnailUrl ?? null,
    metadata.channelId ?? null,
    metadata.channelName ?? null,
    metadata.channelUrl ?? null,
    metadata.channelThumbnailUrl ?? null,
    metadata.uploaderId ?? null,
    metadata.uploaderUrl ?? null,
    metadata.description ?? null,
    metadata.metadataJson ?? null,
    timestampMs,
    timestampMs,
    timestampMs,
  ];
  statement.run(...boundValues);
}
+38 -1
View File
@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 15;
export const SCHEMA_VERSION = 16;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -420,6 +420,17 @@ export interface MediaLibraryRow {
totalTokensSeen: number;
lastWatchedMs: number;
hasCoverArt: number;
youtubeVideoId: string | null;
videoUrl: string | null;
videoTitle: string | null;
videoThumbnailUrl: string | null;
channelId: string | null;
channelName: string | null;
channelUrl: string | null;
channelThumbnailUrl: string | null;
uploaderId: string | null;
uploaderUrl: string | null;
description: string | null;
}
export interface MediaDetailRow {
@@ -434,6 +445,32 @@ export interface MediaDetailRow {
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
youtubeVideoId: string | null;
videoUrl: string | null;
videoTitle: string | null;
videoThumbnailUrl: string | null;
channelId: string | null;
channelName: string | null;
channelUrl: string | null;
channelThumbnailUrl: string | null;
uploaderId: string | null;
uploaderUrl: string | null;
description: string | null;
}
/**
 * Metadata describing a single YouTube video and its channel/uploader.
 *
 * Only the video id and URL are required; every other field may be `null`
 * when the value is unavailable.
 */
export interface YoutubeVideoMetadata {
// Required identifiers for the video.
youtubeVideoId: string;
videoUrl: string;
// Optional video presentation details.
videoTitle: string | null;
videoThumbnailUrl: string | null;
// Optional channel details.
channelId: string | null;
channelName: string | null;
channelUrl: string | null;
channelThumbnailUrl: string | null;
// Optional uploader details.
uploaderId: string | null;
uploaderUrl: string | null;
description: string | null;
// Serialized JSON text, or null when none is stored.
// NOTE(review): the producer and exact payload are not visible here — confirm.
metadataJson: string | null;
}
export interface AnimeLibraryRow {