feat: add app-owned YouTube subtitle flow with absPlayer-style parsing (#31)

* fix: harden preload argv parsing for popup windows

* fix: align youtube playback with shared overlay startup

* fix: unwrap mpv youtube streams for anki media mining

* docs: update docs for youtube subtitle and mining flow

* refactor: unify cli and runtime wiring for startup and youtube flow

* feat: update subtitle sidebar overlay behavior

* chore: add shared log-file source for diagnostics

* fix(ci): add changelog fragment for immersion changes

* fix: address CodeRabbit review feedback

* fix: persist canonical title from youtube metadata

* style: format stats library tab

* fix: address latest review feedback

* style: format stats library files

* test: stub launcher youtube deps in CI

* test: isolate launcher youtube flow deps

* test: stub launcher youtube deps in failing case

* test: force x11 backend in launcher ci harness

* test: address latest review feedback

* fix(launcher): preserve user YouTube ytdl raw options

* docs(backlog): update task tracking notes

* fix(immersion): special-case youtube media paths in runtime and tracking

* feat(stats): improve YouTube media metadata and picker key handling

* fix(ci): format stats media library hook

* fix: address latest CodeRabbit review items

* docs: update youtube release notes and docs

* feat: auto-load youtube subtitles before manual picker

* fix: restore app-owned youtube subtitle flow

* docs: update youtube playback docs and config copy

* refactor: remove legacy youtube launcher mode plumbing

* fix: refine youtube subtitle startup binding

* docs: clarify youtube subtitle startup behavior

* fix: address PR #31 latest review follow-ups

* fix: address PR #31 follow-up review comments

* test: harden youtube picker test harness

* update backlog

* fix: add timeout to youtube metadata probe

* docs: refresh youtube and stats docs

* update backlog

* update backlog

* chore: release v0.9.0
This commit is contained in:
2026-03-24 00:01:24 -07:00
committed by GitHub
parent c17f0a4080
commit 5feed360ca
219 changed files with 12778 additions and 1052 deletions

View File

@@ -9,6 +9,8 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
start: false,
launchMpv: false,
launchMpvTargets: [],
youtubePlay: undefined,
youtubeMode: undefined,
stop: false,
toggle: false,
toggleVisibleOverlay: false,
@@ -184,6 +186,9 @@ function createDeps(overrides: Partial<CliCommandServiceDeps> = {}) {
runJellyfinCommand: async () => {
calls.push('runJellyfinCommand');
},
runYoutubePlaybackFlow: async (request) => {
calls.push(`runYoutubePlaybackFlow:${request.url}:${request.mode}:${request.source}`);
},
printHelp: () => {
calls.push('printHelp');
},
@@ -207,6 +212,58 @@ function createDeps(overrides: Partial<CliCommandServiceDeps> = {}) {
return { deps, calls, osd };
}
// Initial launch with an explicit subtitle mode: the recorded calls must show the overlay
// runtime being initialized before the YouTube flow receives the URL and mode from the args.
test('handleCliCommand starts youtube playback flow on initial launch', () => {
  const { deps, calls } = createDeps({
    runYoutubePlaybackFlow: async ({ url, mode }) => {
      calls.push(`youtube:${url}:${mode}`);
    },
  });
  const args = makeArgs({
    youtubePlay: 'https://youtube.com/watch?v=abc',
    youtubeMode: 'generate',
  });

  handleCliCommand(args, 'initial', deps);

  const expectedCalls = [
    'initializeOverlayRuntime',
    'youtube:https://youtube.com/watch?v=abc:generate',
  ];
  assert.deepEqual(calls, expectedCalls);
});
// When the args carry a YouTube URL but no mode, the flow must be invoked with 'download'.
test('handleCliCommand defaults youtube mode to download when omitted', () => {
  const { deps, calls } = createDeps({
    runYoutubePlaybackFlow: async ({ url, mode }) => {
      calls.push(`youtube:${url}:${mode}`);
    },
  });
  const args = makeArgs({ youtubePlay: 'https://youtube.com/watch?v=abc' });

  handleCliCommand(args, 'initial', deps);

  const expectedCalls = [
    'initializeOverlayRuntime',
    'youtube:https://youtube.com/watch?v=abc:download',
  ];
  assert.deepEqual(calls, expectedCalls);
});
// A rejected YouTube flow must surface both as a logged error entry and as an OSD message.
test('handleCliCommand reports youtube playback flow failures to logs and OSD', async () => {
  const { deps, calls, osd } = createDeps({
    runYoutubePlaybackFlow: () => Promise.reject(new Error('yt failed')),
  });
  const args = makeArgs({
    youtubePlay: 'https://youtube.com/watch?v=abc',
    youtubeMode: 'download',
  });

  handleCliCommand(args, 'initial', deps);

  // Yield one macrotask so the rejection handler attached by handleCliCommand has run.
  await new Promise((resolve) => setImmediate(resolve));

  const loggedFailure = calls.some((entry) =>
    entry.startsWith('error:runYoutubePlaybackFlow failed:'),
  );
  assert.ok(loggedFailure);
  assert.ok(osd.includes('YouTube playback failed: yt failed'));
});
test('handleCliCommand reconnects MPV for second-instance --start when overlay runtime is already initialized', () => {
const { deps, calls } = createDeps({
isOverlayRuntimeInitialized: () => true,

View File

@@ -63,6 +63,11 @@ export interface CliCommandServiceDeps {
}>;
runStatsCommand: (args: CliArgs, source: CliCommandSource) => Promise<void>;
runJellyfinCommand: (args: CliArgs) => Promise<void>;
runYoutubePlaybackFlow: (request: {
url: string;
mode: NonNullable<CliArgs['youtubeMode']>;
source: CliCommandSource;
}) => Promise<void>;
printHelp: () => void;
hasMainWindow: () => boolean;
getMultiCopyTimeoutMs: () => number;
@@ -135,6 +140,7 @@ interface AnilistCliRuntime {
interface AppCliRuntime {
stop: () => void;
hasMainWindow: () => boolean;
runYoutubePlaybackFlow: CliCommandServiceDeps['runYoutubePlaybackFlow'];
}
export interface CliCommandDepsRuntimeOptions {
@@ -226,6 +232,7 @@ export function createCliCommandDepsRuntime(
generateCharacterDictionary: options.dictionary.generate,
runStatsCommand: options.jellyfin.runStatsCommand,
runJellyfinCommand: options.jellyfin.runCommand,
runYoutubePlaybackFlow: options.app.runYoutubePlaybackFlow,
printHelp: options.ui.printHelp,
hasMainWindow: options.app.hasMainWindow,
getMultiCopyTimeoutMs: options.getMultiCopyTimeoutMs,
@@ -396,6 +403,19 @@ export function handleCliCommand(
} else if (args.jellyfin) {
deps.openJellyfinSetup();
deps.log('Opened Jellyfin setup flow.');
} else if (args.youtubePlay) {
const youtubeUrl = args.youtubePlay;
runAsyncWithOsd(
() =>
deps.runYoutubePlaybackFlow({
url: youtubeUrl,
mode: args.youtubeMode ?? 'download',
source,
}),
deps,
'runYoutubePlaybackFlow',
'YouTube playback failed',
);
} else if (args.dictionary) {
const shouldStopAfterRun = source === 'initial' && !deps.hasMainWindow();
deps.log('Generating character dictionary for current anime...');

View File

@@ -37,6 +37,21 @@ async function waitForPendingAnimeMetadata(tracker: ImmersionTrackerService): Pr
await privateApi.pendingAnimeMetadataUpdates?.get(videoId);
}
/**
 * Polls `predicate` until it passes or the timeout elapses.
 *
 * @param predicate - Synchronous check evaluated on every poll.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @param intervalMs - Delay between polls, in milliseconds.
 * @returns Resolves as soon as `predicate` returns true.
 * @throws AssertionError when the predicate is still false after the deadline.
 */
async function waitForCondition(
  predicate: () => boolean,
  timeoutMs = 1_000,
  intervalMs = 10,
): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    if (predicate()) {
      return;
    }
    await new Promise<void>((resolve) => setTimeout(resolve, intervalMs));
  }
  // One last evaluation after the deadline so a condition that flipped during the final sleep
  // still passes. The message names the timeout so a failure is diagnosable from the test log
  // instead of reading as a bare "false == true".
  assert.ok(predicate(), `waitForCondition: condition not met within ${timeoutMs}ms`);
}
function makeMergedToken(overrides: Partial<MergedToken>): MergedToken {
return {
surface: '',
@@ -1269,6 +1284,40 @@ test('flushTelemetry checkpoints latest playback position on the active session
}
});
// With no playback position supplied (last argument is null), the session checkpoint is
// expected to follow the subtitle line itself: the line's end value 185 must be persisted
// as ended_media_ms = 185_000 once telemetry is flushed.
test('recordSubtitleLine advances session checkpoint progress when playback position is unavailable', async () => {
  const dbPath = makeDbPath();
  let tracker: ImmersionTrackerService | null = null;

  try {
    const TrackerCtor = await loadTrackerCtor();
    tracker = new TrackerCtor({ dbPath });

    tracker.handleMediaChange(
      'https://stream.example.com/subtitle-progress.m3u8',
      'Subtitle Progress',
    );
    tracker.recordSubtitleLine('line one', 170, 185, [], null);

    // Reach into private members so the test can flush synchronously and read the raw row.
    const internals = tracker as unknown as {
      db: DatabaseSync;
      sessionState: { sessionId: number } | null;
      flushTelemetry: (force?: boolean) => void;
      flushNow: () => void;
    };
    const activeSessionId = internals.sessionState?.sessionId;
    assert.ok(activeSessionId);

    internals.flushTelemetry(true);
    internals.flushNow();

    const selectCheckpoint = internals.db.prepare(
      'SELECT ended_media_ms FROM imm_sessions WHERE session_id = ?',
    );
    const checkpoint = selectCheckpoint.get(activeSessionId) as {
      ended_media_ms: number | null;
    } | null;

    assert.equal(checkpoint?.ended_media_ms, 185_000);
  } finally {
    tracker?.destroy();
    cleanupDbPath(dbPath);
  }
});
test('deleteSession ignores the currently active session and keeps new writes flushable', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
@@ -2297,6 +2346,565 @@ test('reassignAnimeAnilist preserves existing description when description is om
}
});
// Verifies that switching to a YouTube URL ends up writing a row to imm_youtube_videos and
// retitling the video / anime records from the probed metadata.
// A stub `yt-dlp` executable is placed first on PATH and `fetch` is replaced, presumably so the
// tracker's metadata probe and cover download never touch the real tool or the network.
test('handleMediaChange stores youtube metadata for new youtube sessions', async () => {
  const dbPath = makeDbPath();
  let tracker: ImmersionTrackerService | null = null;
  const originalFetch = globalThis.fetch;
  const originalPath = process.env.PATH;
  let fakeBinDir: string | null = null;
  try {
    fakeBinDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yt-dlp-bin-'));
    const ytDlpOutput =
      '{"id":"abc123","title":"Video Name","webpage_url":"https://www.youtube.com/watch?v=abc123","thumbnail":"https://i.ytimg.com/vi/abc123/hqdefault.jpg","channel_id":"UCcreator123","channel":"Creator Name","channel_url":"https://www.youtube.com/channel/UCcreator123","uploader_id":"@creator","uploader_url":"https://www.youtube.com/@creator","description":"Video description","channel_follower_count":12345,"thumbnails":[{"url":"https://i.ytimg.com/vi/abc123/hqdefault.jpg"},{"url":"https://yt3.googleusercontent.com/channel-avatar=s88"}]}';
    if (process.platform === 'win32') {
      // Windows stub: a .cmd file that prints the canned JSON stored next to it.
      const outputPath = path.join(fakeBinDir, 'output.json');
      fs.writeFileSync(outputPath, ytDlpOutput, 'utf8');
      fs.writeFileSync(
        path.join(fakeBinDir, 'yt-dlp.cmd'),
        '@echo off\r\ntype "%~dp0output.json"\r\n',
        'utf8',
      );
    } else {
      // POSIX stub: an executable shell script that prints the canned JSON.
      const scriptPath = path.join(fakeBinDir, 'yt-dlp');
      fs.writeFileSync(
        scriptPath,
        `#!/bin/sh
printf '%s\n' '${ytDlpOutput}'
`,
        { mode: 0o755 },
      );
    }
    process.env.PATH = `${fakeBinDir}${path.delimiter}${originalPath ?? ''}`;
    // oEmbed requests get a JSON payload; every other request gets a tiny fake JPEG body.
    globalThis.fetch = async (input) => {
      const url = String(input);
      if (url.includes('/oembed')) {
        return new Response(
          JSON.stringify({
            thumbnail_url: 'https://i.ytimg.com/vi/abc123/hqdefault.jpg',
          }),
          { status: 200, headers: { 'Content-Type': 'application/json' } },
        );
      }
      return new Response(new Uint8Array([1, 2, 3]), {
        status: 200,
        headers: { 'Content-Type': 'image/jpeg' },
      });
    };
    const Ctor = await loadTrackerCtor();
    tracker = new Ctor({ dbPath });
    tracker.handleMediaChange('https://www.youtube.com/watch?v=abc123', 'Player Title');
    const privateApi = tracker as unknown as { db: DatabaseSync };
    // The metadata row is written asynchronously, so poll until it exists.
    await waitForCondition(
      () => {
        const stored = privateApi.db
          .prepare("SELECT 1 AS ready FROM imm_youtube_videos WHERE youtube_video_id = 'abc123'")
          .get() as { ready: number } | null;
        return stored?.ready === 1;
      },
      5_000,
    );
    const row = privateApi.db
      .prepare(
        `
SELECT
youtube_video_id AS youtubeVideoId,
video_url AS videoUrl,
video_title AS videoTitle,
video_thumbnail_url AS videoThumbnailUrl,
channel_id AS channelId,
channel_name AS channelName,
channel_url AS channelUrl,
channel_thumbnail_url AS channelThumbnailUrl,
uploader_id AS uploaderId,
uploader_url AS uploaderUrl,
description AS description
FROM imm_youtube_videos
`,
      )
      .get() as {
      youtubeVideoId: string;
      videoUrl: string;
      videoTitle: string;
      videoThumbnailUrl: string;
      channelId: string;
      channelName: string;
      channelUrl: string;
      channelThumbnailUrl: string;
      uploaderId: string;
      uploaderUrl: string;
      description: string;
    } | null;
    const videoRow = privateApi.db
      .prepare(
        `
SELECT canonical_title AS canonicalTitle
FROM imm_videos
WHERE video_id = 1
`,
      )
      .get() as { canonicalTitle: string } | null;
    const animeRow = privateApi.db
      .prepare(
        `
SELECT
a.canonical_title AS canonicalTitle,
v.parsed_title AS parsedTitle,
v.parser_source AS parserSource
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
WHERE v.video_id = 1
`,
      )
      .get() as {
      canonicalTitle: string;
      parsedTitle: string | null;
      parserSource: string | null;
    } | null;
    assert.ok(row);
    assert.ok(videoRow);
    assert.equal(row.youtubeVideoId, 'abc123');
    assert.equal(row.videoUrl, 'https://www.youtube.com/watch?v=abc123');
    assert.equal(row.videoTitle, 'Video Name');
    assert.equal(row.videoThumbnailUrl, 'https://i.ytimg.com/vi/abc123/hqdefault.jpg');
    assert.equal(row.channelId, 'UCcreator123');
    assert.equal(row.channelName, 'Creator Name');
    assert.equal(row.channelUrl, 'https://www.youtube.com/channel/UCcreator123');
    assert.equal(row.channelThumbnailUrl, 'https://yt3.googleusercontent.com/channel-avatar=s88');
    assert.equal(row.uploaderId, '@creator');
    assert.equal(row.uploaderUrl, 'https://www.youtube.com/@creator');
    assert.equal(row.description, 'Video description');
    // The video takes the probed title; the anime grouping takes the channel name.
    assert.equal(videoRow.canonicalTitle, 'Video Name');
    assert.equal(animeRow?.canonicalTitle, 'Creator Name');
    assert.equal(animeRow?.parsedTitle, 'Creator Name');
    assert.equal(animeRow?.parserSource, 'youtube');
  } finally {
    // Assigning `undefined` to a process.env key stores the string "undefined", so an
    // originally-unset PATH has to be restored by deleting the key instead.
    if (originalPath === undefined) {
      delete process.env.PATH;
    } else {
      process.env.PATH = originalPath;
    }
    globalThis.fetch = originalFetch;
    tracker?.destroy();
    cleanupDbPath(dbPath);
    if (fakeBinDir) {
      fs.rmSync(fakeBinDir, { recursive: true, force: true });
    }
  }
});
// Verifies the lazy backfill path: a pre-existing YouTube video row with no imm_youtube_videos
// entry is returned as-is by the first getMediaLibrary() call, and a later call reflects the
// metadata produced by the stub `yt-dlp` placed first on PATH.
test('getMediaLibrary lazily backfills missing youtube metadata for existing rows', async () => {
  const dbPath = makeDbPath();
  let tracker: ImmersionTrackerService | null = null;
  const originalPath = process.env.PATH;
  let fakeBinDir: string | null = null;
  try {
    fakeBinDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yt-dlp-bin-'));
    const ytDlpOutput =
      '{"id":"backfill123","title":"Backfilled Video Title","webpage_url":"https://www.youtube.com/watch?v=backfill123","thumbnail":"https://i.ytimg.com/vi/backfill123/hqdefault.jpg","channel_id":"UCbackfill123","channel":"Backfill Creator","channel_url":"https://www.youtube.com/channel/UCbackfill123","uploader_id":"@backfill","uploader_url":"https://www.youtube.com/@backfill","description":"Backfilled description","thumbnails":[{"url":"https://i.ytimg.com/vi/backfill123/hqdefault.jpg"},{"url":"https://yt3.googleusercontent.com/backfill-avatar=s88"}]}';
    if (process.platform === 'win32') {
      // Windows stub: a .cmd file that prints the canned JSON stored next to it.
      const outputPath = path.join(fakeBinDir, 'output.json');
      fs.writeFileSync(outputPath, ytDlpOutput, 'utf8');
      fs.writeFileSync(
        path.join(fakeBinDir, 'yt-dlp.cmd'),
        '@echo off\r\ntype "%~dp0output.json"\r\n',
        'utf8',
      );
    } else {
      // POSIX stub: an executable shell script that prints the canned JSON.
      const scriptPath = path.join(fakeBinDir, 'yt-dlp');
      fs.writeFileSync(
        scriptPath,
        `#!/bin/sh
printf '%s\n' '${ytDlpOutput}'
`,
        { mode: 0o755 },
      );
    }
    process.env.PATH = `${fakeBinDir}${path.delimiter}${originalPath ?? ''}`;
    const Ctor = await loadTrackerCtor();
    tracker = new Ctor({ dbPath });
    const privateApi = tracker as unknown as { db: DatabaseSync };
    const nowMs = Date.now();
    // Seed a remote YouTube video whose title is still the URL-derived placeholder.
    privateApi.db
      .prepare(
        `
INSERT INTO imm_videos (
video_key,
canonical_title,
source_type,
source_path,
source_url,
duration_ms,
file_size_bytes,
codec_id,
container_id,
width_px,
height_px,
fps_x100,
bitrate_kbps,
audio_codec_id,
hash_sha256,
screenshot_path,
metadata_json,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
      )
      .run(
        'remote:https://www.youtube.com/watch?v=backfill123',
        'watch?v=backfill123',
        2,
        null,
        'https://www.youtube.com/watch?v=backfill123',
        0,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        nowMs,
        nowMs,
      );
    // The backfill query joins imm_lifetime_media, so the video needs a lifetime row too.
    privateApi.db
      .prepare(
        `
INSERT INTO imm_lifetime_media (
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
      )
      .run(1, 1, 5_000, 0, 0, 50, 0, nowMs, nowMs, nowMs, nowMs);
    const before = await tracker.getMediaLibrary();
    assert.equal(before[0]?.channelName ?? null, null);
    // The first call only schedules the backfill; poll until the stored row is complete.
    await waitForCondition(() => {
      const row = privateApi.db
        .prepare(
          `
SELECT
video_title AS videoTitle,
channel_name AS channelName,
channel_thumbnail_url AS channelThumbnailUrl
FROM imm_youtube_videos
WHERE video_id = 1
`,
        )
        .get() as {
        videoTitle: string | null;
        channelName: string | null;
        channelThumbnailUrl: string | null;
      } | null;
      return (
        row?.videoTitle === 'Backfilled Video Title' &&
        row.channelName === 'Backfill Creator' &&
        row.channelThumbnailUrl === 'https://yt3.googleusercontent.com/backfill-avatar=s88'
      );
    }, 5_000);
    const after = await tracker.getMediaLibrary();
    assert.equal(after[0]?.videoTitle, 'Backfilled Video Title');
    assert.equal(after[0]?.channelName, 'Backfill Creator');
    assert.equal(
      after[0]?.channelThumbnailUrl,
      'https://yt3.googleusercontent.com/backfill-avatar=s88',
    );
  } finally {
    // Assigning `undefined` to a process.env key stores the string "undefined", so an
    // originally-unset PATH has to be restored by deleting the key instead.
    if (originalPath === undefined) {
      delete process.env.PATH;
    } else {
      process.env.PATH = originalPath;
    }
    tracker?.destroy();
    cleanupDbPath(dbPath);
    if (fakeBinDir) {
      fs.rmSync(fakeBinDir, { recursive: true, force: true });
    }
  }
});
// Verifies that getAnimeLibrary() regroups YouTube videos under their channel.
// Fixture: two remote YouTube videos, each linked to its own placeholder anime row
// ('watch?v first' / 'watch?v second'), while both imm_youtube_videos rows carry the same
// channel_name 'Shared Channel'. After the call, the library must expose a single
// 'Shared Channel' entry with two episodes and both videos must point at that anime row.
test('getAnimeLibrary lazily relinks youtube rows to channel groupings', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;
try {
const Ctor = await loadTrackerCtor();
tracker = new Ctor({ dbPath });
const privateApi = tracker as unknown as { db: DatabaseSync };
const nowMs = Date.now();
// Seed, in order: anime placeholders, videos, youtube metadata, sessions, and the
// lifetime rollups for anime and media.
privateApi.db.exec(`
INSERT INTO imm_anime (
anime_id,
normalized_title_key,
canonical_title,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES
(1, 'watch v first', 'watch?v first', ${nowMs}, ${nowMs}),
(2, 'watch v second', 'watch?v second', ${nowMs}, ${nowMs});
INSERT INTO imm_videos (
video_id,
anime_id,
video_key,
canonical_title,
parsed_title,
parser_source,
source_type,
source_path,
source_url,
duration_ms,
file_size_bytes,
codec_id,
container_id,
width_px,
height_px,
fps_x100,
bitrate_kbps,
audio_codec_id,
hash_sha256,
screenshot_path,
metadata_json,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES
(
1,
1,
'remote:https://www.youtube.com/watch?v=first',
'watch?v first',
'watch?v first',
'fallback',
2,
NULL,
'https://www.youtube.com/watch?v=first',
0,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
${nowMs},
${nowMs}
),
(
2,
2,
'remote:https://www.youtube.com/watch?v=second',
'watch?v second',
'watch?v second',
'fallback',
2,
NULL,
'https://www.youtube.com/watch?v=second',
0,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
${nowMs},
${nowMs}
);
INSERT INTO imm_youtube_videos (
video_id,
youtube_video_id,
video_url,
video_title,
video_thumbnail_url,
channel_id,
channel_name,
channel_url,
channel_thumbnail_url,
uploader_id,
uploader_url,
description,
metadata_json,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES
(
1,
'first',
'https://www.youtube.com/watch?v=first',
'First Video',
'https://i.ytimg.com/vi/first/hqdefault.jpg',
'UCchannel1',
'Shared Channel',
'https://www.youtube.com/channel/UCchannel1',
'https://yt3.googleusercontent.com/shared=s88',
'@shared',
'https://www.youtube.com/@shared',
NULL,
'{}',
${nowMs},
${nowMs},
${nowMs}
),
(
2,
'second',
'https://www.youtube.com/watch?v=second',
'Second Video',
'https://i.ytimg.com/vi/second/hqdefault.jpg',
'UCchannel1',
'Shared Channel',
'https://www.youtube.com/channel/UCchannel1',
'https://yt3.googleusercontent.com/shared=s88',
'@shared',
'https://www.youtube.com/@shared',
NULL,
'{}',
${nowMs},
${nowMs},
${nowMs}
);
INSERT INTO imm_sessions (
session_id,
session_uuid,
video_id,
started_at_ms,
ended_at_ms,
status,
total_watched_ms,
active_watched_ms,
lines_seen,
tokens_seen,
cards_mined,
lookup_count,
lookup_hits,
yomitan_lookup_count,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES
(
1,
'session-youtube-1',
1,
${nowMs - 70000},
${nowMs - 10000},
2,
65000,
60000,
0,
100,
0,
0,
0,
0,
${nowMs},
${nowMs}
),
(
2,
'session-youtube-2',
2,
${nowMs - 50000},
${nowMs - 5000},
2,
35000,
30000,
0,
50,
0,
0,
0,
0,
${nowMs},
${nowMs}
);
INSERT INTO imm_lifetime_anime (
anime_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
episodes_started,
episodes_completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES
(1, 1, 60000, 0, 0, 100, 1, 0, ${nowMs}, ${nowMs}, ${nowMs}, ${nowMs}),
(2, 1, 30000, 0, 0, 50, 1, 0, ${nowMs}, ${nowMs}, ${nowMs}, ${nowMs});
INSERT INTO imm_lifetime_media (
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES
(1, 1, 60000, 0, 0, 100, 0, ${nowMs}, ${nowMs}, ${nowMs}, ${nowMs}),
(2, 1, 30000, 0, 0, 50, 0, ${nowMs}, ${nowMs}, ${nowMs}, ${nowMs});
`);
// The library result must contain exactly one channel-titled entry covering both videos.
const rows = await tracker.getAnimeLibrary();
const sharedRows = rows.filter((row) => row.canonicalTitle === 'Shared Channel');
assert.equal(sharedRows.length, 1);
assert.equal(sharedRows[0]?.episodeCount, 2);
// Cross-check the stored links: the anime row with the most videos must be the channel row.
const relinked = privateApi.db
.prepare(
`
SELECT a.canonical_title AS canonicalTitle, COUNT(*) AS total
FROM imm_videos v
JOIN imm_anime a ON a.anime_id = v.anime_id
GROUP BY a.anime_id, a.canonical_title
ORDER BY total DESC, a.anime_id ASC
`,
)
.all() as Array<{ canonicalTitle: string; total: number }>;
assert.equal(relinked[0]?.canonicalTitle, 'Shared Channel');
assert.equal(relinked[0]?.total, 2);
} finally {
tracker?.destroy();
cleanupDbPath(dbPath);
}
});
test('reassignAnimeAnilist clears description when description is explicitly null', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;

View File

@@ -1,6 +1,7 @@
import path from 'node:path';
import * as fs from 'node:fs';
import { createLogger } from '../../logger';
import { MediaGenerator } from '../../media-generator';
import type { CoverArtFetcher } from './anilist/cover-art-fetcher';
import { getLocalVideoMetadata, guessAnimeVideoMetadata } from './immersion-tracker/metadata';
import {
@@ -19,9 +20,11 @@ import {
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
linkYoutubeVideoToAnimeRecord,
type TrackerPreparedStatements,
updateVideoMetadataRecord,
updateVideoTitleRecord,
upsertYoutubeVideoMetadata,
} from './immersion-tracker/storage';
import {
applySessionLifetimeSummary,
@@ -153,6 +156,105 @@ import {
import type { MergedToken } from '../../types';
import { shouldExcludeTokenFromVocabularyPersistence } from './tokenizer/annotation-stage';
import { deriveStoredPartOfSpeech } from './tokenizer/part-of-speech';
import { probeYoutubeVideoMetadata } from './youtube/metadata-probe';
/** How long a cached "no cover found" result is honoured before cover capture is retried. */
const YOUTUBE_COVER_RETRY_MS = 5 * 60 * 1000;
/** Upper bound, in seconds, for the random seek position used by the screenshot fallback. */
const YOUTUBE_SCREENSHOT_MAX_SECONDS = 120;
const YOUTUBE_OEMBED_ENDPOINT = 'https://www.youtube.com/oembed';
/** Accepted shape of a video id: six or more URL-safe base64 characters. */
const YOUTUBE_ID_PATTERN = /^[A-Za-z0-9_-]{6,}$/;
/** Minimum age of stored YouTube metadata before the lazy backfill fetches it again. */
const YOUTUBE_METADATA_REFRESH_MS = 24 * 60 * 60 * 1000;

/** Returns true when `value` is a non-empty string matching {@link YOUTUBE_ID_PATTERN}. */
function isValidYouTubeVideoId(value: string | null): boolean {
  return Boolean(value && YOUTUBE_ID_PATTERN.test(value));
}

/** True when `host` is exactly `domain` or one of its subdomains (dot-delimited match). */
function isYouTubeHostOrSubdomain(host: string, domain: string): boolean {
  return host === domain || host.endsWith(`.${domain}`);
}

/**
 * Extracts the video id from a YouTube URL.
 *
 * Supported forms: `youtu.be/<id>`, `?v=<id>` / `?vi=<id>` query parameters, and
 * `/shorts/<id>`, `/embed/<id>`, `/live/<id>`, `/v/<id>` paths on `youtube.com` or
 * `youtube-nocookie.com` (including subdomains).
 *
 * @param mediaUrl - Absolute URL to inspect.
 * @returns The video id, or `null` when the URL is unparsable, is not a YouTube host, or
 *   carries no id matching {@link YOUTUBE_ID_PATTERN}. Never throws.
 */
function extractYouTubeVideoId(mediaUrl: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(mediaUrl);
  } catch {
    return null;
  }

  // Match on a dot boundary: a plain `endsWith('youtube.com')` would also accept look-alike
  // hosts such as `notyoutube.com`.
  const host = parsed.hostname.toLowerCase();
  const isShortLink = isYouTubeHostOrSubdomain(host, 'youtu.be');
  const isSiteLink =
    isYouTubeHostOrSubdomain(host, 'youtube.com') ||
    isYouTubeHostOrSubdomain(host, 'youtube-nocookie.com');
  if (!isShortLink && !isSiteLink) {
    return null;
  }

  if (isShortLink) {
    // Short links carry the id as the first path segment.
    const pathId = parsed.pathname.split('/').filter(Boolean)[0] ?? null;
    return pathId !== null && isValidYouTubeVideoId(pathId) ? pathId : null;
  }

  const queryId = parsed.searchParams.get('v') ?? parsed.searchParams.get('vi');
  if (queryId !== null && isValidYouTubeVideoId(queryId)) {
    return queryId;
  }

  // Path forms: the id is the segment right after a known marker segment.
  const idMarkers = new Set(['shorts', 'embed', 'live', 'v']);
  const pathParts = parsed.pathname.split('/').filter(Boolean);
  for (let i = 0; i + 1 < pathParts.length; i += 1) {
    const marker = pathParts[i];
    const next = pathParts[i + 1];
    if (!marker || !next || !idMarkers.has(marker.toLowerCase())) {
      continue;
    }
    let candidate: string;
    try {
      candidate = decodeURIComponent(next);
    } catch {
      // Malformed percent-encoding (URIError): treat the segment as "no id" rather than
      // letting the exception escape to callers that expect `null`.
      continue;
    }
    if (isValidYouTubeVideoId(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Builds the list of static `i.ytimg.com` thumbnail URLs for a video.
 *
 * @param videoId - YouTube video id, inserted into each URL as-is.
 * @returns Six candidate URLs in a fixed order; the order matters to callers that stop at the
 *   first one that downloads successfully.
 */
function buildYouTubeThumbnailUrls(videoId: string): string[] {
  const fileStems = ['maxresdefault', 'hqdefault', 'sddefault', 'mqdefault', '0', 'default'];
  return fileStems.map((stem) => `https://i.ytimg.com/vi/${videoId}/${stem}.jpg`);
}
/**
 * Asks the YouTube oEmbed endpoint for the thumbnail URL of `mediaUrl`.
 *
 * @param mediaUrl - Video URL, sent URL-encoded in the `url` query parameter.
 * @returns The trimmed `thumbnail_url` from the JSON response, or `null` when the response is
 *   not OK, the field is missing / not a string / blank, or anything throws along the way.
 */
async function fetchYouTubeOEmbedThumbnail(mediaUrl: string): Promise<string | null> {
  try {
    // Built inside the try block: encodeURIComponent itself can throw on malformed input.
    const requestUrl = `${YOUTUBE_OEMBED_ENDPOINT}?url=${encodeURIComponent(mediaUrl)}&format=json`;
    const response = await fetch(requestUrl);
    if (!response.ok) {
      return null;
    }

    const payload = (await response.json()) as { thumbnail_url?: unknown };
    const rawThumbnail = payload.thumbnail_url;
    if (typeof rawThumbnail !== 'string') {
      return null;
    }

    const thumbnail = rawThumbnail.trim();
    return thumbnail === '' ? null : thumbnail;
  } catch {
    return null;
  }
}
/**
 * Downloads `url` and returns its body as a Buffer.
 *
 * @param url - Image URL to fetch.
 * @returns The response bytes, or `null` when the response is not OK, declares a non-empty
 *   content type that does not start with `image/`, or the request / body read throws.
 *   A response without a content-type header is accepted.
 */
async function downloadImage(url: string): Promise<Buffer | null> {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      return null;
    }

    const declaredType = response.headers.get('content-type') ?? '';
    const declaresNonImage =
      declaredType !== '' && !declaredType.toLowerCase().startsWith('image/');
    if (declaresNonImage) {
      return null;
    }

    const bytes = await response.arrayBuffer();
    return Buffer.from(bytes);
  } catch {
    return null;
  }
}
export type {
AnimeAnilistEntryRow,
@@ -212,9 +314,11 @@ export class ImmersionTrackerService {
private sessionState: SessionState | null = null;
private currentVideoKey = '';
private currentMediaPathOrUrl = '';
private readonly mediaGenerator = new MediaGenerator();
private readonly preparedStatements: TrackerPreparedStatements;
private coverArtFetcher: CoverArtFetcher | null = null;
private readonly pendingCoverFetches = new Map<number, Promise<boolean>>();
private readonly pendingYoutubeMetadataFetches = new Map<number, Promise<void>>();
private readonly recordedSubtitleKeys = new Set<string>();
private readonly pendingAnimeMetadataUpdates = new Map<number, Promise<void>>();
private readonly resolveLegacyVocabularyPos:
@@ -433,11 +537,15 @@ export class ImmersionTrackerService {
}
/**
 * Returns the media library rows and schedules a YouTube metadata backfill.
 *
 * The rows are read before the backfill is kicked off, so metadata fetched as a result of
 * this call only shows up on a later call.
 */
async getMediaLibrary(): Promise<MediaLibraryRow[]> {
  // No early `return getMediaLibrary(this.db)` here: it would make the backfill unreachable.
  const rows = getMediaLibrary(this.db);
  this.backfillYoutubeMetadataForLibrary();
  return rows;
}
/**
 * Returns the detail row for `videoId` and schedules a YouTube metadata backfill for it.
 *
 * The detail is read before the backfill is kicked off, so metadata fetched as a result of
 * this call only shows up on a later call.
 *
 * @param videoId - Primary key of the video to look up.
 */
async getMediaDetail(videoId: number): Promise<MediaDetailRow | null> {
  // No early `return getMediaDetail(this.db, videoId)` here: it would make the backfill
  // unreachable.
  const detail = getMediaDetail(this.db, videoId);
  this.backfillYoutubeMetadataForVideo(videoId);
  return detail;
}
async getMediaSessions(videoId: number, limit = 100): Promise<SessionSummaryQueryRow[]> {
@@ -453,10 +561,12 @@ export class ImmersionTrackerService {
}
/** Relinks YouTube videos to their channel groupings, then returns the anime library rows. */
async getAnimeLibrary(): Promise<AnimeLibraryRow[]> {
  // Relink first so the rows read below already reflect the regrouping.
  this.relinkYoutubeAnimeLibrary();
  const library = getAnimeLibrary(this.db);
  return library;
}
/**
 * Relinks YouTube videos to their channel groupings, then returns the detail row for
 * `animeId` (or `null` when the lookup finds nothing).
 */
async getAnimeDetail(animeId: number): Promise<AnimeDetailRow | null> {
  // Relink first so the detail read below already reflects the regrouping.
  this.relinkYoutubeAnimeLibrary();
  const detail = getAnimeDetail(this.db, animeId);
  return detail;
}
@@ -647,6 +757,17 @@ export class ImmersionTrackerService {
if (existing?.coverBlob) {
return true;
}
const row = this.db
.prepare('SELECT source_url AS sourceUrl FROM imm_videos WHERE video_id = ?')
.get(videoId) as { sourceUrl: string | null } | null;
const sourceUrl = row?.sourceUrl?.trim() ?? '';
const youtubeVideoId = sourceUrl ? extractYouTubeVideoId(sourceUrl) : null;
if (youtubeVideoId) {
const youtubePromise = this.ensureYouTubeCoverArt(videoId, sourceUrl, youtubeVideoId);
return await youtubePromise;
}
if (!this.coverArtFetcher) {
return false;
}
@@ -677,6 +798,312 @@ export class ImmersionTrackerService {
}
}
/**
 * Starts (or joins) the YouTube cover capture for `videoId`.
 *
 * Concurrent callers for the same video share one in-flight promise through
 * `pendingCoverFetches`; the entry is removed once that promise settles.
 *
 * @param videoId - Primary key of the video whose cover is being captured.
 * @param sourceUrl - The video's source URL, forwarded to the capture step.
 * @param youtubeVideoId - Video id extracted from `sourceUrl`.
 * @returns The shared capture promise; it resolves to whether a cover is available and
 *   rejects if the capture itself rejects.
 */
private ensureYouTubeCoverArt(videoId: number, sourceUrl: string, youtubeVideoId: string): Promise<boolean> {
  const inFlight = this.pendingCoverFetches.get(videoId);
  if (inFlight) {
    return inFlight;
  }

  const promise = this.captureYouTubeCoverArt(videoId, sourceUrl, youtubeVideoId);
  this.pendingCoverFetches.set(videoId, promise);

  const release = (): void => {
    this.pendingCoverFetches.delete(videoId);
  };
  // `.then(release, release)` rather than a floating `.finally(release)`: the promise returned
  // by `.finally` re-rejects with the original reason, and nobody handles that derived promise,
  // so a failed capture would raise an unhandled rejection even though the caller awaits
  // `promise` itself.
  void promise.then(release, release);

  return promise;
}
/**
 * Obtains and stores cover art for a YouTube video.
 *
 * Sources are tried in order until one yields image bytes:
 * 1. the thumbnail URL reported by the oEmbed endpoint,
 * 2. each static thumbnail URL from buildYouTubeThumbnailUrls,
 * 3. a screenshot generated from the stream at a random position.
 *
 * @param videoId - Primary key of the video the cover belongs to.
 * @param sourceUrl - Video URL used for the oEmbed lookup and the screenshot fallback.
 * @param youtubeVideoId - Video id used to build the static thumbnail URLs.
 * @returns `true` when a cover blob already existed or was stored by this call; `false` when
 *   the tracker is destroyed, a recent no-match result is still cached, or every source failed.
 */
private async captureYouTubeCoverArt(
videoId: number,
sourceUrl: string,
youtubeVideoId: string,
): Promise<boolean> {
if (this.isDestroyed) return false;
const existing = await this.getCoverArt(videoId);
if (existing?.coverBlob) {
return true;
}
// A stored row with no URL, no AniList id and no blob is a cached "nothing found" result;
// skip the network work until it is older than YOUTUBE_COVER_RETRY_MS.
if (
existing?.coverUrl === null &&
existing?.anilistId === null &&
existing?.coverBlob === null &&
Date.now() - existing.fetchedAtMs < YOUTUBE_COVER_RETRY_MS
) {
return false;
}
let coverBlob: Buffer | null = null;
let coverUrl: string | null = null;
// Source 1: thumbnail advertised by oEmbed.
const embedThumbnailUrl = await fetchYouTubeOEmbedThumbnail(sourceUrl);
if (embedThumbnailUrl) {
const embedBlob = await downloadImage(embedThumbnailUrl);
if (embedBlob) {
coverBlob = embedBlob;
coverUrl = embedThumbnailUrl;
}
}
// Source 2: static thumbnail URLs, tried one at a time; the first successful download wins.
if (!coverBlob) {
for (const candidate of buildYouTubeThumbnailUrls(youtubeVideoId)) {
const candidateBlob = await downloadImage(candidate);
if (!candidateBlob) {
continue;
}
coverBlob = candidateBlob;
coverUrl = candidate;
break;
}
}
// Source 3: screenshot at a random second within the first YOUTUBE_SCREENSHOT_MAX_SECONDS
// (or within the stored duration when that is positive and shorter). coverUrl stays null
// because the image does not come from a URL.
if (!coverBlob) {
const durationMs = getVideoDurationMs(this.db, videoId);
const maxSeconds = durationMs > 0 ? Math.min(durationMs / 1000, YOUTUBE_SCREENSHOT_MAX_SECONDS) : null;
const seekSecond = Math.random() * (maxSeconds ?? YOUTUBE_SCREENSHOT_MAX_SECONDS);
try {
coverBlob = await this.mediaGenerator.generateScreenshot(
sourceUrl,
seekSecond,
{
format: 'jpg',
quality: 90,
maxWidth: 640,
},
);
} catch (error) {
// Screenshot failure is non-fatal: log it and fall through to the no-match handling.
this.logger.warn(
'cover-art: failed to generate YouTube screenshot for videoId=%d: %s',
videoId,
(error as Error).message,
);
}
}
if (coverBlob) {
// Keep any AniList id and title fields already stored on the row.
upsertCoverArt(this.db, videoId, {
anilistId: existing?.anilistId ?? null,
coverUrl,
coverBlob,
titleRomaji: existing?.titleRomaji ?? null,
titleEnglish: existing?.titleEnglish ?? null,
episodesTotal: existing?.episodesTotal ?? null,
});
return true;
}
// Record the miss only when there is no row yet or the row has neither a cover URL nor an
// AniList id, so a row that carries either of those is never overwritten with nulls.
const shouldCacheNoMatch =
!existing || (existing.coverUrl === null && existing.anilistId === null);
if (shouldCacheNoMatch) {
upsertCoverArt(this.db, videoId, {
anilistId: null,
coverUrl: null,
coverBlob: null,
titleRomaji: existing?.titleRomaji ?? null,
titleEnglish: existing?.titleEnglish ?? null,
episodesTotal: existing?.episodesTotal ?? null,
});
}
return false;
}
/**
 * Starts a background YouTube metadata probe for `videoId` unless one is already pending.
 *
 * When the probe yields metadata, it is stored, the video is linked to its anime record, and
 * a non-blank probed title replaces the video title. Any error is logged at debug level and
 * swallowed; the call itself returns immediately.
 *
 * @param videoId - Primary key of the video being enriched.
 * @param sourceUrl - URL handed to the metadata probe.
 */
private captureYoutubeMetadataAsync(videoId: number, sourceUrl: string): void {
  const inFlight = this.pendingYoutubeMetadataFetches;
  if (inFlight.has(videoId)) {
    return;
  }

  const probeAndStore = async (): Promise<void> => {
    try {
      const metadata = await probeYoutubeVideoMetadata(sourceUrl);
      if (!metadata) {
        return;
      }

      upsertYoutubeVideoMetadata(this.db, videoId, metadata);
      linkYoutubeVideoToAnimeRecord(this.db, videoId, metadata);

      const probedTitle = metadata.videoTitle?.trim();
      if (probedTitle) {
        updateVideoTitleRecord(this.db, videoId, probedTitle);
      }
    } catch (error) {
      this.logger.debug(
        'youtube metadata capture skipped for videoId=%d: %s',
        videoId,
        (error as Error).message,
      );
    }
  };

  const pending = probeAndStore();
  inFlight.set(videoId, pending);
  // Drop the bookkeeping entry once the probe settles so a later call can retry.
  pending.finally(() => {
    inFlight.delete(videoId);
  });
}
/**
 * Picks at most one YouTube video whose stored metadata is missing or incomplete and starts a
 * background metadata capture for it.
 *
 * A video qualifies when it is a remote source with an https YouTube URL, has a lifetime-media
 * row, lacks a metadata row (or its title, channel name or channel thumbnail), and was never
 * fetched or was last fetched at least YOUTUBE_METADATA_REFRESH_MS ago. The most recently
 * watched qualifying video is chosen.
 */
private backfillYoutubeMetadataForLibrary(): void {
  const refetchCutoffMs = Date.now() - YOUTUBE_METADATA_REFRESH_MS;
  const selectNextCandidate = this.db.prepare(
    `
SELECT
v.video_id AS videoId,
v.source_url AS sourceUrl
FROM imm_videos v
JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
WHERE
v.source_type = ?
AND v.source_url IS NOT NULL
AND (
LOWER(v.source_url) LIKE 'https://www.youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://m.youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://youtu.be/%'
)
AND (
yv.video_id IS NULL
OR yv.video_title IS NULL
OR yv.channel_name IS NULL
OR yv.channel_thumbnail_url IS NULL
)
AND (
yv.fetched_at_ms IS NULL
OR yv.fetched_at_ms <= ?
)
ORDER BY lm.last_watched_ms DESC, v.video_id DESC
LIMIT 1
`,
  );

  const next = selectNextCandidate.get(SOURCE_TYPE_REMOTE, refetchCutoffMs) as {
    videoId: number;
    sourceUrl: string | null;
  } | null;

  if (next?.sourceUrl) {
    this.captureYoutubeMetadataAsync(next.videoId, next.sourceUrl);
  }
}
/**
 * Starts a background metadata capture for `videoId` when it is a YouTube video whose stored
 * metadata is missing or incomplete.
 *
 * The video qualifies when it is a remote source with an https YouTube URL, lacks a metadata
 * row (or its title, channel name or channel thumbnail), and was never fetched or was last
 * fetched at least YOUTUBE_METADATA_REFRESH_MS ago. Otherwise this is a no-op.
 *
 * @param videoId - Primary key of the video to check.
 */
private backfillYoutubeMetadataForVideo(videoId: number): void {
  const refetchCutoffMs = Date.now() - YOUTUBE_METADATA_REFRESH_MS;
  const selectIfStale = this.db.prepare(
    `
SELECT
v.source_url AS sourceUrl
FROM imm_videos v
LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
WHERE
v.video_id = ?
AND v.source_type = ?
AND v.source_url IS NOT NULL
AND (
LOWER(v.source_url) LIKE 'https://www.youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://m.youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://youtu.be/%'
)
AND (
yv.video_id IS NULL
OR yv.video_title IS NULL
OR yv.channel_name IS NULL
OR yv.channel_thumbnail_url IS NULL
)
AND (
yv.fetched_at_ms IS NULL
OR yv.fetched_at_ms <= ?
)
`,
  );

  const stale = selectIfStale.get(videoId, SOURCE_TYPE_REMOTE, refetchCutoffMs) as {
    sourceUrl: string | null;
  } | null;

  if (stale?.sourceUrl) {
    this.captureYoutubeMetadataAsync(videoId, stale.sourceUrl);
  }
}
/**
 * Re-links YouTube videos that already have stored channel metadata to their
 * channel-based anime record.
 *
 * A video is selected when it is a remote YouTube source with a non-null
 * channel name and is either unlinked, linked to an anime row whose metadata
 * is not tagged `"source":"youtube-channel"`, or linked to an anime row whose
 * canonical title differs from the channel name. When at least one row is
 * selected, the lifetime summary tables are rebuilt after the relink pass.
 */
private relinkYoutubeAnimeLibrary(): void {
  type RelinkCandidate = {
    videoId: number;
    youtubeVideoId: string | null;
    videoUrl: string | null;
    videoTitle: string | null;
    videoThumbnailUrl: string | null;
    channelId: string | null;
    channelName: string | null;
    channelUrl: string | null;
    channelThumbnailUrl: string | null;
    uploaderId: string | null;
    uploaderUrl: string | null;
    description: string | null;
    metadataJson: string | null;
  };

  const rows = this.db
    .prepare(
      `
SELECT
v.video_id AS videoId,
yv.youtube_video_id AS youtubeVideoId,
yv.video_url AS videoUrl,
yv.video_title AS videoTitle,
yv.video_thumbnail_url AS videoThumbnailUrl,
yv.channel_id AS channelId,
yv.channel_name AS channelName,
yv.channel_url AS channelUrl,
yv.channel_thumbnail_url AS channelThumbnailUrl,
yv.uploader_id AS uploaderId,
yv.uploader_url AS uploaderUrl,
yv.description AS description,
yv.metadata_json AS metadataJson
FROM imm_videos v
JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
LEFT JOIN imm_anime a ON a.anime_id = v.anime_id
LEFT JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
WHERE
v.source_type = ?
AND v.source_url IS NOT NULL
AND (
LOWER(v.source_url) LIKE 'https://www.youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://m.youtube.com/%'
OR LOWER(v.source_url) LIKE 'https://youtu.be/%'
)
AND yv.channel_name IS NOT NULL
AND (
v.anime_id IS NULL
OR a.metadata_json IS NULL
OR a.metadata_json NOT LIKE '%"source":"youtube-channel"%'
OR a.canonical_title IS NULL
OR TRIM(a.canonical_title) != TRIM(yv.channel_name)
)
ORDER BY lm.last_watched_ms DESC, v.video_id DESC
`,
    )
    .all(SOURCE_TYPE_REMOTE) as RelinkCandidate[];

  if (rows.length === 0) {
    return;
  }

  for (const { videoId, youtubeVideoId, videoUrl, ...channelDetails } of rows) {
    // Rows without a YouTube id or URL cannot be linked; leave them untouched.
    if (youtubeVideoId && videoUrl) {
      linkYoutubeVideoToAnimeRecord(this.db, videoId, {
        youtubeVideoId,
        videoUrl,
        ...channelDetails,
      });
    }
  }

  rebuildLifetimeSummaryTables(this.db);
}
handleMediaChange(mediaPath: string | null, mediaTitle: string | null): void {
const normalizedPath = normalizeMediaPath(mediaPath);
const normalizedTitle = normalizeText(mediaTitle);
@@ -721,7 +1148,14 @@ export class ImmersionTrackerService {
`Starting immersion session for path=${normalizedPath} videoId=${sessionInfo.videoId}`,
);
this.startSession(sessionInfo.videoId, sessionInfo.startedAtMs);
this.captureAnimeMetadataAsync(sessionInfo.videoId, normalizedPath, normalizedTitle || null);
const youtubeVideoId =
sourceType === SOURCE_TYPE_REMOTE ? extractYouTubeVideoId(normalizedPath) : null;
if (youtubeVideoId) {
void this.ensureYouTubeCoverArt(sessionInfo.videoId, normalizedPath, youtubeVideoId);
this.captureYoutubeMetadataAsync(sessionInfo.videoId, normalizedPath);
} else {
this.captureAnimeMetadataAsync(sessionInfo.videoId, normalizedPath, normalizedTitle || null);
}
this.captureVideoMetadataAsync(sessionInfo.videoId, sourceType, normalizedPath);
}
@@ -749,6 +1183,7 @@ export class ImmersionTrackerService {
}
const startMs = secToMs(startSec);
const endMs = secToMs(endSec);
const subtitleKey = `${startMs}:${cleaned}`;
if (this.recordedSubtitleKeys.has(subtitleKey)) {
return;
@@ -762,6 +1197,9 @@ export class ImmersionTrackerService {
this.sessionState.currentLineIndex += 1;
this.sessionState.linesSeen += 1;
this.sessionState.tokensSeen += tokenCount;
if (this.sessionState.lastMediaMs === null || endMs > this.sessionState.lastMediaMs) {
this.sessionState.lastMediaMs = endMs;
}
this.sessionState.pendingTelemetry = true;
const wordOccurrences = new Map<string, CountedWordOccurrence>();
@@ -811,8 +1249,8 @@ export class ImmersionTrackerService {
sessionId: this.sessionState.sessionId,
videoId: this.sessionState.videoId,
lineIndex: this.sessionState.currentLineIndex,
segmentStartMs: secToMs(startSec),
segmentEndMs: secToMs(endSec),
segmentStartMs: startMs,
segmentEndMs: endMs,
text: cleaned,
secondaryText: secondaryText ?? null,
wordOccurrences: Array.from(wordOccurrences.values()),

View File

@@ -39,6 +39,7 @@ import {
} from '../query.js';
import {
SOURCE_TYPE_LOCAL,
SOURCE_TYPE_REMOTE,
EVENT_CARD_MINED,
EVENT_SUBTITLE_LINE,
EVENT_YOMITAN_LOOKUP,
@@ -279,6 +280,78 @@ test('getAnimeEpisodes falls back to the latest subtitle segment end when sessio
}
});
// Regression test: a session whose stored ended_media_ms is 0 must not be
// reported as episode progress; getAnimeEpisodes is expected to report 185_000,
// the larger of the two timing values on the subtitle-line event inserted below.
test('getAnimeEpisodes ignores zero-valued session checkpoints and falls back to subtitle progress', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const stmts = createTrackerPreparedStatements(db);
// Remote (YouTube URL) video linked to an anime row so it shows up as an episode.
const videoId = getOrCreateVideoRecord(db, 'remote:https://www.youtube.com/watch?v=zero123', {
canonicalTitle: 'Zero Checkpoint Stream',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=zero123',
sourceType: SOURCE_TYPE_REMOTE,
});
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Zero Checkpoint Anime',
canonicalTitle: 'Zero Checkpoint Anime',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: null,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename: 'watch?v=zero123',
parsedTitle: 'Zero Checkpoint Anime',
parsedSeason: 1,
parsedEpisode: 1,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":1}',
});
// Known duration so the durationMs assertion below has a fixed expected value.
db.prepare('UPDATE imm_videos SET duration_ms = ? WHERE video_id = ?').run(600_000, videoId);
const startedAtMs = 1_200_000;
const sessionId = startSessionRecord(db, videoId, startedAtMs).sessionId;
// End the session with ended_media_ms = 0, the zero-valued checkpoint under test.
db.prepare(
`
UPDATE imm_sessions
SET
ended_at_ms = ?,
status = 2,
ended_media_ms = 0,
active_watched_ms = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(startedAtMs + 30_000, 180_000, startedAtMs + 30_000, sessionId);
// Subtitle-line event; 170_000 / 185_000 are presumably the segment start/end
// in media ms (positional order is defined by eventInsertStmt, not visible here).
stmts.eventInsertStmt.run(
sessionId,
startedAtMs + 29_000,
EVENT_SUBTITLE_LINE,
1,
170_000,
185_000,
4,
0,
'{"line":"stream progress"}',
startedAtMs + 29_000,
startedAtMs + 29_000,
);
const [episode] = getAnimeEpisodes(db, animeId);
assert.ok(episode);
assert.equal(episode?.endedMediaMs, 185_000);
assert.equal(episode?.durationMs, 600_000);
} finally {
// Always release the temporary database, even when an assertion fails.
db.close();
cleanupDbPath(dbPath);
}
});
test('getSessionTimeline returns the full session when no limit is provided', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -1956,6 +2029,100 @@ test('media library and detail queries read lifetime totals', () => {
}
});
// Verifies that getMediaLibrary and getMediaDetail surface the columns of a
// joined imm_youtube_videos row for a video that has lifetime media totals.
test('media library and detail queries include joined youtube metadata when present', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const mediaOne = getOrCreateVideoRecord(db, 'yt:https://www.youtube.com/watch?v=abc123', {
canonicalTitle: 'Local Fallback Title',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=abc123',
sourceType: SOURCE_TYPE_REMOTE,
});
// Lifetime row inserted directly so the video is present in imm_lifetime_media.
// Bind values follow the column list order of the INSERT.
db.prepare(
`
INSERT INTO imm_lifetime_media (
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(mediaOne, 2, 6_000, 1, 5, 80, 0, 1_000, 9_000, 9_000, 9_000);
// YouTube metadata row; every value asserted below originates from this insert.
db.prepare(
`
INSERT INTO imm_youtube_videos (
video_id,
youtube_video_id,
video_url,
video_title,
video_thumbnail_url,
channel_id,
channel_name,
channel_url,
channel_thumbnail_url,
uploader_id,
uploader_url,
description,
metadata_json,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(
mediaOne,
'abc123',
'https://www.youtube.com/watch?v=abc123',
'Tracked Video Title',
'https://i.ytimg.com/vi/abc123/hqdefault.jpg',
'UCcreator123',
'Creator Name',
'https://www.youtube.com/channel/UCcreator123',
'https://yt3.googleusercontent.com/channel-avatar=s88',
'@creator',
'https://www.youtube.com/@creator',
'Video description',
'{"source":"test"}',
10_000,
10_000,
10_000,
);
const library = getMediaLibrary(db);
const detail = getMediaDetail(db, mediaOne);
// Library listing exposes the video and channel identity fields.
assert.equal(library.length, 1);
assert.equal(library[0]?.youtubeVideoId, 'abc123');
assert.equal(library[0]?.videoTitle, 'Tracked Video Title');
assert.equal(library[0]?.channelId, 'UCcreator123');
assert.equal(library[0]?.channelName, 'Creator Name');
assert.equal(library[0]?.channelUrl, 'https://www.youtube.com/channel/UCcreator123');
// Detail view additionally exposes URLs, thumbnails, uploader and description.
assert.equal(detail?.youtubeVideoId, 'abc123');
assert.equal(detail?.videoUrl, 'https://www.youtube.com/watch?v=abc123');
assert.equal(detail?.videoThumbnailUrl, 'https://i.ytimg.com/vi/abc123/hqdefault.jpg');
assert.equal(detail?.channelThumbnailUrl, 'https://yt3.googleusercontent.com/channel-avatar=s88');
assert.equal(detail?.uploaderId, '@creator');
assert.equal(detail?.uploaderUrl, 'https://www.youtube.com/@creator');
assert.equal(detail?.description, 'Video description');
} finally {
// Always release the temporary database, even when an assertion fails.
db.close();
cleanupDbPath(dbPath);
}
});
test('cover art queries reuse a shared blob across duplicate anime art rows', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -2679,3 +2846,200 @@ test('deleteSession rebuilds word and kanji aggregates from retained subtitle li
cleanupDbPath(dbPath);
}
});
// Verifies that deleting a video's only session clears every derived view of it:
// media/anime library and detail queries, trends, daily/monthly rollups, and the
// lifetime media/anime/applied-session rows. The derived tables are seeded by hand
// to look as if the session had already been applied before deleteSession runs.
test('deleteSession removes zero-session media from library and trends', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
const animeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'Delete Me Anime',
canonicalTitle: 'Delete Me Anime',
anilistId: 404_404,
titleRomaji: 'Delete Me Anime',
titleEnglish: 'Delete Me Anime',
titleNative: 'Delete Me Anime',
metadataJson: null,
});
const videoId = getOrCreateVideoRecord(db, 'local:/tmp/delete-last-session.mkv', {
canonicalTitle: 'Delete Last Session',
sourcePath: '/tmp/delete-last-session.mkv',
sourceUrl: null,
sourceType: SOURCE_TYPE_LOCAL,
});
linkVideoToAnimeRecord(db, videoId, {
animeId,
parsedBasename: 'Delete Last Session',
parsedTitle: 'Delete Me Anime',
parsedSeason: 1,
parsedEpisode: 1,
parserSource: 'fallback',
parserConfidence: 1,
parseMetadataJson: '{"episode":1}',
});
const startedAtMs = 9_000_000;
const endedAtMs = startedAtMs + 120_000;
// Day index since the epoch (86_400_000 ms per day); 9_000_000 ms falls on day 0.
const rollupDay = Math.floor(startedAtMs / 86_400_000);
// NOTE(review): presumably a YYYYMM key for January 1970 — confirm against the rollup writer.
const rollupMonth = 197001;
const { sessionId } = startSessionRecord(db, videoId, startedAtMs);
// Mark the session as ended with non-zero totals.
db.prepare(
`
UPDATE imm_sessions
SET
ended_at_ms = ?,
ended_media_ms = ?,
total_watched_ms = ?,
active_watched_ms = ?,
lines_seen = ?,
tokens_seen = ?,
cards_mined = ?,
LAST_UPDATE_DATE = ?
WHERE session_id = ?
`,
).run(endedAtMs, 120000, 120000, 120000, 12, 120, 3, endedAtMs, sessionId);
// Seed the "already applied" marker and the lifetime/rollup tables by hand.
// Bind values follow the column list order of each INSERT.
db.prepare(
`
INSERT INTO imm_lifetime_applied_sessions (
session_id,
applied_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?)
`,
).run(sessionId, endedAtMs, endedAtMs, endedAtMs);
db.prepare(
`
INSERT INTO imm_lifetime_media (
video_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(videoId, 1, 120_000, 3, 12, 120, 0, startedAtMs, endedAtMs, endedAtMs, endedAtMs);
db.prepare(
`
INSERT INTO imm_lifetime_anime (
anime_id,
total_sessions,
total_active_ms,
total_cards,
total_lines_seen,
total_tokens_seen,
episodes_started,
episodes_completed,
first_watched_ms,
last_watched_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(animeId, 1, 120000, 3, 12, 120, 1, 0, startedAtMs, endedAtMs, endedAtMs, endedAtMs);
// The global row (global_id = 1) is updated rather than inserted here.
db.prepare(
`
UPDATE imm_lifetime_global
SET
total_sessions = 1,
total_active_ms = 120000,
total_cards = 3,
active_days = 1,
episodes_started = 1,
episodes_completed = 0,
anime_completed = 0,
last_rebuilt_ms = ?,
LAST_UPDATE_DATE = ?
WHERE global_id = 1
`,
).run(endedAtMs, endedAtMs);
db.prepare(
`
INSERT INTO imm_daily_rollups (
rollup_day,
video_id,
total_sessions,
total_active_min,
total_lines_seen,
total_tokens_seen,
total_cards,
cards_per_hour,
tokens_per_min,
lookup_hit_rate,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(rollupDay, videoId, 1, 2, 12, 120, 3, 90, 60, null, endedAtMs, endedAtMs);
db.prepare(
`
INSERT INTO imm_monthly_rollups (
rollup_month,
video_id,
total_sessions,
total_active_min,
total_lines_seen,
total_tokens_seen,
total_cards,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`,
).run(rollupMonth, videoId, 1, 2, 12, 120, 3, endedAtMs, endedAtMs);
// Act: remove the only session for this video.
deleteSession(db, sessionId);
// Library and detail queries no longer return the media or its anime.
assert.deepEqual(getMediaLibrary(db), []);
assert.equal(getMediaDetail(db, videoId) ?? null, null);
assert.deepEqual(getAnimeLibrary(db), []);
assert.equal(getAnimeDetail(db, animeId) ?? null, null);
// Trends and rollups are empty as well.
const trends = getTrendsDashboard(db, 'all', 'day');
assert.deepEqual(trends.activity.watchTime, []);
assert.deepEqual(trends.activity.sessions, []);
const dailyRollups = getDailyRollups(db, 30);
const monthlyRollups = getMonthlyRollups(db, 30);
assert.deepEqual(dailyRollups, []);
assert.deepEqual(monthlyRollups, []);
// Direct table checks: the hand-seeded lifetime rows must be gone.
const lifetimeMediaCount = Number(
(
db.prepare('SELECT COUNT(*) AS total FROM imm_lifetime_media WHERE video_id = ?').get(
videoId,
) as { total: number }
).total,
);
const lifetimeAnimeCount = Number(
(
db.prepare('SELECT COUNT(*) AS total FROM imm_lifetime_anime WHERE anime_id = ?').get(
animeId,
) as { total: number }
).total,
);
const appliedSessionCount = Number(
(
db
.prepare('SELECT COUNT(*) AS total FROM imm_lifetime_applied_sessions WHERE session_id = ?')
.get(sessionId) as { total: number }
).total,
);
assert.equal(lifetimeMediaCount, 0);
assert.equal(lifetimeAnimeCount, 0);
assert.equal(appliedSessionCount, 0);
} finally {
// Always release the temporary database, even when an assertion fails.
db.close();
cleanupDbPath(dbPath);
}
});

View File

@@ -134,6 +134,49 @@ function resetLifetimeSummaries(db: DatabaseSync, nowMs: number): void {
).run(nowMs, nowMs);
}
/**
 * Recomputes the lifetime summary tables from every ended session.
 *
 * Ended sessions are read first (oldest start first, ties broken by session
 * id), the lifetime summaries are then reset, and each session is re-applied
 * in that order. No BEGIN/COMMIT is issued in this function.
 *
 * @param db Database handle to rebuild.
 * @param rebuiltAtMs Timestamp passed to the reset step and echoed in the result.
 * @returns The number of sessions re-applied and the rebuild timestamp.
 */
function rebuildLifetimeSummariesInternal(
  db: DatabaseSync,
  rebuiltAtMs: number,
): LifetimeRebuildSummary {
  const endedSessionsQuery = db.prepare(
    `
SELECT
session_id AS sessionId,
video_id AS videoId,
started_at_ms AS startedAtMs,
ended_at_ms AS endedAtMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined,
lookup_count AS lookupCount,
lookup_hits AS lookupHits,
yomitan_lookup_count AS yomitanLookupCount,
pause_count AS pauseCount,
pause_ms AS pauseMs,
seek_forward_count AS seekForwardCount,
seek_backward_count AS seekBackwardCount,
media_buffer_events AS mediaBufferEvents
FROM imm_sessions
WHERE ended_at_ms IS NOT NULL
ORDER BY started_at_ms ASC, session_id ASC
`,
  );
  const retained = endedSessionsQuery.all() as RetainedSessionRow[];

  // Wipe the existing totals only after the session snapshot has been read.
  resetLifetimeSummaries(db, rebuiltAtMs);

  retained.forEach((row) => {
    applySessionLifetimeSummary(db, toRebuildSessionState(row), row.endedAtMs);
  });

  const appliedSessions = retained.length;
  return { appliedSessions, rebuiltAtMs };
}
function toRebuildSessionState(row: RetainedSessionRow): SessionState {
return {
sessionId: row.sessionId,
@@ -482,50 +525,22 @@ export function applySessionLifetimeSummary(
/**
 * Rebuilds all lifetime summary tables atomically.
 *
 * Opens a transaction, delegates the actual rebuild to
 * `rebuildLifetimeSummariesInTransaction`, and commits. If anything throws,
 * the transaction is rolled back and the error is rethrown.
 *
 * The session query, reset/apply loop and trailing result literal that used
 * to live inline here are now handled by the delegate; keeping them would
 * run the rebuild twice and leave statements outside the function body.
 *
 * @param db Database handle to rebuild.
 * @returns The number of sessions re-applied and the rebuild timestamp.
 * @throws Rethrows any error raised during the rebuild, after rolling back.
 */
export function rebuildLifetimeSummaries(db: DatabaseSync): LifetimeRebuildSummary {
  const rebuiltAtMs = Date.now();
  db.exec('BEGIN');
  try {
    const summary = rebuildLifetimeSummariesInTransaction(db, rebuiltAtMs);
    db.exec('COMMIT');
    return summary;
  } catch (error) {
    db.exec('ROLLBACK');
    throw error;
  }
}
/**
 * Rebuilds the lifetime summaries without issuing BEGIN/COMMIT itself.
 *
 * NOTE(review): the name implies the caller already holds an open
 * transaction — confirm at each call site.
 *
 * @param db Database handle to rebuild.
 * @param rebuiltAtMs Rebuild timestamp; defaults to the current time.
 * @returns The number of sessions re-applied and the rebuild timestamp.
 */
export function rebuildLifetimeSummariesInTransaction(
  db: DatabaseSync,
  rebuiltAtMs = Date.now(),
): LifetimeRebuildSummary {
  const summary = rebuildLifetimeSummariesInternal(db, rebuiltAtMs);
  return summary;
}
export function reconcileStaleActiveSessions(db: DatabaseSync): number {

View File

@@ -113,6 +113,14 @@ function setLastRollupSampleMs(db: DatabaseSync, sampleMs: number): void {
).run(ROLLUP_STATE_KEY, sampleMs);
}
/**
 * Deletes every daily and monthly rollup row and resets the stored
 * "last rollup sample" marker to `ZERO_ID`.
 *
 * @param db Database handle whose rollup tables are cleared.
 */
function resetRollups(db: DatabaseSync): void {
  const clearRollupTablesSql = `
DELETE FROM imm_daily_rollups;
DELETE FROM imm_monthly_rollups;
`;
  db.exec(clearRollupTablesSql);
  setLastRollupSampleMs(db, ZERO_ID);
}
function upsertDailyRollupsForGroups(
db: DatabaseSync,
groups: Array<{ rollupDay: number; videoId: number }>,
@@ -281,8 +289,20 @@ function dedupeGroups<T extends { rollupDay?: number; rollupMonth?: number; vide
}
export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): void {
if (forceRebuild) {
db.exec('BEGIN IMMEDIATE');
try {
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
throw error;
}
return;
}
const rollupNowMs = Date.now();
const lastRollupSampleMs = forceRebuild ? ZERO_ID : getLastRollupSampleMs(db);
const lastRollupSampleMs = getLastRollupSampleMs(db);
const maxSampleRow = db
.prepare('SELECT MAX(sample_ms) AS maxSampleMs FROM imm_session_telemetry')
@@ -324,6 +344,41 @@ export function runRollupMaintenance(db: DatabaseSync, forceRebuild = false): vo
}
}
/**
 * Rebuilds the daily and monthly rollup tables from scratch without issuing
 * BEGIN/COMMIT itself.
 *
 * The newest telemetry sample timestamp is read first, then all rollups are
 * reset. When telemetry exists, every affected (day/month, video) group since
 * `ZERO_ID` is recomputed and the sample marker is advanced to the newest
 * sample; otherwise the tables are simply left empty.
 *
 * @param db Database handle to rebuild.
 */
export function rebuildRollupsInTransaction(db: DatabaseSync): void {
  const rollupNowMs = Date.now();
  const newestSample = db
    .prepare('SELECT MAX(sample_ms) AS maxSampleMs FROM imm_session_telemetry')
    .get() as unknown as RollupTelemetryResult | null;

  resetRollups(db);

  const newestSampleMs = newestSample?.maxSampleMs;
  if (!newestSampleMs) {
    // No telemetry at all: the reset above is the whole rebuild.
    return;
  }

  const groups = getAffectedRollupGroups(db, ZERO_ID);
  if (groups.length > 0) {
    const dailyGroups = dedupeGroups(
      groups.map(({ rollupDay, videoId }) => ({ rollupDay, videoId })),
    );
    const monthlyGroups = dedupeGroups(
      groups.map(({ rollupMonth, videoId }) => ({ rollupMonth, videoId })),
    );
    upsertDailyRollupsForGroups(db, dailyGroups, rollupNowMs);
    upsertMonthlyRollupsForGroups(db, monthlyGroups, rollupNowMs);
  }

  // The marker advances whether or not any group needed recomputing.
  setLastRollupSampleMs(db, Number(newestSampleMs));
}
/**
 * Runs SQLite's `PRAGMA optimize` on the given database.
 *
 * @param db Database handle to optimize.
 */
export function runOptimizeMaintenance(db: DatabaseSync): void {
  const optimizePragma = 'PRAGMA optimize';
  db.exec(optimizePragma);
}

View File

@@ -31,6 +31,8 @@ import type {
VocabularyStatsRow,
} from './types';
import { buildCoverBlobReference, normalizeCoverBlobBytes } from './storage';
import { rebuildLifetimeSummariesInTransaction } from './lifetime';
import { rebuildRollupsInTransaction } from './maintenance';
import { PartOfSpeech, type MergedToken } from '../../../types';
import { shouldExcludeTokenFromVocabularyPersistence } from '../tokenizer/annotation-stage';
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';
@@ -1746,7 +1748,7 @@ export function getAnimeEpisodes(db: DatabaseSync, animeId: number): AnimeEpisod
v.duration_ms AS durationMs,
(
SELECT COALESCE(
s_recent.ended_media_ms,
NULLIF(s_recent.ended_media_ms, 0),
(
SELECT MAX(line.segment_end_ms)
FROM imm_subtitle_lines line
@@ -1817,6 +1819,17 @@ export function getMediaLibrary(db: DatabaseSync): MediaLibraryRow[] {
COALESCE(lm.total_cards, 0) AS totalCards,
COALESCE(lm.total_tokens_seen, 0) AS totalTokensSeen,
COALESCE(lm.last_watched_ms, 0) AS lastWatchedMs,
yv.youtube_video_id AS youtubeVideoId,
yv.video_url AS videoUrl,
yv.video_title AS videoTitle,
yv.video_thumbnail_url AS videoThumbnailUrl,
yv.channel_id AS channelId,
yv.channel_name AS channelName,
yv.channel_url AS channelUrl,
yv.channel_thumbnail_url AS channelThumbnailUrl,
yv.uploader_id AS uploaderId,
yv.uploader_url AS uploaderUrl,
yv.description AS description,
CASE
WHEN ma.cover_blob_hash IS NOT NULL OR ma.cover_blob IS NOT NULL THEN 1
ELSE 0
@@ -1824,6 +1837,7 @@ export function getMediaLibrary(db: DatabaseSync): MediaLibraryRow[] {
FROM imm_videos v
JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
LEFT JOIN imm_media_art ma ON ma.video_id = v.video_id
LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
ORDER BY lm.last_watched_ms DESC
`,
)
@@ -1846,9 +1860,21 @@ export function getMediaDetail(db: DatabaseSync, videoId: number): MediaDetailRo
COALESCE(lm.total_lines_seen, 0) AS totalLinesSeen,
COALESCE(SUM(COALESCE(asm.lookupCount, s.lookup_count, 0)), 0) AS totalLookupCount,
COALESCE(SUM(COALESCE(asm.lookupHits, s.lookup_hits, 0)), 0) AS totalLookupHits,
COALESCE(SUM(COALESCE(asm.yomitanLookupCount, s.yomitan_lookup_count, 0)), 0) AS totalYomitanLookupCount
COALESCE(SUM(COALESCE(asm.yomitanLookupCount, s.yomitan_lookup_count, 0)), 0) AS totalYomitanLookupCount,
yv.youtube_video_id AS youtubeVideoId,
yv.video_url AS videoUrl,
yv.video_title AS videoTitle,
yv.video_thumbnail_url AS videoThumbnailUrl,
yv.channel_id AS channelId,
yv.channel_name AS channelName,
yv.channel_url AS channelUrl,
yv.channel_thumbnail_url AS channelThumbnailUrl,
yv.uploader_id AS uploaderId,
yv.uploader_url AS uploaderUrl,
yv.description AS description
FROM imm_videos v
JOIN imm_lifetime_media lm ON lm.video_id = v.video_id
LEFT JOIN imm_youtube_videos yv ON yv.video_id = v.video_id
LEFT JOIN imm_sessions s ON s.video_id = v.video_id
LEFT JOIN active_session_metrics asm ON asm.sessionId = s.session_id
WHERE v.video_id = ?
@@ -2443,6 +2469,8 @@ export function deleteSession(db: DatabaseSync, sessionId: number): void {
try {
deleteSessionsByIds(db, sessionIds);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -2459,6 +2487,8 @@ export function deleteSessions(db: DatabaseSync, sessionIds: number[]): void {
try {
deleteSessionsByIds(db, sessionIds);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');
@@ -2495,6 +2525,8 @@ export function deleteVideo(db: DatabaseSync, videoId: number): void {
cleanupUnusedCoverArtBlobHash(db, artRow?.coverBlobHash ?? null);
db.prepare('DELETE FROM imm_videos WHERE video_id = ?').run(videoId);
refreshLexicalAggregates(db, affectedWordIds, affectedKanjiIds);
rebuildLifetimeSummariesInTransaction(db);
rebuildRollupsInTransaction(db);
db.exec('COMMIT');
} catch (error) {
db.exec('ROLLBACK');

View File

@@ -15,8 +15,14 @@ import {
getOrCreateAnimeRecord,
getOrCreateVideoRecord,
linkVideoToAnimeRecord,
linkYoutubeVideoToAnimeRecord,
} from './storage';
import { EVENT_SUBTITLE_LINE, SESSION_STATUS_ENDED, SOURCE_TYPE_LOCAL } from './types';
import {
EVENT_SUBTITLE_LINE,
SESSION_STATUS_ENDED,
SOURCE_TYPE_LOCAL,
SOURCE_TYPE_REMOTE,
} from './types';
function makeDbPath(): string {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-imm-storage-session-'));
@@ -106,6 +112,7 @@ test('ensureSchema creates immersion core tables', () => {
assert.ok(tableNames.has('imm_kanji_line_occurrences'));
assert.ok(tableNames.has('imm_rollup_state'));
assert.ok(tableNames.has('imm_cover_art_blobs'));
assert.ok(tableNames.has('imm_youtube_videos'));
const videoColumns = new Set(
(
@@ -146,6 +153,114 @@ test('ensureSchema creates immersion core tables', () => {
}
});
// Migration test: starting from a hand-built database stamped as schema
// version 15 (containing only the version, rollup-state, anime and videos
// tables), ensureSchema must create imm_youtube_videos with exactly the
// expected column set.
test('ensureSchema adds youtube metadata table to existing schema version 15 databases', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
// Minimal legacy schema; note that imm_youtube_videos is deliberately absent.
db.exec(`
CREATE TABLE imm_schema_version (
schema_version INTEGER PRIMARY KEY,
applied_at_ms INTEGER NOT NULL
);
INSERT INTO imm_schema_version(schema_version, applied_at_ms) VALUES (15, 1000);
CREATE TABLE imm_rollup_state(
state_key TEXT PRIMARY KEY,
state_value INTEGER NOT NULL
);
INSERT INTO imm_rollup_state(state_key, state_value) VALUES ('last_rollup_sample_ms', 123);
CREATE TABLE imm_anime(
anime_id INTEGER PRIMARY KEY AUTOINCREMENT,
normalized_title_key TEXT NOT NULL UNIQUE,
canonical_title TEXT NOT NULL,
anilist_id INTEGER UNIQUE,
title_romaji TEXT,
title_english TEXT,
title_native TEXT,
episodes_total INTEGER,
description TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER
);
CREATE TABLE imm_videos(
video_id INTEGER PRIMARY KEY AUTOINCREMENT,
video_key TEXT NOT NULL UNIQUE,
anime_id INTEGER,
canonical_title TEXT NOT NULL,
source_type INTEGER NOT NULL,
source_path TEXT,
source_url TEXT,
parsed_basename TEXT,
parsed_title TEXT,
parsed_season INTEGER,
parsed_episode INTEGER,
parser_source TEXT,
parser_confidence REAL,
parse_metadata_json TEXT,
watched INTEGER NOT NULL DEFAULT 0,
duration_ms INTEGER NOT NULL CHECK(duration_ms>=0),
file_size_bytes INTEGER CHECK(file_size_bytes>=0),
codec_id INTEGER, container_id INTEGER,
width_px INTEGER, height_px INTEGER, fps_x100 INTEGER,
bitrate_kbps INTEGER, audio_codec_id INTEGER,
hash_sha256 TEXT, screenshot_path TEXT,
metadata_json TEXT,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(anime_id) REFERENCES imm_anime(anime_id) ON DELETE SET NULL
);
`);
// Act: run the schema upgrade against the legacy database.
ensureSchema(db);
// Collect the names of all imm_* tables present after the upgrade.
const tables = new Set(
(
db.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name LIKE 'imm_%'`).all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.ok(tables.has('imm_youtube_videos'));
// The new table must expose exactly these columns, no more and no fewer.
const columns = new Set(
(
db.prepare('PRAGMA table_info(imm_youtube_videos)').all() as Array<{
name: string;
}>
).map((row) => row.name),
);
assert.deepEqual(
columns,
new Set([
'video_id',
'youtube_video_id',
'video_url',
'video_title',
'video_thumbnail_url',
'channel_id',
'channel_name',
'channel_url',
'channel_thumbnail_url',
'uploader_id',
'uploader_url',
'description',
'metadata_json',
'fetched_at_ms',
'CREATED_DATE',
'LAST_UPDATE_DATE',
]),
);
} finally {
// Always release the temporary database, even when an assertion fails.
db.close();
cleanupDbPath(dbPath);
}
});
test('ensureSchema creates large-history performance indexes', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
@@ -169,6 +284,8 @@ test('ensureSchema creates large-history performance indexes', () => {
assert.ok(indexNames.has('idx_kanji_frequency'));
assert.ok(indexNames.has('idx_media_art_anilist_id'));
assert.ok(indexNames.has('idx_media_art_cover_url'));
assert.ok(indexNames.has('idx_youtube_videos_channel_id'));
assert.ok(indexNames.has('idx_youtube_videos_youtube_video_id'));
} finally {
db.close();
cleanupDbPath(dbPath);
@@ -706,6 +823,123 @@ test('anime rows are reused by normalized parsed title and upgraded with AniList
}
});
// Verifies that two YouTube videos, each initially linked to its own fallback
// anime row, end up under a single anime row titled after their shared channel
// once linkYoutubeVideoToAnimeRecord is called with matching channel metadata.
test('youtube videos can be regrouped under a shared channel anime identity', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);
try {
ensureSchema(db);
// Two remote videos with URL-derived placeholder titles.
const firstVideoId = getOrCreateVideoRecord(
db,
'remote:https://www.youtube.com/watch?v=video-1',
{
canonicalTitle: 'watch?v video-1',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=video-1',
sourceType: SOURCE_TYPE_REMOTE,
},
);
const secondVideoId = getOrCreateVideoRecord(
db,
'remote:https://www.youtube.com/watch?v=video-2',
{
canonicalTitle: 'watch?v video-2',
sourcePath: null,
sourceUrl: 'https://www.youtube.com/watch?v=video-2',
sourceType: SOURCE_TYPE_REMOTE,
},
);
// Initial state: each video is linked to its own low-confidence fallback anime row.
const firstAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'watch?v video-1',
canonicalTitle: 'watch?v video-1',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: null,
});
linkVideoToAnimeRecord(db, firstVideoId, {
animeId: firstAnimeId,
parsedBasename: null,
parsedTitle: 'watch?v video-1',
parsedSeason: null,
parsedEpisode: null,
parserSource: 'fallback',
parserConfidence: 0.2,
parseMetadataJson: '{"source":"fallback"}',
});
const secondAnimeId = getOrCreateAnimeRecord(db, {
parsedTitle: 'watch?v video-2',
canonicalTitle: 'watch?v video-2',
anilistId: null,
titleRomaji: null,
titleEnglish: null,
titleNative: null,
metadataJson: null,
});
linkVideoToAnimeRecord(db, secondVideoId, {
animeId: secondAnimeId,
parsedBasename: null,
parsedTitle: 'watch?v video-2',
parsedSeason: null,
parsedEpisode: null,
parserSource: 'fallback',
parserConfidence: 0.2,
parseMetadataJson: '{"source":"fallback"}',
});
// Act: relink both videos using metadata that shares channel id 'UC123'.
linkYoutubeVideoToAnimeRecord(db, firstVideoId, {
youtubeVideoId: 'video-1',
videoUrl: 'https://www.youtube.com/watch?v=video-1',
videoTitle: 'Video One',
videoThumbnailUrl: 'https://i.ytimg.com/vi/video-1/hqdefault.jpg',
channelId: 'UC123',
channelName: 'Channel Name',
channelUrl: 'https://www.youtube.com/channel/UC123',
channelThumbnailUrl: 'https://yt3.googleusercontent.com/channel-123=s176-c-k-c0x00ffffff-no-rj',
uploaderId: '@channelname',
uploaderUrl: 'https://www.youtube.com/@channelname',
description: null,
metadataJson: '{"id":"video-1"}',
});
linkYoutubeVideoToAnimeRecord(db, secondVideoId, {
youtubeVideoId: 'video-2',
videoUrl: 'https://www.youtube.com/watch?v=video-2',
videoTitle: 'Video Two',
videoThumbnailUrl: 'https://i.ytimg.com/vi/video-2/hqdefault.jpg',
channelId: 'UC123',
channelName: 'Channel Name',
channelUrl: 'https://www.youtube.com/channel/UC123',
channelThumbnailUrl: 'https://yt3.googleusercontent.com/channel-123=s176-c-k-c0x00ffffff-no-rj',
uploaderId: '@channelname',
uploaderUrl: 'https://www.youtube.com/@channelname',
description: null,
metadataJson: '{"id":"video-2"}',
});
const animeRows = db.prepare('SELECT anime_id, canonical_title FROM imm_anime').all() as Array<{
anime_id: number;
canonical_title: string;
}>;
const videoRows = db
.prepare('SELECT video_id, anime_id, parsed_title FROM imm_videos ORDER BY video_id ASC')
.all() as Array<{ video_id: number; anime_id: number | null; parsed_title: string | null }>;
// Exactly one anime row carries the channel name, and both videos point at it.
const channelAnimeRows = animeRows.filter((row) => row.canonical_title === 'Channel Name');
assert.equal(channelAnimeRows.length, 1);
assert.equal(videoRows[0]?.anime_id, channelAnimeRows[0]?.anime_id);
assert.equal(videoRows[1]?.anime_id, channelAnimeRows[0]?.anime_id);
// The videos' parsed_title is rewritten to the channel name as well.
assert.equal(videoRows[0]?.parsed_title, 'Channel Name');
assert.equal(videoRows[1]?.parsed_title, 'Channel Name');
} finally {
// Always release the temporary database, even when an assertion fails.
db.close();
cleanupDbPath(dbPath);
}
});
test('start/finalize session updates ended_at and status', () => {
const dbPath = makeDbPath();
const db = new Database(dbPath);

View File

@@ -2,7 +2,7 @@ import { createHash } from 'node:crypto';
import { parseMediaInfo } from '../../../jimaku/utils';
import type { DatabaseSync } from './sqlite';
import { SCHEMA_VERSION } from './types';
import type { QueuedWrite, VideoMetadata } from './types';
import type { QueuedWrite, VideoMetadata, YoutubeVideoMetadata } from './types';
export interface TrackerPreparedStatements {
telemetryInsertStmt: ReturnType<DatabaseSync['prepare']>;
@@ -39,6 +39,41 @@ export interface VideoAnimeLinkInput {
parseMetadataJson: string | null;
}
/**
 * Derives the anime-record identity used to group a YouTube video by channel.
 *
 * All inputs are trimmed; blank values are treated as missing. The grouping
 * key (`parsedTitle`) prefers the channel id, then the channel URL, then the
 * channel name. The display title (`canonicalTitle`) prefers the channel
 * name, then the uploader id, then the video title, and finally the grouping
 * key itself.
 *
 * @param metadata YouTube metadata for a single video.
 * @returns The identity fields, or `null` when the metadata carries no
 *   channel id, channel URL or channel name.
 */
function buildYoutubeChannelAnimeIdentity(metadata: YoutubeVideoMetadata): {
  parsedTitle: string;
  canonicalTitle: string;
  metadataJson: string;
} | null {
  // Trim and collapse empty/missing strings to null.
  const clean = (value: string | null | undefined): string | null => value?.trim() || null;

  const channelId = clean(metadata.channelId);
  const channelUrl = clean(metadata.channelUrl);
  const channelName = clean(metadata.channelName);
  const uploaderId = clean(metadata.uploaderId);
  const videoTitle = clean(metadata.videoTitle);

  let parsedTitle: string;
  if (channelId) {
    parsedTitle = `youtube-channel:${channelId}`;
  } else if (channelUrl) {
    parsedTitle = `youtube-channel-url:${channelUrl}`;
  } else if (channelName) {
    parsedTitle = `youtube-channel-name:${channelName}`;
  } else {
    return null;
  }

  const canonicalTitle = channelName || uploaderId || videoTitle || parsedTitle;
  const metadataJson = JSON.stringify({
    source: 'youtube-channel',
    channelId,
    channelUrl,
    channelName,
    uploaderId,
  });

  return { parsedTitle, canonicalTitle, metadataJson };
}
const COVER_BLOB_REFERENCE_PREFIX = '__subminer_cover_blob_ref__:';
const WAL_JOURNAL_SIZE_LIMIT_BYTES = 64 * 1024 * 1024;
@@ -439,6 +474,38 @@ export function linkVideoToAnimeRecord(
);
}
/**
 * Attaches a YouTube video to the anime record that represents its channel.
 *
 * The channel identity is derived from `metadata`; the matching anime record
 * is fetched or created, and the video row is then linked to it with the
 * `youtube` parser source.
 *
 * @param db - Open tracker database.
 * @param videoId - Row id of the video to link.
 * @param metadata - Probed YouTube metadata for that video.
 * @returns The anime id the video was linked to, or `null` when no channel
 *   identity could be derived (nothing is written in that case).
 */
export function linkYoutubeVideoToAnimeRecord(
  db: DatabaseSync,
  videoId: number,
  metadata: YoutubeVideoMetadata,
): number | null {
  const channelIdentity = buildYoutubeChannelAnimeIdentity(metadata);
  if (channelIdentity === null) {
    return null;
  }

  const { parsedTitle, canonicalTitle, metadataJson } = channelIdentity;

  const animeId = getOrCreateAnimeRecord(db, {
    parsedTitle,
    canonicalTitle,
    anilistId: null,
    titleRomaji: null,
    titleEnglish: null,
    titleNative: null,
    metadataJson,
  });

  // The video row stores the human-readable channel title, not the grouping key.
  linkVideoToAnimeRecord(db, videoId, {
    animeId,
    parsedBasename: null,
    parsedTitle: canonicalTitle,
    parsedSeason: null,
    parsedEpisode: null,
    parserSource: 'youtube',
    parserConfidence: 1,
    parseMetadataJson: metadataJson,
  });

  return animeId;
}
function migrateLegacyAnimeMetadata(db: DatabaseSync): void {
addColumnIfMissing(db, 'imm_videos', 'anime_id', 'INTEGER REFERENCES imm_anime(anime_id)');
addColumnIfMissing(db, 'imm_videos', 'parsed_basename', 'TEXT');
@@ -743,6 +810,27 @@ export function ensureSchema(db: DatabaseSync): void {
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE
);
`);
db.exec(`
CREATE TABLE IF NOT EXISTS imm_youtube_videos(
video_id INTEGER PRIMARY KEY,
youtube_video_id TEXT NOT NULL,
video_url TEXT NOT NULL,
video_title TEXT,
video_thumbnail_url TEXT,
channel_id TEXT,
channel_name TEXT,
channel_url TEXT,
channel_thumbnail_url TEXT,
uploader_id TEXT,
uploader_url TEXT,
description TEXT,
metadata_json TEXT,
fetched_at_ms INTEGER NOT NULL,
CREATED_DATE INTEGER,
LAST_UPDATE_DATE INTEGER,
FOREIGN KEY(video_id) REFERENCES imm_videos(video_id) ON DELETE CASCADE
);
`);
db.exec(`
CREATE TABLE IF NOT EXISTS imm_cover_art_blobs(
blob_hash TEXT PRIMARY KEY,
@@ -1134,6 +1222,14 @@ export function ensureSchema(db: DatabaseSync): void {
CREATE INDEX IF NOT EXISTS idx_media_art_cover_url
ON imm_media_art(cover_url)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_youtube_videos_channel_id
ON imm_youtube_videos(channel_id)
`);
db.exec(`
CREATE INDEX IF NOT EXISTS idx_youtube_videos_youtube_video_id
ON imm_youtube_videos(youtube_video_id)
`);
if (currentVersion?.schema_version && currentVersion.schema_version < SCHEMA_VERSION) {
db.exec('DELETE FROM imm_daily_rollups');
@@ -1506,3 +1602,65 @@ export function updateVideoTitleRecord(
`,
).run(canonicalTitle, Date.now(), videoId);
}
/**
 * Inserts or refreshes the `imm_youtube_videos` row for a video.
 *
 * On first insert both `CREATED_DATE` and `LAST_UPDATE_DATE` are set to the
 * current time. On conflict with an existing `video_id`, every metadata
 * column, `fetched_at_ms` and `LAST_UPDATE_DATE` are overwritten while
 * `CREATED_DATE` is left untouched.
 *
 * @param db - Open tracker database.
 * @param videoId - Row id of the video the metadata belongs to.
 * @param metadata - Probed YouTube metadata; missing optional values are stored as NULL.
 */
export function upsertYoutubeVideoMetadata(
  db: DatabaseSync,
  videoId: number,
  metadata: YoutubeVideoMetadata,
): void {
  const timestampMs = Date.now();

  const upsertSql = `
INSERT INTO imm_youtube_videos (
video_id,
youtube_video_id,
video_url,
video_title,
video_thumbnail_url,
channel_id,
channel_name,
channel_url,
channel_thumbnail_url,
uploader_id,
uploader_url,
description,
metadata_json,
fetched_at_ms,
CREATED_DATE,
LAST_UPDATE_DATE
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(video_id) DO UPDATE SET
youtube_video_id = excluded.youtube_video_id,
video_url = excluded.video_url,
video_title = excluded.video_title,
video_thumbnail_url = excluded.video_thumbnail_url,
channel_id = excluded.channel_id,
channel_name = excluded.channel_name,
channel_url = excluded.channel_url,
channel_thumbnail_url = excluded.channel_thumbnail_url,
uploader_id = excluded.uploader_id,
uploader_url = excluded.uploader_url,
description = excluded.description,
metadata_json = excluded.metadata_json,
fetched_at_ms = excluded.fetched_at_ms,
LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE
`;

  // Positional values, in the same order as the column list above.
  const rowValues: Array<string | number | null> = [
    videoId,
    metadata.youtubeVideoId,
    metadata.videoUrl,
    metadata.videoTitle ?? null,
    metadata.videoThumbnailUrl ?? null,
    metadata.channelId ?? null,
    metadata.channelName ?? null,
    metadata.channelUrl ?? null,
    metadata.channelThumbnailUrl ?? null,
    metadata.uploaderId ?? null,
    metadata.uploaderUrl ?? null,
    metadata.description ?? null,
    metadata.metadataJson ?? null,
    timestampMs, // fetched_at_ms
    timestampMs, // CREATED_DATE
    timestampMs, // LAST_UPDATE_DATE
  ];

  const statement = db.prepare(upsertSql);
  statement.run(...rowValues);
}

View File

@@ -1,4 +1,4 @@
export const SCHEMA_VERSION = 15;
export const SCHEMA_VERSION = 16;
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;
@@ -420,6 +420,17 @@ export interface MediaLibraryRow {
totalTokensSeen: number;
lastWatchedMs: number;
hasCoverArt: number;
youtubeVideoId: string | null;
videoUrl: string | null;
videoTitle: string | null;
videoThumbnailUrl: string | null;
channelId: string | null;
channelName: string | null;
channelUrl: string | null;
channelThumbnailUrl: string | null;
uploaderId: string | null;
uploaderUrl: string | null;
description: string | null;
}
export interface MediaDetailRow {
@@ -434,6 +445,32 @@ export interface MediaDetailRow {
totalLookupCount: number;
totalLookupHits: number;
totalYomitanLookupCount: number;
youtubeVideoId: string | null;
videoUrl: string | null;
videoTitle: string | null;
videoThumbnailUrl: string | null;
channelId: string | null;
channelName: string | null;
channelUrl: string | null;
channelThumbnailUrl: string | null;
uploaderId: string | null;
uploaderUrl: string | null;
description: string | null;
}
/**
 * Metadata describing a single YouTube video and the channel it belongs to.
 *
 * `youtubeVideoId` and `videoUrl` are always present; every other field is
 * `null` when the value is unknown. The fields correspond one-to-one to the
 * columns of the `imm_youtube_videos` table.
 */
export interface YoutubeVideoMetadata {
youtubeVideoId: string;
videoUrl: string;
videoTitle: string | null;
videoThumbnailUrl: string | null;
channelId: string | null;
channelName: string | null;
channelUrl: string | null;
channelThumbnailUrl: string | null;
uploaderId: string | null;
uploaderUrl: string | null;
description: string | null;
// Serialized JSON payload stored alongside the parsed fields.
// NOTE(review): presumably the raw probe output — confirm against the producer.
metadataJson: string | null;
}
export interface AnimeLibraryRow {

View File

@@ -79,7 +79,10 @@ export {
handleOverlayWindowBeforeInputEvent,
isTabInputForMpvForwarding,
} from './overlay-window-input';
export { initializeOverlayRuntime } from './overlay-runtime-init';
export {
initializeOverlayAnkiIntegration,
initializeOverlayRuntime,
} from './overlay-runtime-init';
export { setVisibleOverlayVisible, updateVisibleOverlayVisibility } from './overlay-visibility';
export {
MPV_REQUEST_ID_SECONDARY_SUB_VISIBILITY,

View File

@@ -15,6 +15,7 @@ function createOptions(overrides: Partial<Parameters<typeof handleMpvCommandFrom
PLAY_NEXT_SUBTITLE: '__play-next-subtitle',
SHIFT_SUB_DELAY_TO_NEXT_SUBTITLE_START: '__sub-delay-next-line',
SHIFT_SUB_DELAY_TO_PREVIOUS_SUBTITLE_START: '__sub-delay-prev-line',
YOUTUBE_PICKER_OPEN: '__youtube-picker-open',
},
triggerSubsyncFromConfig: () => {
calls.push('subsync');
@@ -22,6 +23,9 @@ function createOptions(overrides: Partial<Parameters<typeof handleMpvCommandFrom
openRuntimeOptionsPalette: () => {
calls.push('runtime-options');
},
openYoutubeTrackPicker: () => {
calls.push('youtube-picker');
},
runtimeOptionsCycle: () => ({ ok: true }),
showMpvOsd: (text) => {
osd.push(text);
@@ -98,6 +102,14 @@ test('handleMpvCommandFromIpc dispatches special subtitle-delay shift command',
assert.deepEqual(osd, []);
});
// The youtube-picker special command must be handled locally: it opens the
// picker and neither reaches mpv nor shows an OSD message.
test('handleMpvCommandFromIpc dispatches special youtube picker open command', () => {
  const harness = createOptions();
  const command = ['__youtube-picker-open'];

  handleMpvCommandFromIpc(command, harness.options);

  assert.deepEqual(harness.calls, ['youtube-picker']);
  assert.deepEqual(harness.sentCommands, []);
  assert.deepEqual(harness.osd, []);
});
test('handleMpvCommandFromIpc does not forward commands while disconnected', () => {
const { options, sentCommands, osd } = createOptions({
isMpvConnected: () => false,

View File

@@ -14,9 +14,11 @@ export interface HandleMpvCommandFromIpcOptions {
PLAY_NEXT_SUBTITLE: string;
SHIFT_SUB_DELAY_TO_NEXT_SUBTITLE_START: string;
SHIFT_SUB_DELAY_TO_PREVIOUS_SUBTITLE_START: string;
YOUTUBE_PICKER_OPEN: string;
};
triggerSubsyncFromConfig: () => void;
openRuntimeOptionsPalette: () => void;
openYoutubeTrackPicker: () => void | Promise<void>;
runtimeOptionsCycle: (id: RuntimeOptionId, direction: 1 | -1) => RuntimeOptionApplyResult;
showMpvOsd: (text: string) => void;
mpvReplaySubtitle: () => void;
@@ -90,6 +92,11 @@ export function handleMpvCommandFromIpc(
return;
}
if (first === options.specialCommands.YOUTUBE_PICKER_OPEN) {
void options.openYoutubeTrackPicker();
return;
}
if (
first === options.specialCommands.SHIFT_SUB_DELAY_TO_NEXT_SUBTITLE_START ||
first === options.specialCommands.SHIFT_SUB_DELAY_TO_PREVIOUS_SUBTITLE_START

View File

@@ -144,6 +144,7 @@ function createRegisterIpcDeps(overrides: Partial<IpcServiceDeps> = {}): IpcServ
getAnilistQueueStatus: () => ({}),
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
onYoutubePickerResolve: async () => ({ ok: true, message: 'ok' }),
immersionTracker: null,
...overrides,
};
@@ -236,6 +237,7 @@ test('createIpcDepsRuntime wires AniList handlers', async () => {
return { ok: true, message: 'done' };
},
appendClipboardVideoToQueue: () => ({ ok: true, message: 'queued' }),
onYoutubePickerResolve: async () => ({ ok: true, message: 'ok' }),
});
assert.deepEqual(deps.getAnilistStatus(), { tokenStatus: 'resolved' });
@@ -305,6 +307,7 @@ test('registerIpcHandlers rejects malformed runtime-option payloads', async () =
getAnilistQueueStatus: () => ({}),
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
onYoutubePickerResolve: async () => ({ ok: true, message: 'ok' }),
},
registrar,
);
@@ -611,6 +614,7 @@ test('registerIpcHandlers ignores malformed fire-and-forget payloads', () => {
getAnilistQueueStatus: () => ({}),
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
onYoutubePickerResolve: async () => ({ ok: true, message: 'ok' }),
},
registrar,
);
@@ -677,6 +681,7 @@ test('registerIpcHandlers awaits saveControllerPreference through request-respon
getAnilistQueueStatus: () => ({}),
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
onYoutubePickerResolve: async () => ({ ok: true, message: 'ok' }),
},
registrar,
);
@@ -746,6 +751,7 @@ test('registerIpcHandlers rejects malformed controller preference payloads', asy
getAnilistQueueStatus: () => ({}),
retryAnilistQueueNow: async () => ({ ok: true, message: 'ok' }),
appendClipboardVideoToQueue: () => ({ ok: true, message: 'ok' }),
onYoutubePickerResolve: async () => ({ ok: true, message: 'ok' }),
},
registrar,
);

View File

@@ -10,6 +10,8 @@ import type {
SubtitlePosition,
SubsyncManualRunRequest,
SubsyncResult,
YoutubePickerResolveRequest,
YoutubePickerResolveResult,
} from '../../types';
import { IPC_CHANNELS, type OverlayHostedModal } from '../../shared/ipc/contracts';
import {
@@ -23,6 +25,7 @@ import {
parseRuntimeOptionValue,
parseSubtitlePosition,
parseSubsyncManualRunRequest,
parseYoutubePickerResolveRequest,
} from '../../shared/ipc/validators';
const { BrowserWindow, ipcMain } = electron;
@@ -61,6 +64,7 @@ export interface IpcServiceDeps {
getCurrentSecondarySub: () => string;
focusMainWindow: () => void;
runSubsyncManual: (request: SubsyncManualRunRequest) => Promise<SubsyncResult>;
onYoutubePickerResolve: (request: YoutubePickerResolveRequest) => Promise<YoutubePickerResolveResult>;
getAnkiConnectStatus: () => boolean;
getRuntimeOptions: () => unknown;
setRuntimeOption: (id: RuntimeOptionId, value: RuntimeOptionValue) => unknown;
@@ -163,6 +167,7 @@ export interface IpcDepsRuntimeOptions {
getMpvClient: () => MpvClientLike | null;
focusMainWindow: () => void;
runSubsyncManual: (request: SubsyncManualRunRequest) => Promise<SubsyncResult>;
onYoutubePickerResolve: (request: YoutubePickerResolveRequest) => Promise<YoutubePickerResolveResult>;
getAnkiConnectStatus: () => boolean;
getRuntimeOptions: () => unknown;
setRuntimeOption: (id: RuntimeOptionId, value: RuntimeOptionValue) => unknown;
@@ -225,6 +230,7 @@ export function createIpcDepsRuntime(options: IpcDepsRuntimeOptions): IpcService
mainWindow.focus();
},
runSubsyncManual: options.runSubsyncManual,
onYoutubePickerResolve: options.onYoutubePickerResolve,
getAnkiConnectStatus: options.getAnkiConnectStatus,
getRuntimeOptions: options.getRuntimeOptions,
setRuntimeOption: options.setRuntimeOption,
@@ -285,6 +291,14 @@ export function registerIpcHandlers(deps: IpcServiceDeps, ipc: IpcMainRegistrar
deps.onOverlayModalOpened(parsedModal);
});
ipc.handle(IPC_CHANNELS.request.youtubePickerResolve, async (_event: unknown, request: unknown) => {
const parsedRequest = parseYoutubePickerResolveRequest(request);
if (!parsedRequest) {
return { ok: false, message: 'Invalid YouTube picker resolve payload' };
}
return await deps.onYoutubePickerResolve(parsedRequest);
});
ipc.on(IPC_CHANNELS.command.openYomitanSettings, () => {
deps.openYomitanSettings();
});

View File

@@ -1,6 +1,6 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { initializeOverlayRuntime } from './overlay-runtime-init';
import { initializeOverlayAnkiIntegration, initializeOverlayRuntime } from './overlay-runtime-init';
test('initializeOverlayRuntime skips Anki integration when ankiConnect.enabled is false', () => {
let createdIntegrations = 0;
@@ -109,6 +109,136 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled
assert.equal(setIntegrationCalls, 1);
});
// Success path: with Anki enabled, all collaborators available and no
// existing integration, the helper must create, start and register exactly
// one integration and report success through its boolean return value.
test('initializeOverlayAnkiIntegration can initialize Anki transport after overlay runtime already exists', () => {
  let createdIntegrations = 0;
  let startedIntegrations = 0;
  let setIntegrationCalls = 0;

  const result = initializeOverlayAnkiIntegration({
    getResolvedConfig: () => ({
      ankiConnect: { enabled: true } as never,
    }),
    getSubtitleTimingTracker: () => ({}),
    getMpvClient: () => ({
      send: () => {},
    }),
    getRuntimeOptionsManager: () => ({
      getEffectiveAnkiConnectConfig: (config) => config as never,
    }),
    createAnkiIntegration: (args) => {
      createdIntegrations += 1;
      assert.equal(args.config.enabled, true);
      return {
        start: () => {
          startedIntegrations += 1;
        },
      };
    },
    setAnkiIntegration: () => {
      setIntegrationCalls += 1;
    },
    showDesktopNotification: () => {},
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 11,
      deleteNoteId: 12,
      deleteDuplicate: false,
      cancelled: false,
    }),
    getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
  });

  // The sibling tests pin the `false` results; pin the `true` result here.
  assert.equal(result, true);
  assert.equal(createdIntegrations, 1);
  assert.equal(startedIntegrations, 1);
  assert.equal(setIntegrationCalls, 1);
});
// When an integration is already registered the helper must be a no-op:
// nothing is created, started or re-registered, and it reports `false`.
test('initializeOverlayAnkiIntegration returns false when integration already exists', () => {
  const tally = { created: 0, started: 0, registered: 0 };

  const outcome = initializeOverlayAnkiIntegration({
    getResolvedConfig: () => ({
      ankiConnect: { enabled: true } as never,
    }),
    getSubtitleTimingTracker: () => ({}),
    getMpvClient: () => ({
      send: () => {},
    }),
    getRuntimeOptionsManager: () => ({
      getEffectiveAnkiConnectConfig: (config) => config as never,
    }),
    // A truthy value here signals that an integration is already in place.
    getAnkiIntegration: () => ({}),
    createAnkiIntegration: () => {
      tally.created += 1;
      return {
        start: () => {
          tally.started += 1;
        },
      };
    },
    setAnkiIntegration: () => {
      tally.registered += 1;
    },
    showDesktopNotification: () => {},
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 11,
      deleteNoteId: 12,
      deleteDuplicate: false,
      cancelled: false,
    }),
    getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
  });

  assert.equal(outcome, false);
  assert.equal(tally.created, 0);
  assert.equal(tally.started, 0);
  assert.equal(tally.registered, 0);
});
// With `ankiConnect.enabled` set to false the helper must bail out before
// creating, starting or registering anything, and report `false`.
test('initializeOverlayAnkiIntegration returns false when ankiConnect is disabled', () => {
  const tally = { created: 0, started: 0, registered: 0 };

  const outcome = initializeOverlayAnkiIntegration({
    getResolvedConfig: () => ({
      ankiConnect: { enabled: false } as never,
    }),
    getSubtitleTimingTracker: () => ({}),
    getMpvClient: () => ({
      send: () => {},
    }),
    getRuntimeOptionsManager: () => ({
      getEffectiveAnkiConnectConfig: (config) => config as never,
    }),
    createAnkiIntegration: () => {
      tally.created += 1;
      return {
        start: () => {
          tally.started += 1;
        },
      };
    },
    setAnkiIntegration: () => {
      tally.registered += 1;
    },
    showDesktopNotification: () => {},
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 11,
      deleteNoteId: 12,
      deleteDuplicate: false,
      cancelled: false,
    }),
    getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
  });

  assert.equal(outcome, false);
  assert.equal(tally.created, 0);
  assert.equal(tally.started, 0);
  assert.equal(tally.registered, 0);
});
test('initializeOverlayRuntime can skip starting Anki integration transport', () => {
let createdIntegrations = 0;
let startedIntegrations = 0;

View File

@@ -47,6 +47,24 @@ function createDefaultAnkiIntegration(args: CreateAnkiIntegrationArgs): AnkiInte
}
export function initializeOverlayRuntime(options: {
getMpvSocketPath: () => string;
getResolvedConfig: () => { ankiConnect?: AnkiConnectConfig; ai?: AiConfig };
getSubtitleTimingTracker: () => unknown | null;
getMpvClient: () => {
send?: (payload: { command: string[] }) => void;
} | null;
getRuntimeOptionsManager: () => {
getEffectiveAnkiConnectConfig: (config?: AnkiConnectConfig) => AnkiConnectConfig;
} | null;
getAnkiIntegration?: () => unknown | null;
setAnkiIntegration: (integration: unknown | null) => void;
showDesktopNotification: (title: string, options: { body?: string; icon?: string }) => void;
createFieldGroupingCallback: () => (
data: KikuFieldGroupingRequestData,
) => Promise<KikuFieldGroupingChoice>;
getKnownWordCacheStatePath: () => string;
shouldStartAnkiIntegration?: () => boolean;
createAnkiIntegration?: (args: CreateAnkiIntegrationArgs) => AnkiIntegrationLike;
backendOverride: string | null;
createMainWindow: () => void;
registerGlobalShortcuts: () => void;
@@ -60,23 +78,6 @@ export function initializeOverlayRuntime(options: {
override?: string | null,
targetMpvSocketPath?: string | null,
) => BaseWindowTracker | null;
getMpvSocketPath: () => string;
getResolvedConfig: () => { ankiConnect?: AnkiConnectConfig; ai?: AiConfig };
getSubtitleTimingTracker: () => unknown | null;
getMpvClient: () => {
send?: (payload: { command: string[] }) => void;
} | null;
getRuntimeOptionsManager: () => {
getEffectiveAnkiConnectConfig: (config?: AnkiConnectConfig) => AnkiConnectConfig;
} | null;
setAnkiIntegration: (integration: unknown | null) => void;
showDesktopNotification: (title: string, options: { body?: string; icon?: string }) => void;
createFieldGroupingCallback: () => (
data: KikuFieldGroupingRequestData,
) => Promise<KikuFieldGroupingChoice>;
getKnownWordCacheStatePath: () => string;
shouldStartAnkiIntegration?: () => boolean;
createAnkiIntegration?: (args: CreateAnkiIntegrationArgs) => AnkiIntegrationLike;
}): void {
options.createMainWindow();
options.registerGlobalShortcuts();
@@ -112,35 +113,64 @@ export function initializeOverlayRuntime(options: {
windowTracker.start();
}
initializeOverlayAnkiIntegration(options);
options.updateVisibleOverlayVisibility();
}
export function initializeOverlayAnkiIntegration(options: {
getResolvedConfig: () => { ankiConnect?: AnkiConnectConfig; ai?: AiConfig };
getSubtitleTimingTracker: () => unknown | null;
getMpvClient: () => {
send?: (payload: { command: string[] }) => void;
} | null;
getRuntimeOptionsManager: () => {
getEffectiveAnkiConnectConfig: (config?: AnkiConnectConfig) => AnkiConnectConfig;
} | null;
getAnkiIntegration?: () => unknown | null;
setAnkiIntegration: (integration: unknown | null) => void;
showDesktopNotification: (title: string, options: { body?: string; icon?: string }) => void;
createFieldGroupingCallback: () => (
data: KikuFieldGroupingRequestData,
) => Promise<KikuFieldGroupingChoice>;
getKnownWordCacheStatePath: () => string;
shouldStartAnkiIntegration?: () => boolean;
createAnkiIntegration?: (args: CreateAnkiIntegrationArgs) => AnkiIntegrationLike;
}): boolean {
if (options.getAnkiIntegration?.()) {
return false;
}
const config = options.getResolvedConfig();
const subtitleTimingTracker = options.getSubtitleTimingTracker();
const mpvClient = options.getMpvClient();
const runtimeOptionsManager = options.getRuntimeOptionsManager();
if (
config.ankiConnect?.enabled === true &&
subtitleTimingTracker &&
mpvClient &&
runtimeOptionsManager
config.ankiConnect?.enabled !== true ||
!subtitleTimingTracker ||
!mpvClient ||
!runtimeOptionsManager
) {
const effectiveAnkiConfig = runtimeOptionsManager.getEffectiveAnkiConnectConfig(
config.ankiConnect,
);
const createAnkiIntegration = options.createAnkiIntegration ?? createDefaultAnkiIntegration;
const integration = createAnkiIntegration({
config: effectiveAnkiConfig,
aiConfig: mergeAiConfig(config.ai, config.ankiConnect?.ai),
subtitleTimingTracker,
mpvClient,
showDesktopNotification: options.showDesktopNotification,
createFieldGroupingCallback: options.createFieldGroupingCallback,
knownWordCacheStatePath: options.getKnownWordCacheStatePath(),
});
if (options.shouldStartAnkiIntegration?.() !== false) {
integration.start();
}
options.setAnkiIntegration(integration);
return false;
}
options.updateVisibleOverlayVisibility();
const effectiveAnkiConfig = runtimeOptionsManager.getEffectiveAnkiConnectConfig(
config.ankiConnect,
);
const createAnkiIntegration = options.createAnkiIntegration ?? createDefaultAnkiIntegration;
const integration = createAnkiIntegration({
config: effectiveAnkiConfig,
aiConfig: mergeAiConfig(config.ai, config.ankiConnect?.ai),
subtitleTimingTracker,
mpvClient,
showDesktopNotification: options.showDesktopNotification,
createFieldGroupingCallback: options.createFieldGroupingCallback,
knownWordCacheStatePath: options.getKnownWordCacheStatePath(),
});
if (options.shouldStartAnkiIntegration?.() !== false) {
integration.start();
}
options.setAnkiIntegration(integration);
return true;
}

View File

@@ -200,6 +200,44 @@ test('Windows visible overlay stays click-through and does not steal focus while
assert.ok(!calls.includes('focus'));
});
// While a modal window is active the visible overlay must be hidden and
// must not be shown or have its bounds refreshed, even though the overlay
// is flagged visible and the tracker is ready.
test('visible overlay stays hidden while a modal window is active', () => {
  const { window: overlayWindow, calls: recorded } = createMainWindowRecorder();
  const note = (label: string) => (): void => {
    recorded.push(label);
  };
  const trackerStub: WindowTrackerStub = {
    isTracking: () => true,
    getGeometry: () => ({ x: 0, y: 0, width: 1280, height: 720 }),
  };

  const overlayArgs = {
    visibleOverlayVisible: true,
    modalActive: true,
    mainWindow: overlayWindow as never,
    windowTracker: trackerStub as never,
    trackerNotReadyWarningShown: false,
    setTrackerNotReadyWarningShown: () => {},
    updateVisibleOverlayBounds: note('update-bounds'),
    ensureOverlayWindowLevel: note('ensure-level'),
    syncPrimaryOverlayWindowLayer: note('sync-layer'),
    enforceOverlayLayerOrder: note('enforce-order'),
    syncOverlayShortcuts: note('sync-shortcuts'),
    isMacOSPlatform: true,
    isWindowsPlatform: false,
  };
  updateVisibleOverlayVisibility(overlayArgs as never);

  assert.ok(recorded.includes('hide'));
  assert.ok(!recorded.includes('show'));
  assert.ok(!recorded.includes('update-bounds'));
});
test('macOS tracked visible overlay stays visible without passively stealing focus', () => {
const { window, calls } = createMainWindowRecorder();
const tracker: WindowTrackerStub = {

View File

@@ -4,6 +4,7 @@ import { WindowGeometry } from '../../types';
export function updateVisibleOverlayVisibility(args: {
visibleOverlayVisible: boolean;
modalActive?: boolean;
forceMousePassthrough?: boolean;
mainWindow: BrowserWindow | null;
windowTracker: BaseWindowTracker | null;
@@ -28,6 +29,12 @@ export function updateVisibleOverlayVisibility(args: {
const mainWindow = args.mainWindow;
if (args.modalActive) {
mainWindow.hide();
args.syncOverlayShortcuts();
return;
}
const showPassiveVisibleOverlay = (): void => {
const forceMousePassthrough = args.forceMousePassthrough === true;
if (args.isWindowsPlatform || forceMousePassthrough) {

View File

@@ -194,3 +194,167 @@ test('runAppReadyRuntime headless refresh bootstraps Anki runtime without UI sta
'run-headless-command',
]);
});
// Headless initial-command startup: Yomitan must be loaded after the
// subtitle timing tracker is created and before the overlay runtime is
// initialized. The full recorded call order is pinned below.
test('runAppReadyRuntime loads Yomitan before headless overlay fallback initialization', async () => {
  const calls: string[] = [];
  // Stub factories: each returned stub records its label when invoked.
  const mark = (label: string) => (): void => {
    calls.push(label);
  };
  const markAsync = (label: string) => async (): Promise<void> => {
    calls.push(label);
  };
  const ignore = (): void => {};
  const ignoreAsync = async (): Promise<void> => {};

  await runAppReadyRuntime({
    ensureDefaultConfigBootstrap: mark('bootstrap'),
    loadSubtitlePosition: mark('load-subtitle-position'),
    resolveKeybindings: mark('resolve-keybindings'),
    createMpvClient: mark('create-mpv'),
    reloadConfig: mark('reload-config'),
    getResolvedConfig: () => ({}),
    getConfigWarnings: () => [],
    logConfigWarning: ignore,
    setLogLevel: ignore,
    initRuntimeOptionsManager: mark('init-runtime-options'),
    setSecondarySubMode: ignore,
    defaultSecondarySubMode: 'hover',
    defaultWebsocketPort: 0,
    defaultAnnotationWebsocketPort: 0,
    defaultTexthookerPort: 0,
    hasMpvWebsocketPlugin: () => false,
    startSubtitleWebsocket: ignore,
    startAnnotationWebsocket: ignore,
    startTexthooker: ignore,
    log: ignore,
    createMecabTokenizerAndCheck: ignoreAsync,
    createSubtitleTimingTracker: mark('subtitle-timing'),
    createImmersionTracker: ignore,
    startJellyfinRemoteSession: ignoreAsync,
    loadYomitanExtension: markAsync('load-yomitan'),
    handleFirstRunSetup: ignoreAsync,
    prewarmSubtitleDictionaries: ignoreAsync,
    startBackgroundWarmups: ignore,
    texthookerOnlyMode: false,
    shouldAutoInitializeOverlayRuntimeFromConfig: () => false,
    setVisibleOverlayVisible: ignore,
    initializeOverlayRuntime: mark('init-overlay'),
    handleInitialArgs: mark('handle-initial-args'),
    shouldRunHeadlessInitialCommand: () => true,
    shouldUseMinimalStartup: () => false,
    shouldSkipHeavyStartup: () => false,
  });

  const expectedOrder = [
    'bootstrap',
    'reload-config',
    'init-runtime-options',
    'create-mpv',
    'subtitle-timing',
    'load-yomitan',
    'init-overlay',
    'handle-initial-args',
  ];
  assert.deepEqual(calls, expectedOrder);
});
// Regular startup with overlay auto-initialization enabled: both the
// Yomitan load and the overlay initialization must happen, in that order.
test('runAppReadyRuntime loads Yomitan before auto-initializing overlay runtime', async () => {
  const calls: string[] = [];
  // Stub factories: each returned stub records its label when invoked.
  const mark = (label: string) => (): void => {
    calls.push(label);
  };
  const markAsync = (label: string) => async (): Promise<void> => {
    calls.push(label);
  };
  const ignore = (): void => {};
  const ignoreAsync = async (): Promise<void> => {};

  await runAppReadyRuntime({
    ensureDefaultConfigBootstrap: mark('bootstrap'),
    loadSubtitlePosition: mark('load-subtitle-position'),
    resolveKeybindings: mark('resolve-keybindings'),
    createMpvClient: mark('create-mpv'),
    reloadConfig: mark('reload-config'),
    getResolvedConfig: () => ({
      websocket: { enabled: false },
      annotationWebsocket: { enabled: false },
      texthooker: { launchAtStartup: false },
    }),
    getConfigWarnings: () => [],
    logConfigWarning: ignore,
    setLogLevel: mark('set-log-level'),
    initRuntimeOptionsManager: mark('init-runtime-options'),
    setSecondarySubMode: mark('set-secondary-sub-mode'),
    defaultSecondarySubMode: 'hover',
    defaultWebsocketPort: 0,
    defaultAnnotationWebsocketPort: 0,
    defaultTexthookerPort: 0,
    hasMpvWebsocketPlugin: () => false,
    startSubtitleWebsocket: mark('subtitle-ws'),
    startAnnotationWebsocket: mark('annotation-ws'),
    startTexthooker: mark('texthooker'),
    log: mark('log'),
    createMecabTokenizerAndCheck: ignoreAsync,
    createSubtitleTimingTracker: mark('subtitle-timing'),
    createImmersionTracker: mark('immersion'),
    startJellyfinRemoteSession: ignoreAsync,
    loadYomitanExtension: markAsync('load-yomitan'),
    handleFirstRunSetup: markAsync('first-run'),
    prewarmSubtitleDictionaries: ignoreAsync,
    startBackgroundWarmups: mark('warmups'),
    texthookerOnlyMode: false,
    shouldAutoInitializeOverlayRuntimeFromConfig: () => true,
    setVisibleOverlayVisible: mark('visible-overlay'),
    initializeOverlayRuntime: mark('init-overlay'),
    handleInitialArgs: mark('handle-initial-args'),
    shouldUseMinimalStartup: () => false,
    shouldSkipHeavyStartup: () => false,
  });

  const yomitanAt = calls.indexOf('load-yomitan');
  const overlayAt = calls.indexOf('init-overlay');
  assert.ok(calls.includes('load-yomitan'));
  assert.ok(calls.includes('init-overlay'));
  assert.ok(yomitanAt < overlayAt);
});

View File

@@ -194,6 +194,7 @@ export async function runAppReadyRuntime(deps: AppReadyRuntimeDeps): Promise<voi
} else {
deps.createMpvClient();
deps.createSubtitleTimingTracker();
await deps.loadYomitanExtension();
deps.initializeOverlayRuntime();
deps.handleInitialArgs();
}
@@ -290,13 +291,14 @@ export async function runAppReadyRuntime(deps: AppReadyRuntimeDeps): Promise<voi
if (deps.texthookerOnlyMode) {
deps.log('Texthooker-only mode enabled; skipping overlay window.');
} else if (deps.shouldAutoInitializeOverlayRuntimeFromConfig()) {
await deps.loadYomitanExtension();
deps.setVisibleOverlayVisible(true);
deps.initializeOverlayRuntime();
} else {
deps.log('Overlay runtime deferred: waiting for explicit overlay command.');
await deps.loadYomitanExtension();
}
await deps.loadYomitanExtension();
await deps.handleFirstRunSetup();
deps.handleInitialArgs();
deps.logDebug?.(`App-ready critical path finished in ${now() - startupStartedAtMs}ms.`);

View File

@@ -0,0 +1,18 @@
import type { YoutubeTrackOption } from './track-probe';
import { downloadYoutubeSubtitleTrack, downloadYoutubeSubtitleTracks } from './track-download';
/**
 * Acquires a single YouTube subtitle track by delegating to
 * `downloadYoutubeSubtitleTrack`.
 *
 * @param input - Source video URL, destination directory and the track to fetch.
 * @returns The download result, which carries the path of the subtitle file.
 */
export async function acquireYoutubeSubtitleTrack(input: {
  targetUrl: string;
  outputDir: string;
  track: YoutubeTrackOption;
}): Promise<{ path: string }> {
  const downloaded = await downloadYoutubeSubtitleTrack(input);
  return downloaded;
}
/**
 * Acquires several YouTube subtitle tracks by delegating to
 * `downloadYoutubeSubtitleTracks`.
 *
 * @param input - Source video URL, destination directory and the tracks to fetch.
 * @returns The string-to-string map produced by the downloader.
 *   NOTE(review): presumably track id to file path — confirm in track-download.
 */
export async function acquireYoutubeSubtitleTracks(input: {
  targetUrl: string;
  outputDir: string;
  tracks: YoutubeTrackOption[];
}): Promise<Map<string, string>> {
  const downloadedByTrack = await downloadYoutubeSubtitleTracks(input);
  return downloadedByTrack;
}

View File

@@ -0,0 +1 @@
/**
 * Kind tag attached to a YouTube subtitle track.
 * NOTE(review): presumably 'manual' means uploader-provided captions and
 * 'auto' means auto-generated captions — confirm against the track probe.
 */
export type YoutubeTrackKind = 'manual' | 'auto';

View File

@@ -0,0 +1,41 @@
import type { YoutubeTrackKind } from './kinds';
export type { YoutubeTrackKind };
/**
 * Canonicalizes a language code for comparison.
 *
 * Trims and lowercases the input, turns underscores into hyphens and then
 * drops every character that is not `a-z`, `0-9` or `-`.
 *
 * @param value - Raw language code or name.
 * @returns The normalized code, possibly an empty string.
 */
export function normalizeYoutubeLangCode(value: string): string {
  const lowered = value.trim().toLowerCase();
  const hyphenated = lowered.replace(/_/g, '-');
  return hyphenated.replace(/[^a-z0-9-]+/g, '');
}
/**
 * Reports whether a language code or name denotes Japanese.
 *
 * After normalization, accepts `ja`, `jp`, `jpn`, `japanese` and any
 * regional variant starting with `ja-` or `jp-`.
 *
 * @param value - Raw language code or name.
 */
export function isJapaneseYoutubeLang(value: string): boolean {
  const code = normalizeYoutubeLangCode(value);
  const exactMatches = ['ja', 'jp', 'jpn', 'japanese'];
  if (exactMatches.includes(code)) {
    return true;
  }
  return code.startsWith('ja-') || code.startsWith('jp-');
}
/**
 * Reports whether a language code or name denotes English.
 *
 * After normalization, accepts `en`, `eng`, `english`, `enus`, `en-us` and
 * any regional variant starting with `en-`.
 *
 * @param value - Raw language code or name.
 */
export function isEnglishYoutubeLang(value: string): boolean {
  const code = normalizeYoutubeLangCode(value);
  const exactMatches = ['en', 'eng', 'english', 'enus', 'en-us'];
  if (exactMatches.includes(code)) {
    return true;
  }
  return code.startsWith('en-');
}
/**
 * Builds the display label for a subtitle track: `<name> (<kind>)`.
 *
 * The name is the trimmed title when it is non-empty, otherwise the trimmed
 * language, otherwise the literal `unknown`.
 *
 * @param input - Track language, kind and optional title.
 */
export function formatYoutubeTrackLabel(input: {
  language: string;
  kind: YoutubeTrackKind;
  title?: string;
}): string {
  const fallbackLanguage = input.language.trim() || 'unknown';
  const trimmedTitle = input.title?.trim();
  const displayName = trimmedTitle ? trimmedTitle : fallbackLanguage;
  return `${displayName} (${input.kind})`;
}

View File

@@ -0,0 +1,89 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import test from 'node:test';
import { probeYoutubeVideoMetadata } from './metadata-probe';
/**
 * Runs `fn` with a freshly created temporary directory and removes the
 * directory (recursively) afterwards, whether `fn` resolves or throws.
 *
 * @param fn - Callback that receives the temporary directory path.
 * @returns Whatever `fn` resolves to.
 */
async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
  const prefix = path.join(os.tmpdir(), 'subminer-youtube-metadata-probe-');
  const tempDir = fs.mkdtempSync(prefix);
  try {
    const outcome = await fn(tempDir);
    return outcome;
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}
/**
 * Writes a fake `yt-dlp` executable into `dir` that prints `payload` to
 * stdout. A Node script named `yt-dlp` is created (made executable on
 * non-Windows platforms) together with a `yt-dlp.cmd` wrapper that runs it
 * through `node`.
 *
 * @param dir - Directory that will receive the fake binary.
 * @param payload - Exact text the fake binary writes to stdout.
 */
function makeFakeYtDlpScript(dir: string, payload: string): void {
  const launcherPath = path.join(dir, 'yt-dlp');
  const serializedPayload = JSON.stringify(payload);
  const launcherSource = `#!/usr/bin/env node
process.stdout.write(${serializedPayload});
`;
  const cmdWrapperSource = `@echo off\r\nnode "${launcherPath}"\r\n`;

  fs.writeFileSync(launcherPath, launcherSource, 'utf8');
  const needsExecBit = process.platform !== 'win32';
  if (needsExecBit) {
    fs.chmodSync(launcherPath, 0o755);
  }
  fs.writeFileSync(`${launcherPath}.cmd`, cmdWrapperSource, 'utf8');
}
/**
 * Writes a fake `yt-dlp` executable into `dir` that never exits: the script
 * only schedules a repeating timer. A `yt-dlp.cmd` wrapper that runs the
 * script through `node` is written alongside it.
 *
 * @param dir - Directory that will receive the fake binary.
 */
function makeHangingFakeYtDlpScript(dir: string): void {
  const launcherPath = path.join(dir, 'yt-dlp');
  const launcherSource = `#!/usr/bin/env node
setInterval(() => {}, 1000);
`;
  const cmdWrapperSource = `@echo off\r\nnode "${launcherPath}"\r\n`;

  fs.writeFileSync(launcherPath, launcherSource, 'utf8');
  const needsExecBit = process.platform !== 'win32';
  if (needsExecBit) {
    fs.chmodSync(launcherPath, 0o755);
  }
  fs.writeFileSync(`${launcherPath}.cmd`, cmdWrapperSource, 'utf8');
}
/**
 * Runs `fn` while a fake `yt-dlp` that prints `payload` is first on `PATH`.
 *
 * The fake binary lives in a temporary `bin` directory that is prepended to
 * `process.env.PATH` for the duration of `fn`; the previous value is
 * written back afterwards, whether `fn` resolves or throws.
 *
 * @param payload - Text the fake binary writes to stdout.
 * @param fn - Code to run with the fake binary installed.
 * @returns Whatever `fn` resolves to.
 */
async function withFakeYtDlp<T>(payload: string, fn: () => Promise<T>): Promise<T> {
  const runWithFakeBinary = async (root: string): Promise<T> => {
    const fakeBinDir = path.join(root, 'bin');
    fs.mkdirSync(fakeBinDir, { recursive: true });
    makeFakeYtDlpScript(fakeBinDir, payload);

    const savedPath = process.env.PATH ?? '';
    process.env.PATH = [fakeBinDir, savedPath].join(path.delimiter);
    try {
      return await fn();
    } finally {
      process.env.PATH = savedPath;
    }
  };

  return await withTempDir(runWithFakeBinary);
}
/**
 * Runs `fn` while a fake `yt-dlp` that never exits is first on `PATH`.
 *
 * The fake lives in a temporary `bin` directory; `PATH` is reset to the value
 * captured beforehand once `fn` settles.
 */
async function withHangingFakeYtDlp<T>(fn: () => Promise<T>): Promise<T> {
  return withTempDir(async (root) => {
    const binDir = path.join(root, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    makeHangingFakeYtDlpScript(binDir);

    const savedPath = process.env.PATH ?? '';
    process.env.PATH = [binDir, savedPath].join(path.delimiter);
    try {
      const outcome = await fn();
      return outcome;
    } finally {
      process.env.PATH = savedPath;
    }
  });
}
// The fake yt-dlp prints text that is not JSON; the probe is expected to
// resolve to null rather than reject.
test('probeYoutubeVideoMetadata returns null on malformed yt-dlp JSON', async () => {
await withFakeYtDlp('not-json', async () => {
const result = await probeYoutubeVideoMetadata('https://www.youtube.com/watch?v=abc123');
assert.equal(result, null);
});
});
// The fake yt-dlp never exits, so the probe must reject with its timeout error.
// The test's own 20s timeout is longer than the 15000ms named in the expected
// message, leaving room for the probe to give up first.
test(
'probeYoutubeVideoMetadata times out when yt-dlp hangs',
{ timeout: 20_000 },
async () => {
await withHangingFakeYtDlp(async () => {
await assert.rejects(
probeYoutubeVideoMetadata('https://www.youtube.com/watch?v=abc123'),
/timed out after 15000ms/,
);
});
},
);

View File

@@ -0,0 +1,122 @@
import { spawn } from 'node:child_process';
import type { YoutubeVideoMetadata } from '../immersion-tracker/types';
// Default time budget, in milliseconds, that runCapture gives a spawned command
// before killing it and rejecting.
const YOUTUBE_METADATA_PROBE_TIMEOUT_MS = 15_000;
// One entry of the `thumbnails` array in yt-dlp's JSON output. Every field is
// optional; pickChannelThumbnail checks each one before relying on it.
type YtDlpThumbnail = {
url?: string;
width?: number;
height?: number;
};
// The fields of the `yt-dlp --dump-single-json` payload that
// probeYoutubeVideoMetadata reads. All are optional: the probe returns null
// without an `id`, falls back to the requested URL for `webpage_url`, and maps
// the remaining missing values to null.
type YtDlpYoutubeMetadata = {
id?: string;
title?: string;
webpage_url?: string;
thumbnail?: string;
thumbnails?: YtDlpThumbnail[];
channel_id?: string;
channel?: string;
channel_url?: string;
uploader_id?: string;
uploader_url?: string;
description?: string;
};
/**
 * Spawns `command` with `args` and collects everything it writes to stdout and
 * stderr as UTF-8 text.
 *
 * @param command Executable to spawn.
 * @param args Arguments passed to the executable.
 * @param timeoutMs Milliseconds to wait before killing the child and rejecting.
 * @returns The captured streams when the child closes with exit code 0.
 * @throws Rejects with the spawn error, with a timeout error, or — on a
 *   non-zero exit — with the trimmed stderr text (or a status message when
 *   stderr is empty).
 */
function runCapture(
  command: string,
  args: string[],
  timeoutMs = YOUTUBE_METADATA_PROBE_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    const captured = { stdout: '', stderr: '' };

    const deadline = setTimeout(() => {
      child.kill();
      reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk) => {
      captured.stdout += String(chunk);
    });
    child.stderr.on('data', (chunk) => {
      captured.stderr += String(chunk);
    });

    child.once('error', (error) => {
      clearTimeout(deadline);
      reject(error);
    });
    child.once('close', (code) => {
      clearTimeout(deadline);
      if (code !== 0) {
        reject(
          new Error(captured.stderr.trim() || `yt-dlp exited with status ${code ?? 'unknown'}`),
        );
        return;
      }
      resolve({ stdout: captured.stdout, stderr: captured.stderr });
    });
  });
}
/**
 * Picks the first thumbnail URL that looks like a channel avatar.
 *
 * Entries without a URL, or whose URL contains `/vi/`, are skipped. When an
 * entry carries positive numeric dimensions it is accepted only if its
 * width/height ratio lies within [0.8, 1.25]; entries without usable
 * dimensions are accepted only when hosted on `yt3.googleusercontent.com`.
 *
 * @returns The trimmed URL of the first accepted entry, or null when the input
 *   is not an array or nothing qualifies.
 */
function pickChannelThumbnail(thumbnails: YtDlpThumbnail[] | undefined): string | null {
  if (!Array.isArray(thumbnails)) {
    return null;
  }
  for (const entry of thumbnails) {
    const url = entry.url?.trim();
    if (!url || url.includes('/vi/')) {
      continue;
    }
    const { width, height } = entry;
    if (
      typeof width !== 'number' ||
      typeof height !== 'number' ||
      !(width > 0) ||
      !(height > 0)
    ) {
      // No usable dimensions: fall back to recognising the host.
      if (url.includes('yt3.googleusercontent.com')) {
        return url;
      }
      continue;
    }
    const aspect = width / height;
    if (aspect >= 0.8 && aspect <= 1.25) {
      return url;
    }
  }
  return null;
}
/**
 * Asks `yt-dlp` for the metadata of `targetUrl` and maps it onto
 * `YoutubeVideoMetadata`.
 *
 * @param targetUrl URL handed to `yt-dlp --dump-single-json`.
 * @returns The mapped metadata, or null when the output is not a JSON object
 *   or lacks a usable video id / URL.
 * @throws Rejects when `yt-dlp` cannot be run, exits non-zero, or times out
 *   (errors from `runCapture` are propagated unchanged).
 */
export async function probeYoutubeVideoMetadata(
  targetUrl: string,
): Promise<YoutubeVideoMetadata | null> {
  const { stdout } = await runCapture('yt-dlp', [
    '--dump-single-json',
    '--no-warnings',
    '--skip-download',
    targetUrl,
  ]);

  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout);
  } catch {
    return null;
  }
  // JSON.parse also succeeds for `null`, numbers and strings; treat anything
  // that is not an object the same way as malformed output instead of letting
  // a property access throw.
  if (typeof parsed !== 'object' || parsed === null) {
    return null;
  }
  const info = parsed as YtDlpYoutubeMetadata;

  // The payload is external input: only trust fields that really are strings.
  const readText = (value: unknown): string | null =>
    typeof value === 'string' ? value.trim() || null : null;

  const youtubeVideoId = readText(info.id);
  const videoUrl = readText(info.webpage_url) ?? targetUrl.trim();
  if (!youtubeVideoId || !videoUrl) {
    return null;
  }

  return {
    youtubeVideoId,
    videoUrl,
    videoTitle: readText(info.title),
    videoThumbnailUrl: readText(info.thumbnail),
    channelId: readText(info.channel_id),
    channelName: readText(info.channel),
    channelUrl: readText(info.channel_url),
    channelThumbnailUrl: pickChannelThumbnail(info.thumbnails),
    uploaderId: readText(info.uploader_id),
    uploaderUrl: readText(info.uploader_url),
    description: readText(info.description),
    metadataJson: JSON.stringify(info),
  };
}

View File

@@ -0,0 +1,29 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { retimeYoutubeSubtitle } from './retime';
// With both a primary and a secondary subtitle on disk, retiming must report
// strategy 'none', hand back the primary path unchanged, and leave the file
// contents untouched.
test('retimeYoutubeSubtitle uses the downloaded subtitle path as-is', async () => {
const root = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-youtube-retime-'));
try {
const primaryPath = path.join(root, 'primary.vtt');
const referencePath = path.join(root, 'reference.vtt');
fs.writeFileSync(primaryPath, 'WEBVTT\n', 'utf8');
fs.writeFileSync(referencePath, 'WEBVTT\n', 'utf8');
const result = await retimeYoutubeSubtitle({
primaryPath,
secondaryPath: referencePath,
});
assert.equal(result.ok, true);
assert.equal(result.strategy, 'none');
assert.equal(result.path, primaryPath);
assert.equal(result.message, 'Using downloaded subtitle as-is (no automatic retime enabled)');
assert.equal(fs.readFileSync(result.path, 'utf8'), 'WEBVTT\n');
} finally {
fs.rmSync(root, { recursive: true, force: true });
}
});

View File

@@ -0,0 +1,11 @@
/**
 * Placeholder retiming step: always succeeds with strategy `none` and returns
 * the primary subtitle path untouched.
 *
 * The message gains a "(no automatic retime enabled)" suffix when a secondary
 * path was supplied.
 */
export async function retimeYoutubeSubtitle(input: {
  primaryPath: string;
  secondaryPath: string | null;
}): Promise<{ ok: boolean; path: string; strategy: 'none' | 'alass' | 'ffsubsync'; message: string }> {
  const suffix = input.secondaryPath ? ' (no automatic retime enabled)' : '';
  return {
    ok: true,
    path: input.primaryPath,
    strategy: 'none',
    message: `Using downloaded subtitle as-is${suffix}`,
  };
}

View File

@@ -0,0 +1,75 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { convertYoutubeTimedTextToVtt, normalizeYoutubeAutoVtt } from './timedtext';
// Numeric entities that name a code point above U+10FFFF (decimal or hex) must
// survive as literal text, while a valid one (&#x41;) is decoded.
test('convertYoutubeTimedTextToVtt leaves malformed numeric entities literal', () => {
const result = convertYoutubeTimedTextToVtt(
'<timedtext><body><p t="0" d="1000">&#99999999; &#x110000; &#x41;</p></body></timedtext>',
);
assert.equal(
result,
['WEBVTT', '', '00:00:00.000 --> 00:00:01.000', '&#99999999; &#x110000; A', ''].join('\n'),
);
});
// A zero-duration row that repeats the upcoming text must not count as the
// "previous text": the following real row should still emit only the part that
// extends the first cue, and the first cue is clamped to end 1ms before the
// next row starts.
test('convertYoutubeTimedTextToVtt does not swallow text after zero-length overlap rows', () => {
const result = convertYoutubeTimedTextToVtt(
[
'<timedtext><body>',
'<p t="0" d="2000">今日は</p>',
'<p t="1000" d="0">今日はいい天気ですね</p>',
'<p t="1000" d="2000">今日はいい天気ですね</p>',
'</body></timedtext>',
].join(''),
);
assert.equal(
result,
[
'WEBVTT',
'',
'00:00:00.000 --> 00:00:00.999',
'今日は',
'',
'00:00:01.000 --> 00:00:03.000',
'いい天気ですね',
'',
].join('\n'),
);
});
// Each cue repeats the full text of the cue before it; normalization should
// keep the timings and leave only the newly added text in every cue.
test('normalizeYoutubeAutoVtt strips cumulative rolling-caption prefixes', () => {
const result = normalizeYoutubeAutoVtt(
[
'WEBVTT',
'',
'00:00:01.000 --> 00:00:02.000',
'今日は',
'',
'00:00:02.000 --> 00:00:03.000',
'今日はいい天気ですね',
'',
'00:00:03.000 --> 00:00:04.000',
'今日はいい天気ですね本当に',
'',
].join('\n'),
);
assert.equal(
result,
[
'WEBVTT',
'',
'00:00:01.000 --> 00:00:02.000',
'今日は',
'',
'00:00:02.000 --> 00:00:03.000',
'いい天気ですね',
'',
'00:00:03.000 --> 00:00:04.000',
'本当に',
'',
].join('\n'),
);
});

View File

@@ -0,0 +1,166 @@
// One `<p>` element extracted from a timedtext document.
interface YoutubeTimedTextRow {
// Value of the element's `t` attribute; formatted as milliseconds downstream.
startMs: number;
// Value of the element's `d` attribute; added to startMs to get the cue end.
durationMs: number;
// Element content with markup removed, entities decoded, and whitespace trimmed.
text: string;
}
// Lower-case file extensions that isYoutubeTimedTextExtension recognises as
// timedtext.
const YOUTUBE_TIMEDTEXT_EXTENSIONS = new Set(['srv1', 'srv2', 'srv3', 'ytsrv3']);
/** The five named entities predefined by XML, keyed by entity name. */
const XML_NAMED_ENTITIES: Readonly<Record<string, string>> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
};

/**
 * Converts a numeric character reference to its character.
 *
 * @param match The original entity text, returned unchanged when the code
 *   point is not usable.
 * @param codePoint The parsed code point.
 * @returns The decoded character, or `match` when `codePoint` is not an
 *   integer, lies outside 0..0x10FFFF, or is a surrogate (0xD800..0xDFFF).
 */
function decodeNumericEntity(match: string, codePoint: number): string {
  if (
    !Number.isInteger(codePoint) ||
    codePoint < 0 ||
    codePoint > 0x10ffff ||
    (codePoint >= 0xd800 && codePoint <= 0xdfff)
  ) {
    return match;
  }
  return String.fromCodePoint(codePoint);
}

/**
 * Decodes XML named entities and decimal/hexadecimal character references.
 *
 * Decoding happens in a single left-to-right pass so that text produced by one
 * replacement is never decoded a second time: `&amp;lt;` yields the literal
 * text `&lt;`, not `<`. Unknown or invalid references are left as written.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g,
    (match: string, decimal?: string, hex?: string, named?: string): string => {
      if (decimal !== undefined) {
        return decodeNumericEntity(match, Number(decimal));
      }
      if (hex !== undefined) {
        return decodeNumericEntity(match, Number.parseInt(hex, 16));
      }
      return XML_NAMED_ENTITIES[named ?? ''] ?? match;
    },
  );
}
/**
 * Collects every `name="value"` pair found in `raw` into a map.
 *
 * Only double-quoted values are recognised; when a name repeats, the last
 * value wins.
 */
function parseAttributeMap(raw: string): Map<string, string> {
  const attributePattern = /([a-zA-Z0-9:_-]+)="([^"]*)"/g;
  const pairs = Array.from(
    raw.matchAll(attributePattern),
    (found): [string, string] => [found[1]!, found[2]!],
  );
  return new Map(pairs);
}
/**
 * Pulls every `<p ...>...</p>` element out of a timedtext document.
 *
 * Elements whose `t` or `d` attribute does not convert to a finite number are
 * dropped, as are elements whose text is empty once `<br>` tags have become
 * newlines, other tags have been removed, entities decoded and whitespace
 * trimmed.
 */
function extractYoutubeTimedTextRows(xml: string): YoutubeTimedTextRow[] {
  const paragraphPattern = /<p\b([^>]*)>([\s\S]*?)<\/p>/g;
  const rows: YoutubeTimedTextRow[] = [];
  for (const [, rawAttrs = '', rawBody = ''] of xml.matchAll(paragraphPattern)) {
    const attrs = parseAttributeMap(rawAttrs);
    const startMs = Number(attrs.get('t'));
    const durationMs = Number(attrs.get('d'));
    if (Number.isFinite(startMs) && Number.isFinite(durationMs)) {
      const withoutMarkup = rawBody.replace(/<br\s*\/?>/gi, '\n').replace(/<[^>]+>/g, '');
      const text = decodeHtmlEntities(withoutMarkup).trim();
      if (text) {
        rows.push({ startMs, durationMs, text });
      }
    }
  }
  return rows;
}
/**
 * Formats a millisecond offset as a WebVTT timestamp (`HH:MM:SS.mmm`).
 *
 * Fractions are floored and negative values are clamped to zero; the hour
 * field grows beyond two digits when needed.
 */
function formatVttTimestamp(ms: number): string {
  const pad = (value: number, width: number): string => String(value).padStart(width, '0');

  let remaining = Math.max(0, Math.floor(ms));
  const hours = Math.floor(remaining / 3_600_000);
  remaining %= 3_600_000;
  const minutes = Math.floor(remaining / 60_000);
  remaining %= 60_000;
  const seconds = Math.floor(remaining / 1_000);
  const millis = remaining % 1_000;

  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`;
}
/**
 * Reports whether `value`, trimmed and lower-cased, is one of the known
 * timedtext extensions. Missing or empty input yields false.
 */
export function isYoutubeTimedTextExtension(value: string | undefined): boolean {
  return value ? YOUTUBE_TIMEDTEXT_EXTENSIONS.has(value.trim().toLowerCase()) : false;
}
/**
 * Converts a timedtext document into WebVTT.
 *
 * For each extracted row:
 * - the cue end is start + duration, pulled back to 1ms before the next row's
 *   start when the two would overlap (never earlier than the row's own start);
 * - rows that end up with no positive duration are skipped entirely and do not
 *   influence prefix stripping;
 * - when the text begins with the previous emitted row's full text, only the
 *   remainder is kept; rows left with no text are skipped.
 *
 * @returns `WEBVTT\n` when no rows were extracted, otherwise the header
 *   followed by blank-line-separated cues and a trailing newline.
 */
export function convertYoutubeTimedTextToVtt(xml: string): string {
  const rows = extractYoutubeTimedTextRows(xml);
  if (rows.length === 0) {
    return 'WEBVTT\n';
  }

  const cues: string[] = [];
  let lastRowText = '';
  rows.forEach((row, index) => {
    const following = rows[index + 1];
    let endMs = row.startMs + row.durationMs;
    if (following && endMs > following.startMs) {
      endMs = Math.max(row.startMs, following.startMs - 1);
    }
    if (endMs <= row.startMs) {
      return;
    }

    const continuesPrevious = lastRowText !== '' && row.text.startsWith(lastRowText);
    const cueText = continuesPrevious
      ? row.text.slice(lastRowText.length).trimStart()
      : row.text;
    lastRowText = row.text;
    if (!cueText) {
      return;
    }

    cues.push(`${formatVttTimestamp(row.startMs)} --> ${formatVttTimestamp(endMs)}\n${cueText}`);
  });

  return `WEBVTT\n\n${cues.join('\n\n')}\n`;
}
/**
 * Removes `previousText` from the front of `text` when `text` starts with it,
 * then drops leading whitespace from what remains. Returns `text` unchanged
 * when `previousText` is empty or is not a prefix.
 */
function normalizeRollingCaptionText(text: string, previousText: string): string {
  const repeatsPrevious = previousText !== '' && text.startsWith(previousText);
  return repeatsPrevious ? text.slice(previousText.length).trimStart() : text;
}
/**
 * Rewrites a WebVTT document whose cues repeat the full text of the cue before
 * them so that each cue only carries its new text.
 *
 * Line endings are normalised to `\n` and the document is split into blocks on
 * blank lines. For every block with a timing line followed by text, the text
 * (trimmed) is compared with the previous cue's text; a repeated prefix is
 * removed unless that would leave the cue empty.
 *
 * @returns The original `content` untouched when no cue changed, otherwise the
 *   rewritten blocks joined by blank lines with a trailing newline.
 */
export function normalizeYoutubeAutoVtt(content: string): string {
  const blocks = content.replace(/\r\n?/g, '\n').split(/\n{2,}/);

  const rewritten: string[] = [];
  let previousCueText = '';
  let changed = false;

  for (const block of blocks) {
    // Start from the block as-is; it is only replaced when a prefix is stripped.
    rewritten.push(block);

    const lines = block.split('\n');
    const timingIndex = lines.findIndex((line) => line.includes('-->'));
    if (timingIndex < 0 || timingIndex === lines.length - 1) {
      continue;
    }

    const cueText = lines
      .slice(timingIndex + 1)
      .join('\n')
      .trim();
    if (!cueText) {
      continue;
    }

    const stripped = normalizeRollingCaptionText(cueText, previousCueText);
    previousCueText = cueText;
    if (!stripped || stripped === cueText) {
      continue;
    }

    changed = true;
    rewritten[rewritten.length - 1] = [...lines.slice(0, timingIndex + 1), stripped].join('\n');
  }

  return changed ? `${rewritten.join('\n\n')}\n` : content;
}

View File

@@ -0,0 +1,570 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { downloadYoutubeSubtitleTrack, downloadYoutubeSubtitleTracks } from './track-download';
/**
 * Runs `fn` with a freshly created temporary directory and removes that
 * directory (recursively) afterwards, whether `fn` resolves or rejects.
 */
async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
  const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-youtube-track-download-'));
  try {
    const outcome = await fn(scratchDir);
    return outcome;
  } finally {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  }
}
// Writes an executable node script named `yt-dlp` into `dir` and returns its path.
//
// The script stands in for the real yt-dlp during tests:
// - it derives an output prefix from the `-o` template by dropping the trailing
//   `.%(...)s` placeholder, and exits 1 when no prefix results;
// - it exits 2 / 3 / 4 when YTDLP_EXPECT_AUTO_SUBS, YTDLP_EXPECT_MANUAL_SUBS or
//   YTDLP_EXPECT_SUB_LANG is set and the matching flag or `--sub-langs` value
//   was not passed;
// - YTDLP_FAKE_MODE selects what is written next to the prefix:
//     'multi'                    one `.vtt` per requested language
//     'rolling-auto'             one `.vtt` whose cues repeat earlier text
//     'multi-primary-only-fail'  a `.vtt` for the first language only, then an
//                                HTTP 429 error on stderr and exit status 1
//     'both'                     a `.vtt` and an `.orig.webp`
//     'webp-only'                only an `.orig.webp`
//     anything else              a minimal `.vtt`
//
// Everything between the backticks is the script's runtime source, so no
// comments are added inside it.
function makeFakeYtDlpScript(dir: string): string {
const scriptPath = path.join(dir, 'yt-dlp');
const script = `#!/usr/bin/env node
const fs = require('node:fs');
const path = require('node:path');
const args = process.argv.slice(2);
let outputTemplate = '';
const wantsAutoSubs = args.includes('--write-auto-subs');
const wantsManualSubs = args.includes('--write-subs');
const subLangIndex = args.indexOf('--sub-langs');
const subLang = subLangIndex >= 0 ? args[subLangIndex + 1] || '' : '';
const subLangs = subLang ? subLang.split(',').filter(Boolean) : [];
for (let i = 0; i < args.length; i += 1) {
if (args[i] === '-o' && typeof args[i + 1] === 'string') {
outputTemplate = args[i + 1];
i += 1;
}
}
if (process.env.YTDLP_EXPECT_AUTO_SUBS === '1' && !wantsAutoSubs) {
process.exit(2);
}
if (process.env.YTDLP_EXPECT_MANUAL_SUBS === '1' && !wantsManualSubs) {
process.exit(3);
}
if (process.env.YTDLP_EXPECT_SUB_LANG && subLang !== process.env.YTDLP_EXPECT_SUB_LANG) {
process.exit(4);
}
const prefix = outputTemplate.replace(/\.%\([^)]+\)s$/, '');
if (!prefix) {
process.exit(1);
}
fs.mkdirSync(path.dirname(prefix), { recursive: true });
if (process.env.YTDLP_FAKE_MODE === 'multi') {
for (const lang of subLangs) {
fs.writeFileSync(\`\${prefix}.\${lang}.vtt\`, 'WEBVTT\\n');
}
} else if (process.env.YTDLP_FAKE_MODE === 'rolling-auto') {
fs.writeFileSync(
\`\${prefix}.vtt\`,
[
'WEBVTT',
'',
'00:00:01.000 --> 00:00:02.000',
'今日は',
'',
'00:00:02.000 --> 00:00:03.000',
'今日はいい天気ですね',
'',
'00:00:03.000 --> 00:00:04.000',
'今日はいい天気ですね本当に',
'',
].join('\\n'),
);
} else if (process.env.YTDLP_FAKE_MODE === 'multi-primary-only-fail') {
const primaryLang = subLangs[0];
if (primaryLang) {
fs.writeFileSync(\`\${prefix}.\${primaryLang}.vtt\`, 'WEBVTT\\n');
}
process.stderr.write("ERROR: Unable to download video subtitles for 'en': HTTP Error 429: Too Many Requests\\n");
process.exit(1);
} else if (process.env.YTDLP_FAKE_MODE === 'both') {
fs.writeFileSync(\`\${prefix}.vtt\`, 'WEBVTT\\n');
fs.writeFileSync(\`\${prefix}.orig.webp\`, 'webp');
} else if (process.env.YTDLP_FAKE_MODE === 'webp-only') {
fs.writeFileSync(\`\${prefix}.orig.webp\`, 'webp');
} else {
fs.writeFileSync(\`\${prefix}.vtt\`, 'WEBVTT\\n');
}
process.exit(0);
`;
fs.writeFileSync(scriptPath, script, 'utf8');
// Mark the script executable so it can be spawned by name via PATH.
fs.chmodSync(scriptPath, 0o755);
return scriptPath;
}
/**
 * Runs `fn` while a fake `yt-dlp` operating in `mode` is first on `PATH`.
 *
 * `fn` receives the temporary root directory and the `bin` directory holding
 * the fake. Afterwards `PATH` is reset to the value captured beforehand and
 * `YTDLP_FAKE_MODE` is removed from the environment.
 */
async function withFakeYtDlp<T>(
  mode: 'both' | 'webp-only' | 'multi' | 'multi-primary-only-fail' | 'rolling-auto',
  fn: (dir: string, binDir: string) => Promise<T>,
): Promise<T> {
  return withTempDir(async (root) => {
    const binDir = path.join(root, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    makeFakeYtDlpScript(binDir);

    const savedPath = process.env.PATH ?? '';
    process.env.PATH = [binDir, savedPath].join(path.delimiter);
    process.env.YTDLP_FAKE_MODE = mode;
    try {
      const outcome = await fn(root, binDir);
      return outcome;
    } finally {
      process.env.PATH = savedPath;
      delete process.env.YTDLP_FAKE_MODE;
    }
  });
}
/**
 * Runs `fn` with the given `YTDLP_EXPECT_*` variables applied to the process
 * environment, then puts all three variables back the way they were (deleting
 * those that were previously unset).
 */
async function withFakeYtDlpExpectations<T>(
  expectations: Partial<Record<'YTDLP_EXPECT_AUTO_SUBS' | 'YTDLP_EXPECT_MANUAL_SUBS' | 'YTDLP_EXPECT_SUB_LANG', string>>,
  fn: () => Promise<T>,
): Promise<T> {
  const trackedKeys = [
    'YTDLP_EXPECT_AUTO_SUBS',
    'YTDLP_EXPECT_MANUAL_SUBS',
    'YTDLP_EXPECT_SUB_LANG',
  ] as const;
  const snapshot = trackedKeys.map((key) => [key, process.env[key]] as const);

  Object.assign(process.env, expectations);
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    for (const [key, priorValue] of snapshot) {
      if (priorValue !== undefined) {
        process.env[key] = priorValue;
      } else {
        delete process.env[key];
      }
    }
  }
}
/**
 * Runs `fn` with `globalThis.fetch` replaced by a stub that forwards the
 * requested URL (as a string) to `handler`, and reinstates the original
 * `fetch` once `fn` settles.
 */
async function withStubFetch<T>(
  handler: (url: string) => Promise<Response> | Response,
  fn: () => Promise<T>,
): Promise<T> {
  const realFetch = globalThis.fetch;

  // fetch accepts a string, a URL, or a Request; reduce all three to a string.
  const toUrlString = (input: string | URL | Request): string => {
    if (typeof input === 'string') {
      return input;
    }
    if (input instanceof URL) {
      return input.toString();
    }
    return input.url;
  };

  globalThis.fetch = (async (input: string | URL | Request) => {
    return await handler(toUrlString(input));
  }) as typeof fetch;
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    globalThis.fetch = realFetch;
  }
}
// The fake yt-dlp writes both a .vtt and an .orig.webp file; the result must be
// the .vtt. Returns early (without asserting) on Windows.
test('downloadYoutubeSubtitleTrack prefers subtitle files over later webp artifacts', async () => {
if (process.platform === 'win32') {
return;
}
await withFakeYtDlp('both', async (root) => {
const result = await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
track: {
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
},
});
assert.equal(path.extname(result.path), '.vtt');
assert.match(path.basename(result.path), /^auto-ja-orig\./);
});
});
// A subtitle file left in the output directory before the call must not be
// reported as the download: the fake yt-dlp only writes a .webp, so the call is
// expected to reject. Returns early (without asserting) on Windows.
test('downloadYoutubeSubtitleTrack ignores stale subtitle files from prior runs', async () => {
if (process.platform === 'win32') {
return;
}
await withFakeYtDlp('webp-only', async (root) => {
const outputDir = path.join(root, 'out');
fs.mkdirSync(outputDir, { recursive: true });
fs.writeFileSync(path.join(outputDir, 'auto-ja.vtt'), 'stale subtitle');
await assert.rejects(
async () =>
await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir,
track: {
id: 'auto:ja',
language: 'ja',
sourceLanguage: 'ja',
kind: 'auto',
label: 'Japanese (auto)',
},
}),
/No subtitle file was downloaded/,
);
});
});
// The fake yt-dlp exits non-zero unless --write-auto-subs is passed and
// --sub-langs carries the raw source language ('ja-orig', not 'ja').
// Returns early (without asserting) on Windows.
test('downloadYoutubeSubtitleTrack uses auto subtitle flags and raw source language for auto tracks', async () => {
if (process.platform === 'win32') {
return;
}
await withFakeYtDlp('both', async (root) => {
await withFakeYtDlpExpectations(
{
YTDLP_EXPECT_AUTO_SUBS: '1',
YTDLP_EXPECT_SUB_LANG: 'ja-orig',
},
async () => {
const result = await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
track: {
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
},
});
assert.equal(path.extname(result.path), '.vtt');
},
);
});
});
// The fake yt-dlp exits non-zero unless --write-subs is passed and --sub-langs
// is 'ja'. Returns early (without asserting) on Windows.
test('downloadYoutubeSubtitleTrack keeps manual subtitle flag for manual tracks', async () => {
if (process.platform === 'win32') {
return;
}
await withFakeYtDlp('both', async (root) => {
await withFakeYtDlpExpectations(
{
YTDLP_EXPECT_MANUAL_SUBS: '1',
YTDLP_EXPECT_SUB_LANG: 'ja',
},
async () => {
const result = await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
track: {
id: 'manual:ja',
language: 'ja',
sourceLanguage: 'ja',
kind: 'manual',
label: 'Japanese (manual)',
},
});
assert.equal(path.extname(result.path), '.vtt');
},
);
});
});
// The fake yt-dlp writes a .vtt whose cues repeat earlier text; the file on
// disk after the download must contain only the new text per cue.
// Returns early (without asserting) on Windows.
test('downloadYoutubeSubtitleTrack normalizes rolling auto-caption vtt output from yt-dlp', async () => {
if (process.platform === 'win32') {
return;
}
await withFakeYtDlp('rolling-auto', async (root) => {
const result = await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
track: {
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
},
});
assert.equal(
fs.readFileSync(result.path, 'utf8'),
[
'WEBVTT',
'',
'00:00:01.000 --> 00:00:02.000',
'今日は',
'',
'00:00:02.000 --> 00:00:03.000',
'いい天気ですね',
'',
'00:00:03.000 --> 00:00:04.000',
'本当に',
'',
].join('\n'),
);
});
});
// When the track carries a downloadUrl, the subtitle is fetched from that URL
// (fetch is stubbed) and saved as `<sanitized id>.<source language>.<ext>`.
test('downloadYoutubeSubtitleTrack prefers direct download URL when available', async () => {
await withTempDir(async (root) => {
await withStubFetch(
async (url) => {
assert.equal(url, 'https://example.com/subs/ja.vtt');
return new Response('WEBVTT\n', { status: 200 });
},
async () => {
const result = await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
track: {
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
downloadUrl: 'https://example.com/subs/ja.vtt',
fileExtension: 'vtt',
},
});
assert.equal(path.basename(result.path), 'auto-ja-orig.ja-orig.vtt');
assert.equal(fs.readFileSync(result.path, 'utf8'), 'WEBVTT\n');
},
);
});
});
// Path-traversal sequences in the track id and source language must be reduced
// to dashes so the file is written directly inside the output directory.
test('downloadYoutubeSubtitleTrack sanitizes metadata source language in filenames', async () => {
await withTempDir(async (root) => {
await withStubFetch(
async () => new Response('WEBVTT\n', { status: 200 }),
async () => {
const result = await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
track: {
id: 'auto:../../ja-orig',
language: 'ja',
sourceLanguage: '../ja-orig/../../evil',
kind: 'auto',
label: 'Japanese (auto)',
downloadUrl: 'https://example.com/subs/ja.vtt',
fileExtension: 'vtt',
},
});
assert.equal(path.dirname(result.path), path.join(root, 'out'));
assert.equal(path.basename(result.path), 'auto-ja-orig.ja-orig-evil.vtt');
},
);
});
});
// A direct download with the srv3 extension is converted to WebVTT and saved
// with a .vtt extension: overlapping cues end 1ms before the next one starts
// and repeated leading text is removed.
test('downloadYoutubeSubtitleTrack converts srv3 auto subtitles into regular vtt', async () => {
await withTempDir(async (root) => {
await withStubFetch(
async (url) => {
assert.equal(url, 'https://example.com/subs/ja.srv3');
return new Response(
[
'<timedtext><body>',
'<p t="1000" d="2500">今日は</p>',
'<p t="2000" d="2500">今日はいい天気ですね</p>',
'<p t="3500" d="2500">今日はいい天気ですね本当に</p>',
'</body></timedtext>',
].join(''),
{ status: 200 },
);
},
async () => {
const result = await downloadYoutubeSubtitleTrack({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
track: {
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
downloadUrl: 'https://example.com/subs/ja.srv3',
fileExtension: 'srv3',
},
});
assert.equal(path.basename(result.path), 'auto-ja-orig.ja-orig.vtt');
assert.equal(
fs.readFileSync(result.path, 'utf8'),
[
'WEBVTT',
'',
'00:00:01.000 --> 00:00:01.999',
'今日は',
'',
'00:00:02.000 --> 00:00:03.499',
'いい天気ですね',
'',
'00:00:03.500 --> 00:00:06.000',
'本当に',
'',
].join('\n'),
);
},
);
});
});
// In 'multi' mode the fake yt-dlp writes one .vtt per requested language; both
// tracks must be mapped to their language-specific file.
// Returns early (without asserting) on Windows.
test('downloadYoutubeSubtitleTracks downloads primary and secondary in one invocation', async () => {
if (process.platform === 'win32') {
return;
}
await withFakeYtDlp('multi', async (root) => {
const outputDir = path.join(root, 'out');
const result = await downloadYoutubeSubtitleTracks({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir,
tracks: [
{
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
},
{
id: 'auto:en',
language: 'en',
sourceLanguage: 'en',
kind: 'auto',
label: 'English (auto)',
},
],
});
assert.match(path.basename(result.get('auto:ja-orig') ?? ''), /\.ja-orig\.vtt$/);
assert.match(path.basename(result.get('auto:en') ?? ''), /\.en\.vtt$/);
});
});
// The fake yt-dlp writes only the first language and then exits with status 1;
// the primary track must still be returned and the failed one left out.
// Returns early (without asserting) on Windows.
test('downloadYoutubeSubtitleTracks preserves successfully downloaded primary file on partial failure', async () => {
if (process.platform === 'win32') {
return;
}
await withFakeYtDlp('multi-primary-only-fail', async (root) => {
const outputDir = path.join(root, 'out');
const result = await downloadYoutubeSubtitleTracks({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir,
tracks: [
{
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
},
{
id: 'auto:en',
language: 'en',
sourceLanguage: 'en',
kind: 'auto',
label: 'English (auto)',
},
],
});
assert.match(path.basename(result.get('auto:ja-orig') ?? ''), /\.ja-orig\.vtt$/);
assert.equal(result.has('auto:en'), false);
});
});
// When every track carries a downloadUrl, each URL is fetched (in track order,
// via the stubbed fetch) and every track is mapped to its own file.
test('downloadYoutubeSubtitleTracks prefers direct download URLs when available', async () => {
await withTempDir(async (root) => {
const seen: string[] = [];
await withStubFetch(
async (url) => {
seen.push(url);
return new Response(`WEBVTT\n${url}\n`, { status: 200 });
},
async () => {
const result = await downloadYoutubeSubtitleTracks({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
tracks: [
{
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
downloadUrl: 'https://example.com/subs/ja.vtt',
fileExtension: 'vtt',
},
{
id: 'auto:en',
language: 'en',
sourceLanguage: 'en',
kind: 'auto',
label: 'English (auto)',
downloadUrl: 'https://example.com/subs/en.vtt',
fileExtension: 'vtt',
},
],
});
assert.deepEqual(seen, [
'https://example.com/subs/ja.vtt',
'https://example.com/subs/en.vtt',
]);
assert.match(path.basename(result.get('auto:ja-orig') ?? ''), /\.ja-orig\.vtt$/);
assert.match(path.basename(result.get('auto:en') ?? ''), /\.en\.vtt$/);
},
);
});
});
// Two tracks that share a source language (one auto, one manual) must be
// fetched separately and end up at different paths.
test('downloadYoutubeSubtitleTracks keeps duplicate source-language direct downloads distinct', async () => {
await withTempDir(async (root) => {
const seen: string[] = [];
await withStubFetch(
async (url) => {
seen.push(url);
return new Response(`WEBVTT\n${url}\n`, { status: 200 });
},
async () => {
const result = await downloadYoutubeSubtitleTracks({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
tracks: [
{
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
downloadUrl: 'https://example.com/subs/ja-auto.vtt',
fileExtension: 'vtt',
},
{
id: 'manual:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'manual',
label: 'Japanese (manual)',
downloadUrl: 'https://example.com/subs/ja-manual.vtt',
fileExtension: 'vtt',
},
],
});
assert.deepEqual(seen, [
'https://example.com/subs/ja-auto.vtt',
'https://example.com/subs/ja-manual.vtt',
]);
assert.notEqual(result.get('auto:ja-orig'), result.get('manual:ja-orig'));
},
);
});
});

View File

@@ -0,0 +1,315 @@
import fs from 'node:fs';
import path from 'node:path';
import { spawn } from 'node:child_process';
import type { YoutubeTrackOption } from './track-probe';
import {
convertYoutubeTimedTextToVtt,
isYoutubeTimedTextExtension,
normalizeYoutubeAutoVtt,
} from './timedtext';
// File extensions (with leading dot, lower-case) accepted as subtitle files when
// scanning the output directory or choosing a direct-download extension.
const YOUTUBE_SUBTITLE_EXTENSIONS = new Set(['.srt', '.vtt', '.ass']);
// Filename prefix used for files produced by a multi-track yt-dlp invocation.
const YOUTUBE_BATCH_PREFIX = 'youtube-batch';
// Time budget, in milliseconds, for a yt-dlp run or a direct subtitle fetch.
const YOUTUBE_DOWNLOAD_TIMEOUT_MS = 15_000;
/**
 * Reduces `value` to a string that is safe to embed in a filename.
 *
 * Every run of characters other than ASCII letters, digits, `_` and `-`
 * becomes a single dash, repeated dashes are collapsed, and leading/trailing
 * dashes are removed. An empty result is replaced by `unknown`.
 */
function sanitizeFilenameSegment(value: string): string {
  const dashed = value
    .trim()
    .replace(/[^a-z0-9_-]+/gi, '-')
    .replace(/-+/g, '-');
  const stripped = dashed.replace(/^-+|-+$/g, '');
  return stripped === '' ? 'unknown' : stripped;
}
/**
 * Returns an abort signal that fires after `timeoutMs`, or undefined when the
 * runtime does not provide `AbortSignal.timeout`.
 */
function createFetchTimeoutSignal(timeoutMs: number): AbortSignal | undefined {
  const supportsTimeout =
    typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function';
  return supportsTimeout ? AbortSignal.timeout(timeoutMs) : undefined;
}
/**
 * Spawns `command` with `args` and collects everything it writes to stdout and
 * stderr as UTF-8 text.
 *
 * @param command Executable to spawn.
 * @param args Arguments passed to the executable.
 * @param timeoutMs Milliseconds to wait before killing the child and rejecting.
 * @returns The captured streams when the child closes with exit code 0.
 * @throws Rejects with the spawn error, with a timeout error, or — on a
 *   non-zero exit — with the trimmed stderr text (or a status message when
 *   stderr is empty).
 */
function runCapture(
  command: string,
  args: string[],
  timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    const captured = { stdout: '', stderr: '' };

    const deadline = setTimeout(() => {
      child.kill();
      reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk) => {
      captured.stdout += String(chunk);
    });
    child.stderr.on('data', (chunk) => {
      captured.stderr += String(chunk);
    });

    child.once('error', (error) => {
      clearTimeout(deadline);
      reject(error);
    });
    child.once('close', (code) => {
      clearTimeout(deadline);
      if (code !== 0) {
        reject(
          new Error(captured.stderr.trim() || `yt-dlp exited with status ${code ?? 'unknown'}`),
        );
        return;
      }
      resolve({ stdout: captured.stdout, stderr: captured.stderr });
    });
  });
}
/**
 * Like `runCapture`, but a non-zero exit is not an error: the promise resolves
 * with the captured streams together with the exit code.
 *
 * @param command Executable to spawn.
 * @param args Arguments passed to the executable.
 * @param timeoutMs Milliseconds to wait before killing the child and rejecting.
 * @returns Captured stdout/stderr and the exit code (1 when the child closed
 *   without reporting a code).
 * @throws Rejects only with the spawn error or with a timeout error.
 */
function runCaptureDetailed(
  command: string,
  args: string[],
  timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string; code: number }> {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    const captured = { stdout: '', stderr: '' };

    const deadline = setTimeout(() => {
      child.kill();
      reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk) => {
      captured.stdout += String(chunk);
    });
    child.stderr.on('data', (chunk) => {
      captured.stderr += String(chunk);
    });

    child.once('error', (error) => {
      clearTimeout(deadline);
      reject(error);
    });
    child.once('close', (exitCode) => {
      clearTimeout(deadline);
      resolve({ stdout: captured.stdout, stderr: captured.stderr, code: exitCode ?? 1 });
    });
  });
}
/**
 * Finds the most recently modified subtitle file in `dir` whose name starts
 * with `prefix`.
 *
 * @returns The full path of the newest match, or null when nothing in `dir`
 *   has the prefix and a recognised subtitle extension.
 */
function pickLatestSubtitleFile(dir: string, prefix: string): string | null {
  const matches: string[] = [];
  for (const name of fs.readdirSync(dir)) {
    const fullPath = path.join(dir, name);
    const basename = path.basename(fullPath);
    const extension = path.extname(basename).toLowerCase();
    if (basename.startsWith(prefix) && YOUTUBE_SUBTITLE_EXTENSIONS.has(extension)) {
      matches.push(fullPath);
    }
  }
  // Newest first.
  matches.sort((left, right) => fs.statSync(right).mtimeMs - fs.statSync(left).mtimeMs);
  const [newest] = matches;
  return newest ?? null;
}
/**
 * Finds the most recently modified subtitle file in `dir` that belongs to
 * `prefix` and to one specific language.
 *
 * A file matches when its name starts with `<prefix>.`, contains
 * `.<sourceLanguage>.` and has a recognised subtitle extension.
 *
 * @returns The full path of the newest match, or null when there is none.
 */
function pickLatestSubtitleFileForLanguage(
  dir: string,
  prefix: string,
  sourceLanguage: string,
): string | null {
  const requiredStart = `${prefix}.`;
  const languageMarker = `.${sourceLanguage}.`;

  const matches: string[] = [];
  for (const name of fs.readdirSync(dir)) {
    const fullPath = path.join(dir, name);
    const basename = path.basename(fullPath);
    const extension = path.extname(basename).toLowerCase();
    if (
      basename.startsWith(requiredStart) &&
      basename.includes(languageMarker) &&
      YOUTUBE_SUBTITLE_EXTENSIONS.has(extension)
    ) {
      matches.push(fullPath);
    }
  }
  // Newest first.
  matches.sort((left, right) => fs.statSync(right).mtimeMs - fs.statSync(left).mtimeMs);
  const [newest] = matches;
  return newest ?? null;
}
/**
 * Assembles the yt-dlp argument list for a subtitle-only download.
 *
 * `--write-auto-subs` and `--write-subs` are included only when the matching
 * flag is set; the requested languages are passed comma-separated, and the
 * target URL always comes last.
 */
function buildDownloadArgs(input: {
  targetUrl: string;
  outputTemplate: string;
  sourceLanguages: string[];
  includeAutoSubs: boolean;
  includeManualSubs: boolean;
}): string[] {
  return [
    '--skip-download',
    '--no-warnings',
    ...(input.includeAutoSubs ? ['--write-auto-subs'] : []),
    ...(input.includeManualSubs ? ['--write-subs'] : []),
    '--sub-format',
    'srt/vtt/best',
    '--sub-langs',
    input.sourceLanguages.join(','),
    '-o',
    input.outputTemplate,
    input.targetUrl,
  ];
}
/**
 * Fetches a subtitle track straight from its `downloadUrl` and writes it to
 * `<outputDir>/<prefix>.<sanitized source language>.<ext>`.
 *
 * Timedtext payloads are converted to WebVTT (and saved as `.vtt`); auto-kind
 * `.vtt` payloads go through rolling-caption normalisation; everything else is
 * written as received. Extensions outside the recognised subtitle set fall
 * back to `vtt`.
 *
 * @returns The path of the written file.
 * @throws When the track has no download URL, when the response status is not
 *   OK, or when the fetch itself rejects.
 */
async function downloadSubtitleFromUrl(input: {
  outputDir: string;
  prefix: string;
  track: YoutubeTrackOption;
}): Promise<{ path: string }> {
  const { outputDir, prefix, track } = input;
  if (!track.downloadUrl) {
    throw new Error(`No direct subtitle URL available for ${track.sourceLanguage}`);
  }

  const ext = (track.fileExtension?.trim().toLowerCase() || 'vtt').replace(/[^a-z0-9]+/g, '');
  const isTimedText = isYoutubeTimedTextExtension(ext);
  let safeExt = 'vtt';
  if (!isTimedText && YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`)) {
    safeExt = ext;
  }

  const languageSegment = sanitizeFilenameSegment(track.sourceLanguage);
  const targetPath = path.join(outputDir, `${prefix}.${languageSegment}.${safeExt}`);

  const response = await fetch(track.downloadUrl, {
    signal: createFetchTimeoutSignal(YOUTUBE_DOWNLOAD_TIMEOUT_MS),
  });
  if (!response.ok) {
    throw new Error(`HTTP ${response.status} while downloading ${track.sourceLanguage}`);
  }

  const body = await response.text();
  let output = body;
  if (isTimedText) {
    output = convertYoutubeTimedTextToVtt(body);
  } else if (track.kind === 'auto' && safeExt === 'vtt') {
    output = normalizeYoutubeAutoVtt(body);
  }

  fs.writeFileSync(targetPath, output, 'utf8');
  return { path: targetPath };
}
/**
 * Reports whether a track can be fetched directly: it must carry a download
 * URL and its extension (defaulting to `vtt`) must be either a timedtext
 * extension or a recognised subtitle extension.
 */
function canDownloadSubtitleFromUrl(track: YoutubeTrackOption): boolean {
  if (!track.downloadUrl) {
    return false;
  }
  const declaredExt = track.fileExtension?.trim().toLowerCase() || 'vtt';
  const ext = declaredExt.replace(/[^a-z0-9]+/g, '');
  if (isYoutubeTimedTextExtension(ext)) {
    return true;
  }
  return YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`);
}
/**
 * Applies rolling-caption normalisation in place to a downloaded file, but
 * only for auto-kind tracks saved as `.vtt`. The file is rewritten only when
 * normalisation actually changed its content.
 */
function normalizeDownloadedAutoSubtitle(pathname: string, track: YoutubeTrackOption): void {
  const isAutoVtt = track.kind === 'auto' && path.extname(pathname).toLowerCase() === '.vtt';
  if (!isAutoVtt) {
    return;
  }
  const original = fs.readFileSync(pathname, 'utf8');
  const rewritten = normalizeYoutubeAutoVtt(original);
  if (rewritten === original) {
    return;
  }
  fs.writeFileSync(pathname, rewritten, 'utf8');
}
/**
 * Downloads a single subtitle track into `outputDir`.
 *
 * Files left over from an earlier download of the same track are removed
 * first. The track is then fetched directly from its download URL when
 * possible; otherwise yt-dlp is invoked and the newest subtitle file it wrote
 * for this track is used (auto-kind `.vtt` files are normalised in place).
 *
 * @param input.targetUrl Video URL passed to yt-dlp.
 * @param input.outputDir Directory to write into; created when missing.
 * @param input.track Track to download.
 * @returns The path of the downloaded subtitle file.
 * @throws When the direct download or yt-dlp fails, or when yt-dlp finishes
 *   without producing a subtitle file for the track.
 */
export async function downloadYoutubeSubtitleTrack(input: {
  targetUrl: string;
  outputDir: string;
  track: YoutubeTrackOption;
}): Promise<{ path: string }> {
  fs.mkdirSync(input.outputDir, { recursive: true });
  const prefix = input.track.id.replace(/[^a-z0-9_-]+/gi, '-');
  // Every file written for this track is named `<prefix>.<rest>`. Matching on
  // the dot-terminated prefix keeps a track whose sanitized id is a prefix of
  // another's (e.g. `auto-ja` vs `auto-ja-orig`) from deleting or picking up
  // the other track's files.
  const ownedFilePrefix = `${prefix}.`;

  for (const name of fs.readdirSync(input.outputDir)) {
    if (!name.startsWith(ownedFilePrefix)) {
      continue;
    }
    try {
      fs.rmSync(path.join(input.outputDir, name), { force: true });
    } catch {
      // Best effort: a stale file that cannot be removed is not fatal.
    }
  }

  if (canDownloadSubtitleFromUrl(input.track)) {
    return await downloadSubtitleFromUrl({
      outputDir: input.outputDir,
      prefix,
      track: input.track,
    });
  }

  const outputTemplate = path.join(input.outputDir, `${prefix}.%(ext)s`);
  const args = buildDownloadArgs({
    targetUrl: input.targetUrl,
    outputTemplate,
    sourceLanguages: [input.track.sourceLanguage],
    includeAutoSubs: input.track.kind === 'auto',
    includeManualSubs: input.track.kind === 'manual',
  });
  await runCapture('yt-dlp', args);

  const subtitlePath = pickLatestSubtitleFile(input.outputDir, ownedFilePrefix);
  if (!subtitlePath) {
    throw new Error(`No subtitle file was downloaded for ${input.track.sourceLanguage}`);
  }
  normalizeDownloadedAutoSubtitle(subtitlePath, input.track);
  return { path: subtitlePath };
}
/**
 * Downloads several subtitle tracks for one video into `input.outputDir`.
 *
 * Two strategies are used:
 * - Per-track download, when every track can be fetched from its URL or when
 *   two tracks share a source language (the batch output is looked up by
 *   language, so it cannot tell such tracks apart). Tracks that cannot be
 *   fetched from a URL are delegated to `downloadYoutubeSubtitleTrack`.
 * - A single batched `yt-dlp` run otherwise; its output files are matched
 *   back to tracks by source language and auto `.vtt` files are normalized.
 *
 * @param input.targetUrl - Video URL handed to `yt-dlp`.
 * @param input.outputDir - Directory that receives the files (created if missing).
 * @param input.tracks - Tracks to download.
 * @returns Map from track id to downloaded file path. On the batch path the
 *   map may be partial: tracks with no matching file are omitted.
 * @throws When the batch run yields no file at all (the `yt-dlp` stderr is
 *   used as the message if it exited non-zero), or when a per-track download
 *   fails.
 */
export async function downloadYoutubeSubtitleTracks(input: {
  targetUrl: string;
  outputDir: string;
  tracks: YoutubeTrackOption[];
}): Promise<Map<string, string>> {
  fs.mkdirSync(input.outputDir, { recursive: true });
  const sourceLanguages = input.tracks.map((track) => track.sourceLanguage);
  const hasDuplicateSourceLanguages = new Set(sourceLanguages).size !== sourceLanguages.length;

  // Drop output left behind by an earlier batch run.
  const batchFilePrefix = `${YOUTUBE_BATCH_PREFIX}.`;
  for (const name of fs.readdirSync(input.outputDir)) {
    if (!name.startsWith(batchFilePrefix)) {
      continue;
    }
    try {
      fs.rmSync(path.join(input.outputDir, name), { force: true });
    } catch {
      // Best-effort cleanup: a stale file that cannot be removed is not fatal.
    }
  }

  const allDirectlyDownloadable = input.tracks.every((track) => canDownloadSubtitleFromUrl(track));
  if (hasDuplicateSourceLanguages || allDirectlyDownloadable) {
    const results = new Map<string, string>();
    for (const track of input.tracks) {
      // With colliding languages some tracks may lack a usable direct URL;
      // fetching those directly would fail or store an unsupported payload,
      // so they go through the single-track path (which can fall back to yt-dlp).
      const download = canDownloadSubtitleFromUrl(track)
        ? await downloadSubtitleFromUrl({
            outputDir: input.outputDir,
            prefix: track.id.replace(/[^a-z0-9_-]+/gi, '-'),
            track,
          })
        : await downloadYoutubeSubtitleTrack({
            targetUrl: input.targetUrl,
            outputDir: input.outputDir,
            track,
          });
      results.set(track.id, download.path);
    }
    return results;
  }

  const outputTemplate = path.join(input.outputDir, `${YOUTUBE_BATCH_PREFIX}.%(ext)s`);
  const includeAutoSubs = input.tracks.some((track) => track.kind === 'auto');
  const includeManualSubs = input.tracks.some((track) => track.kind === 'manual');
  const result = await runCaptureDetailed(
    'yt-dlp',
    buildDownloadArgs({
      targetUrl: input.targetUrl,
      outputTemplate,
      sourceLanguages,
      includeAutoSubs,
      includeManualSubs,
    }),
  );

  const results = new Map<string, string>();
  for (const track of input.tracks) {
    const subtitlePath = pickLatestSubtitleFileForLanguage(
      input.outputDir,
      YOUTUBE_BATCH_PREFIX,
      track.sourceLanguage,
    );
    if (subtitlePath) {
      normalizeDownloadedAutoSubtitle(subtitlePath, track);
      results.set(track.id, subtitlePath);
    }
  }
  // A partial result is still returned even if yt-dlp reported a failure.
  if (results.size > 0) {
    return results;
  }
  if (result.code !== 0) {
    throw new Error(result.stderr.trim() || `yt-dlp exited with status ${result.code}`);
  }
  throw new Error(`No subtitle file was downloaded for ${sourceLanguages.join(',')}`);
}

View File

@@ -0,0 +1,99 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { probeYoutubeTracks } from './track-probe';
/**
 * Runs `fn` with a freshly created temporary directory and removes that
 * directory (recursively) once `fn` settles, whether it resolved or rejected.
 *
 * @param fn - Callback that receives the temporary directory path.
 * @returns Whatever `fn` resolves to.
 */
async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
  const namePrefix = path.join(os.tmpdir(), 'subminer-youtube-track-probe-');
  const tempDir = fs.mkdtempSync(namePrefix);
  try {
    const outcome = await fn(tempDir);
    return outcome;
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
}
/**
 * Writes a stand-in `yt-dlp` executable (plus a `yt-dlp.cmd` launcher) into `dir`.
 *
 * By default the generated script is a Node program that prints `payload`
 * to stdout; non-string payloads are JSON-encoded first. With `rawScript`
 * the payload text is written verbatim as the script itself.
 *
 * @param dir - Directory that receives the fake binary.
 * @param payload - Text (or JSON-serializable value) to emit, or the raw script.
 * @param rawScript - When true, use the payload as the script content as-is.
 */
function makeFakeYtDlpScript(dir: string, payload: unknown, rawScript = false): void {
  const executablePath = path.join(dir, 'yt-dlp');
  const payloadText = typeof payload === 'string' ? payload : JSON.stringify(payload);
  let contents = payloadText;
  if (!rawScript) {
    contents = `#!/usr/bin/env node
process.stdout.write(${JSON.stringify(payloadText)});
`;
  }
  fs.writeFileSync(executablePath, contents, 'utf8');
  // The executable bit is only set off Windows.
  if (process.platform !== 'win32') {
    fs.chmodSync(executablePath, 0o755);
  }
  fs.writeFileSync(executablePath + '.cmd', `@echo off\r\nnode "${executablePath}"\r\n`, 'utf8');
}
/**
 * Runs `fn` while a fake `yt-dlp` emitting `payload` sits at the front of
 * `PATH`, then puts the previous `PATH` value back.
 *
 * @param payload - Output for the fake binary (see `makeFakeYtDlpScript`).
 * @param fn - Code to run against the fake binary.
 * @param options.rawScript - Treat `payload` as the script body itself.
 * @returns Whatever `fn` resolves to.
 */
async function withFakeYtDlp<T>(
  payload: unknown,
  fn: () => Promise<T>,
  options: { rawScript?: boolean } = {},
): Promise<T> {
  return await withTempDir(async (root) => {
    const binDir = path.join(root, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    makeFakeYtDlpScript(binDir, payload, options.rawScript === true);

    const savedPath = process.env.PATH ?? '';
    process.env.PATH = `${binDir}${path.delimiter}${savedPath}`;
    try {
      const outcome = await fn();
      return outcome;
    } finally {
      process.env.PATH = savedPath;
    }
  });
}
// Auto captions offer both vtt and srv3 for the same language, with vtt listed
// first, so the assertion proves the choice follows format preference rather
// than listing order.
test('probeYoutubeTracks prefers srv3 over vtt for automatic captions', async () => {
  const fakeInfo = {
    id: 'abc123',
    title: 'Example',
    automatic_captions: {
      'ja-orig': [
        { ext: 'vtt', url: 'https://example.com/ja.vtt', name: 'Japanese auto' },
        { ext: 'srv3', url: 'https://example.com/ja.srv3', name: 'Japanese auto' },
      ],
    },
  };
  await withFakeYtDlp(fakeInfo, async () => {
    const { videoId, tracks } = await probeYoutubeTracks('https://www.youtube.com/watch?v=abc123');
    const [firstTrack] = tracks;
    assert.equal(videoId, 'abc123');
    assert.equal(firstTrack?.downloadUrl, 'https://example.com/ja.srv3');
    assert.equal(firstTrack?.fileExtension, 'srv3');
  });
});
// Manual captions offer srv3 first and srt second; srt must still be chosen.
test('probeYoutubeTracks keeps preferring srt for manual captions', async () => {
  const fakeInfo = {
    id: 'abc123',
    title: 'Example',
    subtitles: {
      ja: [
        { ext: 'srv3', url: 'https://example.com/ja.srv3', name: 'Japanese manual' },
        { ext: 'srt', url: 'https://example.com/ja.srt', name: 'Japanese manual' },
      ],
    },
  };
  await withFakeYtDlp(fakeInfo, async () => {
    const { tracks } = await probeYoutubeTracks('https://www.youtube.com/watch?v=abc123');
    const [firstTrack] = tracks;
    assert.equal(firstTrack?.downloadUrl, 'https://example.com/ja.srt');
    assert.equal(firstTrack?.fileExtension, 'srt');
  });
});
// The fake binary prints text that is not JSON; the probe must reject with an
// error that names the parse failure.
test('probeYoutubeTracks reports malformed yt-dlp JSON with context', async () => {
  const expectParseFailure = async (): Promise<void> => {
    await assert.rejects(
      () => probeYoutubeTracks('https://www.youtube.com/watch?v=abc123'),
      /Failed to parse yt-dlp output as JSON/,
    );
  };
  await withFakeYtDlp('not-json', expectParseFailure);
});

View File

@@ -0,0 +1,136 @@
import { spawn } from 'node:child_process';
import type { YoutubeTrackOption } from '../../../types';
import { formatYoutubeTrackLabel, normalizeYoutubeLangCode, type YoutubeTrackKind } from './labels';
/** Default time limit, in milliseconds, that `runCapture` gives a spawned command before killing it. */
const YOUTUBE_TRACK_PROBE_TIMEOUT_MS = 15_000;
/**
 * Result of `probeYoutubeTracks`: the video's id and title (empty strings
 * when yt-dlp did not report them) and the discovered subtitle tracks,
 * manual tracks first, then automatic captions.
 */
export type YoutubeTrackProbeResult = {
videoId: string;
title: string;
tracks: YoutubeTrackOption[];
};
/** The list of available formats yt-dlp reports for one subtitle language. */
type YtDlpSubtitleEntry = Array<{ ext?: string; name?: string; url?: string }>;
/**
 * The subset of the `yt-dlp --dump-single-json` payload read by this module.
 * Every field is optional because the payload is parsed from external output.
 */
type YtDlpInfo = {
id?: string;
title?: string;
// Formats keyed by language code, as iterated by `toTracks`.
subtitles?: Record<string, YtDlpSubtitleEntry>;
automatic_captions?: Record<string, YtDlpSubtitleEntry>;
};
/**
 * Spawns `command` with `args` and collects its stdout and stderr as UTF-8 text.
 *
 * @param command - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param timeoutMs - Time limit after which the process is killed.
 * @returns The captured streams when the process exits with status 0.
 * @throws When the process cannot be spawned, exceeds `timeoutMs`, or exits
 *   with a non-zero status (the trimmed stderr is used as the message when
 *   it is not empty).
 */
function runCapture(
  command: string,
  args: string[],
  timeoutMs = YOUTUBE_TRACK_PROBE_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    const captured = { stdout: '', stderr: '' };

    // Once the timeout has rejected, the later `close` event cannot change the
    // outcome because a promise settles only once.
    const deadline = setTimeout(() => {
      child.kill();
      reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk) => {
      captured.stdout += String(chunk);
    });
    child.stderr.on('data', (chunk) => {
      captured.stderr += String(chunk);
    });

    child.once('error', (error) => {
      clearTimeout(deadline);
      reject(error);
    });
    child.once('close', (code) => {
      clearTimeout(deadline);
      if (code !== 0) {
        const fallbackMessage = `yt-dlp exited with status ${code ?? 'unknown'}`;
        reject(new Error(captured.stderr.trim() || fallbackMessage));
        return;
      }
      resolve({ stdout: captured.stdout, stderr: captured.stderr });
    });
  });
}
/**
 * Picks the download format to use for one subtitle language.
 *
 * Only formats with a non-empty string `url` are considered. The first
 * extension in the kind-specific ranking that has such a format wins; if
 * none of the ranked extensions is present, the first usable format is
 * returned with its own trimmed extension (defaulting to `vtt`).
 *
 * @param formats - Formats yt-dlp listed for the language.
 * @param kind - Track kind; `auto` uses a different ranking than other kinds.
 * @returns The chosen extension, URL and optional trimmed title, or `null`
 *   when no format has a URL.
 */
function choosePreferredFormat(
  formats: YtDlpSubtitleEntry,
  kind: YoutubeTrackKind,
): { ext: string; url: string; title?: string } | null {
  const ranking =
    kind === 'auto'
      ? ['srv3', 'srv2', 'srv1', 'vtt', 'srt', 'ttml', 'json3']
      : ['srt', 'vtt', 'srv3', 'srv2', 'srv1', 'ttml', 'json3'];
  const usable = formats.filter(
    (candidate) => typeof candidate.url === 'string' && candidate.url.length > 0,
  );

  for (const ext of ranking) {
    const hit = usable.find((candidate) => candidate.ext === ext);
    if (hit?.url) {
      return { ext, url: hit.url, title: hit.name?.trim() || undefined };
    }
  }

  const [firstUsable] = usable;
  if (!firstUsable?.url) {
    return null;
  }
  return {
    ext: firstUsable.ext?.trim() || 'vtt',
    url: firstUsable.url,
    title: firstUsable.name?.trim() || undefined,
  };
}
/**
 * Converts yt-dlp's per-language subtitle listing into track options.
 *
 * Languages whose format list is missing, empty, or has no usable format are
 * skipped. Each track id is `${kind}:${sourceLanguage}`, where the source
 * language is the trimmed listing key.
 *
 * @param entries - Formats keyed by language code, or `undefined`.
 * @param kind - Track kind to stamp on every produced track.
 * @returns The tracks, in the listing's key order.
 */
function toTracks(entries: Record<string, YtDlpSubtitleEntry> | undefined, kind: YoutubeTrackKind) {
  const collected: YoutubeTrackOption[] = [];
  if (!entries) {
    return collected;
  }
  for (const [rawLanguage, candidates] of Object.entries(entries)) {
    const hasCandidates = Array.isArray(candidates) && candidates.length > 0;
    if (!hasCandidates) {
      continue;
    }
    const chosen = choosePreferredFormat(candidates, kind);
    if (!chosen) {
      continue;
    }
    // Keep the untrimmed key if trimming would leave nothing.
    const sourceLanguage = rawLanguage.trim() || rawLanguage;
    const language = normalizeYoutubeLangCode(sourceLanguage) || sourceLanguage;
    const { title } = chosen;
    collected.push({
      id: `${kind}:${sourceLanguage}`,
      language,
      sourceLanguage,
      kind,
      title,
      label: formatYoutubeTrackLabel({ language, kind, title }),
      downloadUrl: chosen.url,
      fileExtension: chosen.ext,
    });
  }
  return collected;
}
export type { YoutubeTrackOption };
/**
 * Asks `yt-dlp` for a video's metadata and lists its subtitle tracks.
 *
 * @param targetUrl - Video URL passed to `yt-dlp --dump-single-json`.
 * @returns The video id and title (empty strings when absent or not strings)
 *   and the tracks: manual subtitles first, then automatic captions.
 * @throws When `yt-dlp` fails, prints nothing, or prints something that is
 *   not a JSON object. Parse errors include up to 200 characters of the output.
 */
export async function probeYoutubeTracks(targetUrl: string): Promise<YoutubeTrackProbeResult> {
  const { stdout } = await runCapture('yt-dlp', ['--dump-single-json', '--no-warnings', targetUrl]);
  const trimmedStdout = stdout.trim();
  if (!trimmedStdout) {
    throw new Error('yt-dlp returned empty output while probing subtitle tracks');
  }
  const snippet = trimmedStdout.slice(0, 200);
  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmedStdout);
  } catch (error) {
    throw new Error(
      `Failed to parse yt-dlp output as JSON: ${
        error instanceof Error ? error.message : String(error)
      }${snippet ? `; stdout=${snippet}` : ''}`,
    );
  }
  // Valid JSON is not necessarily an object: `null` would otherwise crash on
  // the property reads below with an unrelated TypeError.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(
      `Failed to parse yt-dlp output as JSON: expected a JSON object; stdout=${snippet}`,
    );
  }
  const info = parsed as YtDlpInfo;
  const tracks = [
    ...toTracks(info.subtitles, 'manual'),
    ...toTracks(info.automatic_captions, 'auto'),
  ];
  return {
    // Only pass through real strings so the declared `string` fields hold.
    videoId: typeof info.id === 'string' ? info.id : '',
    title: typeof info.title === 'string' ? info.title : '',
    tracks,
  };
}

View File

@@ -0,0 +1,63 @@
import { isEnglishYoutubeLang, isJapaneseYoutubeLang } from './labels';
import type { YoutubeTrackOption } from './track-probe';
/**
 * Returns the first track whose language satisfies `matcher`, skipping the
 * track with id `excludeId` when one is given.
 *
 * @param tracks - Candidates, in priority order.
 * @param matcher - Predicate applied to each track's `language`.
 * @param excludeId - Optional track id that must not be returned.
 * @returns The first eligible track, or `null` when there is none.
 */
function pickTrack(
  tracks: YoutubeTrackOption[],
  matcher: (value: string) => boolean,
  excludeId?: string,
): YoutubeTrackOption | null {
  const isEligible = (candidate: YoutubeTrackOption): boolean =>
    matcher(candidate.language) && candidate.id !== excludeId;
  const [firstEligible = null] = tracks.filter(isEligible);
  return firstEligible;
}
/**
 * Chooses the default primary and secondary subtitle tracks.
 *
 * Primary: a manual Japanese track, else an automatic Japanese track, else
 * the first manual track, else the first track of any kind.
 * Secondary: a manual English track, else an automatic English track, never
 * the track already chosen as primary.
 *
 * @param tracks - Tracks available for the video.
 * @returns The chosen track ids; each is `null` when nothing qualified.
 */
export function chooseDefaultYoutubeTrackIds(
  tracks: YoutubeTrackOption[],
): { primaryTrackId: string | null; secondaryTrackId: string | null } {
  const manualTracks = tracks.filter((track) => track.kind === 'manual');
  const autoTracks = tracks.filter((track) => track.kind === 'auto');

  const primary =
    pickTrack(manualTracks, isJapaneseYoutubeLang) ??
    pickTrack(autoTracks, isJapaneseYoutubeLang) ??
    manualTracks[0] ??
    tracks[0] ??
    null;

  const primaryId = primary?.id ?? undefined;
  const secondary =
    pickTrack(manualTracks, isEnglishYoutubeLang, primaryId) ??
    pickTrack(autoTracks, isEnglishYoutubeLang, primaryId) ??
    null;

  return {
    primaryTrackId: primary?.id ?? null,
    secondaryTrackId: secondary?.id ?? null,
  };
}
/**
 * Ensures the same track is not selected as both primary and secondary.
 *
 * @param input - Selected track ids.
 * @returns A new selection with the secondary cleared when both ids are set
 *   and equal; otherwise the `input` object itself, unchanged.
 */
export function normalizeYoutubeTrackSelection(input: {
  primaryTrackId: string | null;
  secondaryTrackId: string | null;
}): {
  primaryTrackId: string | null;
  secondaryTrackId: string | null;
} {
  const { primaryTrackId, secondaryTrackId } = input;
  const bothChosen = Boolean(primaryTrackId) && Boolean(secondaryTrackId);
  if (!bothChosen || primaryTrackId !== secondaryTrackId) {
    return input;
  }
  return { primaryTrackId, secondaryTrackId: null };
}