mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-04-03 18:12:07 -07:00
feat: add app-owned YouTube subtitle flow with absPlayer-style parsing (#31)
* fix: harden preload argv parsing for popup windows * fix: align youtube playback with shared overlay startup * fix: unwrap mpv youtube streams for anki media mining * docs: update docs for youtube subtitle and mining flow * refactor: unify cli and runtime wiring for startup and youtube flow * feat: update subtitle sidebar overlay behavior * chore: add shared log-file source for diagnostics * fix(ci): add changelog fragment for immersion changes * fix: address CodeRabbit review feedback * fix: persist canonical title from youtube metadata * style: format stats library tab * fix: address latest review feedback * style: format stats library files * test: stub launcher youtube deps in CI * test: isolate launcher youtube flow deps * test: stub launcher youtube deps in failing case * test: force x11 backend in launcher ci harness * test: address latest review feedback * fix(launcher): preserve user YouTube ytdl raw options * docs(backlog): update task tracking notes * fix(immersion): special-case youtube media paths in runtime and tracking * feat(stats): improve YouTube media metadata and picker key handling * fix(ci): format stats media library hook * fix: address latest CodeRabbit review items * docs: update youtube release notes and docs * feat: auto-load youtube subtitles before manual picker * fix: restore app-owned youtube subtitle flow * docs: update youtube playback docs and config copy * refactor: remove legacy youtube launcher mode plumbing * fix: refine youtube subtitle startup binding * docs: clarify youtube subtitle startup behavior * fix: address PR #31 latest review follow-ups * fix: address PR #31 follow-up review comments * test: harden youtube picker test harness * udpate backlog * fix: add timeout to youtube metadata probe * docs: refresh youtube and stats docs * update backlog * update backlog * chore: release v0.9.0
This commit is contained in:
315
src/core/services/youtube/track-download.ts
Normal file
315
src/core/services/youtube/track-download.ts
Normal file
@@ -0,0 +1,315 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { spawn } from 'node:child_process';
|
||||
import type { YoutubeTrackOption } from './track-probe';
|
||||
import {
|
||||
convertYoutubeTimedTextToVtt,
|
||||
isYoutubeTimedTextExtension,
|
||||
normalizeYoutubeAutoVtt,
|
||||
} from './timedtext';
|
||||
|
||||
// Lower-case, dot-prefixed file extensions that this module accepts as subtitle files.
const YOUTUBE_SUBTITLE_EXTENSIONS = new Set(['.srt', '.vtt', '.ass']);
|
||||
// Filename prefix used for the yt-dlp output template of a multi-track (batch) download.
const YOUTUBE_BATCH_PREFIX = 'youtube-batch';
|
||||
// Default timeout, in milliseconds, for spawned yt-dlp processes and direct subtitle fetches.
const YOUTUBE_DOWNLOAD_TIMEOUT_MS = 15_000;
|
||||
|
||||
/**
 * Turn arbitrary text into a token that is safe to embed in a filename.
 *
 * Surrounding whitespace is trimmed, every run of characters other than
 * ASCII letters, digits, `_` and `-` becomes a single `-`, repeated dashes
 * are collapsed, and leading/trailing dashes are stripped.
 *
 * @param value Raw text, e.g. a subtitle language code.
 * @returns The sanitized token, or `'unknown'` when nothing usable remains.
 */
function sanitizeFilenameSegment(value: string): string {
  const dashed = value.trim().replace(/[^a-z0-9_-]+/gi, '-');
  const collapsed = dashed.replace(/-+/g, '-');
  const stripped = collapsed.replace(/^-+|-+$/g, '');
  if (stripped.length === 0) {
    return 'unknown';
  }
  return stripped;
}
|
||||
|
||||
/**
 * Create an `AbortSignal` that aborts after `timeoutMs` milliseconds.
 *
 * Uses the native `AbortSignal.timeout` when the runtime provides it. On
 * runtimes that only have `AbortController`, the timeout is emulated with a
 * timer so the request is still bounded instead of silently running without
 * any timeout. In that fallback the abort reason is the default abort error
 * rather than a timeout-specific one.
 *
 * @param timeoutMs Delay before the signal aborts, in milliseconds.
 * @returns The signal, or `undefined` only when the runtime has neither
 *   `AbortSignal.timeout` nor `AbortController`.
 */
function createFetchTimeoutSignal(timeoutMs: number): AbortSignal | undefined {
  if (typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function') {
    return AbortSignal.timeout(timeoutMs);
  }
  if (typeof AbortController === 'undefined') {
    return undefined;
  }
  const controller = new AbortController();
  const timer: number | { unref?: () => unknown } = setTimeout(
    () => controller.abort(),
    timeoutMs,
  );
  // Node timers are objects; unref so a pending timeout never keeps the process alive.
  if (typeof timer === 'object') {
    timer.unref?.();
  }
  return controller.signal;
}
|
||||
|
||||
/**
 * Spawn `command` (no shell), buffer its stdout and stderr as UTF-8 text and
 * resolve once it exits with status 0.
 *
 * @param command Executable to run.
 * @param args Arguments passed verbatim to the executable.
 * @param timeoutMs Maximum run time; the child is killed when it is exceeded.
 * @returns The captured `stdout` and `stderr`.
 * @throws (rejection) The spawn error when the process cannot be started;
 *   `"<command> timed out after <n>ms"` on timeout; otherwise, for a non-zero
 *   or signal-terminated exit, the trimmed stderr text or, when stderr is
 *   empty, a message naming the exit status or terminating signal.
 */
function runCapture(
  command: string,
  args: string[],
  timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    let settled = false;

    // Runs `finish` at most once; later events (e.g. `close` after a timeout) are ignored.
    const settle = (finish: () => void): void => {
      clearTimeout(timer);
      if (settled) {
        return;
      }
      settled = true;
      finish();
    };

    const timer = setTimeout(() => {
      proc.kill();
      settle(() => reject(new Error(`${command} timed out after ${timeoutMs}ms`)));
    }, timeoutMs);

    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk) => {
      stdout += String(chunk);
    });
    proc.stderr.on('data', (chunk) => {
      stderr += String(chunk);
    });

    proc.once('error', (error) => {
      settle(() => reject(error));
    });
    proc.once('close', (code, signal) => {
      settle(() => {
        if (code === 0) {
          resolve({ stdout, stderr });
          return;
        }
        // `code` is null when the child was terminated by a signal; report the signal instead.
        const fallback = signal
          ? `${command} was terminated by signal ${signal}`
          : `${command} exited with status ${code ?? 'unknown'}`;
        reject(new Error(stderr.trim() || fallback));
      });
    });
  });
}
|
||||
|
||||
/**
 * Spawn `command` (no shell), buffer its stdout and stderr as UTF-8 text and
 * resolve with the exit code whatever its value, so the caller can decide how
 * to treat a failing run.
 *
 * @param command Executable to run.
 * @param args Arguments passed verbatim to the executable.
 * @param timeoutMs Maximum run time; the child is killed when it is exceeded.
 * @returns The captured `stdout` and `stderr` plus the exit `code`; `code` is
 *   `1` when the process ended without a numeric exit status.
 * @throws (rejection) The spawn error when the process cannot be started, or
 *   `"<command> timed out after <n>ms"` on timeout.
 */
function runCaptureDetailed(
  command: string,
  args: string[],
  timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string; code: number }> {
  return new Promise((resolve, reject) => {
    const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    let settled = false;

    // Runs `finish` at most once; later events (e.g. `close` after a timeout) are ignored.
    const settle = (finish: () => void): void => {
      clearTimeout(timer);
      if (settled) {
        return;
      }
      settled = true;
      finish();
    };

    const timer = setTimeout(() => {
      proc.kill();
      settle(() => reject(new Error(`${command} timed out after ${timeoutMs}ms`)));
    }, timeoutMs);

    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk) => {
      stdout += String(chunk);
    });
    proc.stderr.on('data', (chunk) => {
      stderr += String(chunk);
    });

    proc.once('error', (error) => {
      settle(() => reject(error));
    });
    proc.once('close', (code) => {
      settle(() => resolve({ stdout, stderr, code: code ?? 1 }));
    });
  });
}
|
||||
|
||||
/**
 * Find the most recently modified subtitle file in `dir` whose name starts
 * with `prefix`.
 *
 * A file qualifies when its name begins with `prefix` (matched literally, no
 * separator is implied) and its lower-cased extension is one of
 * `YOUTUBE_SUBTITLE_EXTENSIONS`. Each candidate is stat'ed exactly once;
 * entries that cannot be stat'ed (removed in the meantime, dangling symlink)
 * are skipped instead of failing the whole lookup.
 *
 * @param dir Directory to scan (non-recursive).
 * @param prefix Required start of the file name.
 * @returns Absolute-or-relative path (joined onto `dir`) of the newest match,
 *   or `null` when nothing matches.
 * @throws When `dir` itself cannot be read.
 */
function pickLatestSubtitleFile(dir: string, prefix: string): string | null {
  let newest: { file: string; mtimeMs: number } | null = null;
  for (const name of fs.readdirSync(dir)) {
    if (!name.startsWith(prefix)) {
      continue;
    }
    if (!YOUTUBE_SUBTITLE_EXTENSIONS.has(path.extname(name).toLowerCase())) {
      continue;
    }
    const file = path.join(dir, name);
    let mtimeMs: number;
    try {
      mtimeMs = fs.statSync(file).mtimeMs;
    } catch {
      // The entry vanished or is unreadable; it cannot be the file we want.
      continue;
    }
    // Strict comparison keeps the first-listed file on equal timestamps.
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { file, mtimeMs };
    }
  }
  return newest === null ? null : newest.file;
}
|
||||
|
||||
/**
 * Find the most recently modified subtitle file in `dir` that belongs to
 * `prefix` and carries the given language segment.
 *
 * A file qualifies when its name starts with `<prefix>.`, contains
 * `.<sourceLanguage>.` and its lower-cased extension is one of
 * `YOUTUBE_SUBTITLE_EXTENSIONS`. Each candidate is stat'ed exactly once;
 * entries that cannot be stat'ed (removed in the meantime, dangling symlink)
 * are skipped instead of failing the whole lookup.
 *
 * @param dir Directory to scan (non-recursive).
 * @param prefix File-name prefix; the following `.` is added here.
 * @param sourceLanguage Language code expected as a dot-delimited segment.
 * @returns Path (joined onto `dir`) of the newest match, or `null`.
 * @throws When `dir` itself cannot be read.
 */
function pickLatestSubtitleFileForLanguage(
  dir: string,
  prefix: string,
  sourceLanguage: string,
): string | null {
  const requiredStart = `${prefix}.`;
  const requiredSegment = `.${sourceLanguage}.`;
  let newest: { file: string; mtimeMs: number } | null = null;
  for (const name of fs.readdirSync(dir)) {
    const qualifies =
      name.startsWith(requiredStart) &&
      name.includes(requiredSegment) &&
      YOUTUBE_SUBTITLE_EXTENSIONS.has(path.extname(name).toLowerCase());
    if (!qualifies) {
      continue;
    }
    const file = path.join(dir, name);
    let mtimeMs: number;
    try {
      mtimeMs = fs.statSync(file).mtimeMs;
    } catch {
      // The entry vanished or is unreadable; it cannot be the file we want.
      continue;
    }
    // Strict comparison keeps the first-listed file on equal timestamps.
    if (newest === null || mtimeMs > newest.mtimeMs) {
      newest = { file, mtimeMs };
    }
  }
  return newest === null ? null : newest.file;
}
|
||||
|
||||
/**
 * Assemble the yt-dlp argument list for a subtitle-only download.
 *
 * The list always starts with `--skip-download --no-warnings`, then the
 * requested `--write-auto-subs` / `--write-subs` flags (in that order),
 * followed by the subtitle format preference, the comma-joined languages,
 * the output template and finally the target URL.
 *
 * @param input.targetUrl URL handed to yt-dlp as the last argument.
 * @param input.outputTemplate Value for yt-dlp's `-o` option.
 * @param input.sourceLanguages Language codes joined with `,` for `--sub-langs`.
 * @param input.includeAutoSubs Whether to add `--write-auto-subs`.
 * @param input.includeManualSubs Whether to add `--write-subs`.
 * @returns The argument array, ready to pass to `spawn`.
 */
function buildDownloadArgs(input: {
  targetUrl: string;
  outputTemplate: string;
  sourceLanguages: string[];
  includeAutoSubs: boolean;
  includeManualSubs: boolean;
}): string[] {
  const { targetUrl, outputTemplate, sourceLanguages, includeAutoSubs, includeManualSubs } = input;

  const writeFlags: string[] = [];
  if (includeAutoSubs) {
    writeFlags.push('--write-auto-subs');
  }
  if (includeManualSubs) {
    writeFlags.push('--write-subs');
  }

  return [
    '--skip-download',
    '--no-warnings',
    ...writeFlags,
    '--sub-format',
    'srt/vtt/best',
    '--sub-langs',
    sourceLanguages.join(','),
    '-o',
    outputTemplate,
    targetUrl,
  ];
}
|
||||
|
||||
/**
 * Fetch a subtitle track straight from its `downloadUrl` and write it into
 * `outputDir` as `<prefix>.<language>.<ext>`.
 *
 * The track's extension is lower-cased and stripped to `[a-z0-9]`. Timed-text
 * extensions (per `isYoutubeTimedTextExtension`) are converted with
 * `convertYoutubeTimedTextToVtt` and stored as `.vtt`; auto-generated tracks
 * stored as `.vtt` are passed through `normalizeYoutubeAutoVtt`; everything
 * else is written unchanged. Extensions outside
 * `YOUTUBE_SUBTITLE_EXTENSIONS` are stored under `.vtt`.
 *
 * @param input.outputDir Directory that receives the file.
 * @param input.prefix File-name prefix.
 * @param input.track Track to download.
 * @returns The path of the written file.
 * @throws When the track has no `downloadUrl`, when the HTTP response is not
 *   OK, or when the fetch or the file write fails.
 */
async function downloadSubtitleFromUrl(input: {
  outputDir: string;
  prefix: string;
  track: YoutubeTrackOption;
}): Promise<{ path: string }> {
  const { outputDir, prefix, track } = input;
  const url = track.downloadUrl;
  if (!url) {
    throw new Error(`No direct subtitle URL available for ${track.sourceLanguage}`);
  }

  const declaredExtension = track.fileExtension?.trim().toLowerCase() || 'vtt';
  const ext = declaredExtension.replace(/[^a-z0-9]+/g, '');
  const isTimedText = isYoutubeTimedTextExtension(ext);

  // Timed-text payloads are converted to VTT below, so they are always stored as .vtt.
  let safeExt = 'vtt';
  if (!isTimedText && YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`)) {
    safeExt = ext;
  }

  const languageSegment = sanitizeFilenameSegment(track.sourceLanguage);
  const targetPath = path.join(outputDir, `${prefix}.${languageSegment}.${safeExt}`);

  const response = await fetch(url, {
    signal: createFetchTimeoutSignal(YOUTUBE_DOWNLOAD_TIMEOUT_MS),
  });
  if (!response.ok) {
    throw new Error(`HTTP ${response.status} while downloading ${track.sourceLanguage}`);
  }

  const body = await response.text();
  let contents = body;
  if (isTimedText) {
    contents = convertYoutubeTimedTextToVtt(body);
  } else if (track.kind === 'auto' && safeExt === 'vtt') {
    contents = normalizeYoutubeAutoVtt(body);
  }

  fs.writeFileSync(targetPath, contents, 'utf8');
  return { path: targetPath };
}
|
||||
|
||||
/**
 * Tell whether a track can be fetched directly from its `downloadUrl`.
 *
 * That requires a non-empty `downloadUrl` and an extension (lower-cased,
 * stripped to `[a-z0-9]`, defaulting to `vtt`) that is either a timed-text
 * extension per `isYoutubeTimedTextExtension` or listed in
 * `YOUTUBE_SUBTITLE_EXTENSIONS`.
 *
 * @param track Track to inspect.
 * @returns `true` when a direct download is possible.
 */
function canDownloadSubtitleFromUrl(track: YoutubeTrackOption): boolean {
  if (track.downloadUrl) {
    const declaredExtension = track.fileExtension?.trim().toLowerCase() || 'vtt';
    const ext = declaredExtension.replace(/[^a-z0-9]+/g, '');
    if (isYoutubeTimedTextExtension(ext)) {
      return true;
    }
    return YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`);
  }
  return false;
}
|
||||
|
||||
/**
 * Rewrite an auto-generated `.vtt` subtitle file in place with the output of
 * `normalizeYoutubeAutoVtt`.
 *
 * Nothing happens for tracks whose kind is not `'auto'` or for files whose
 * extension is not `.vtt` (case-insensitive). The file is only written back
 * when normalization actually changed its contents.
 *
 * @param pathname Path of the downloaded subtitle file.
 * @param track Track the file belongs to.
 */
function normalizeDownloadedAutoSubtitle(pathname: string, track: YoutubeTrackOption): void {
  const isAutoVtt = track.kind === 'auto' && path.extname(pathname).toLowerCase() === '.vtt';
  if (!isAutoVtt) {
    return;
  }

  const original = fs.readFileSync(pathname, 'utf8');
  const cleaned = normalizeYoutubeAutoVtt(original);
  if (cleaned === original) {
    return;
  }
  fs.writeFileSync(pathname, cleaned, 'utf8');
}
|
||||
|
||||
/**
 * Download a single subtitle track into `outputDir`.
 *
 * The file-name prefix is the track id with every run of characters outside
 * `[a-z0-9_-]` replaced by `-`. Stale files of this track are removed first,
 * then the track is fetched directly from its URL when
 * `canDownloadSubtitleFromUrl` allows it, otherwise through `yt-dlp`.
 *
 * Files are matched on `<prefix>.` (prefix plus separator), never on the bare
 * prefix: ids such as `auto-en` and `auto-en-US` share a bare prefix, and
 * matching without the separator would delete or pick up the other track's
 * files.
 *
 * @param input.targetUrl Video URL passed to yt-dlp.
 * @param input.outputDir Directory for the subtitle file; created if missing.
 * @param input.track Track to download.
 * @returns The path of the downloaded subtitle file.
 * @throws When the direct download or yt-dlp fails, or when yt-dlp finishes
 *   without producing a subtitle file for the track.
 */
export async function downloadYoutubeSubtitleTrack(input: {
  targetUrl: string;
  outputDir: string;
  track: YoutubeTrackOption;
}): Promise<{ path: string }> {
  fs.mkdirSync(input.outputDir, { recursive: true });
  const prefix = input.track.id.replace(/[^a-z0-9_-]+/gi, '-');
  // Every file written for this track is named `<prefix>.<...>`.
  const ownedFilePrefix = `${prefix}.`;

  for (const name of fs.readdirSync(input.outputDir)) {
    if (!name.startsWith(ownedFilePrefix)) {
      continue;
    }
    try {
      fs.rmSync(path.join(input.outputDir, name), { force: true });
    } catch {
      // Best effort: a stale file that cannot be removed must not block the download.
    }
  }

  if (canDownloadSubtitleFromUrl(input.track)) {
    return await downloadSubtitleFromUrl({
      outputDir: input.outputDir,
      prefix,
      track: input.track,
    });
  }

  const outputTemplate = path.join(input.outputDir, `${prefix}.%(ext)s`);
  const args = buildDownloadArgs({
    targetUrl: input.targetUrl,
    outputTemplate,
    sourceLanguages: [input.track.sourceLanguage],
    includeAutoSubs: input.track.kind === 'auto',
    includeManualSubs: input.track.kind === 'manual',
  });

  await runCapture('yt-dlp', args);

  const subtitlePath = pickLatestSubtitleFile(input.outputDir, ownedFilePrefix);
  if (!subtitlePath) {
    throw new Error(`No subtitle file was downloaded for ${input.track.sourceLanguage}`);
  }
  normalizeDownloadedAutoSubtitle(subtitlePath, input.track);
  return { path: subtitlePath };
}
|
||||
|
||||
/**
 * Download several subtitle tracks into `outputDir` and report where each one
 * ended up.
 *
 * Stale `youtube-batch.*` files are removed first. When two tracks share a
 * source language, or when every track can be fetched from its URL, the
 * tracks are downloaded one after another with `downloadSubtitleFromUrl`.
 * Otherwise a single `yt-dlp` run fetches all languages and the newest file
 * per language is picked from the output directory.
 *
 * @param input.targetUrl Video URL passed to yt-dlp.
 * @param input.outputDir Directory for the subtitle files; created if missing.
 * @param input.tracks Tracks to download.
 * @returns Map from track id to the path of its subtitle file. In the yt-dlp
 *   path, tracks without a matching file are simply absent from the map.
 * @throws When a direct download fails; or, in the yt-dlp path, when no file
 *   at all was produced (yt-dlp's stderr or exit status if it failed,
 *   otherwise a "No subtitle file was downloaded" message).
 */
export async function downloadYoutubeSubtitleTracks(input: {
  targetUrl: string;
  outputDir: string;
  tracks: YoutubeTrackOption[];
}): Promise<Map<string, string>> {
  const { targetUrl, outputDir, tracks } = input;
  fs.mkdirSync(outputDir, { recursive: true });

  const sourceLanguages = tracks.map((track) => track.sourceLanguage);
  const hasDuplicateSourceLanguages = new Set(sourceLanguages).size !== tracks.length;

  const staleBatchFiles = fs
    .readdirSync(outputDir)
    .filter((name) => name.startsWith(`${YOUTUBE_BATCH_PREFIX}.`));
  for (const name of staleBatchFiles) {
    try {
      fs.rmSync(path.join(outputDir, name), { force: true });
    } catch {
      // ignore stale files
    }
  }

  const downloaded = new Map<string, string>();

  // The batch output is looked up per language, so duplicate languages cannot be told apart there.
  if (hasDuplicateSourceLanguages || tracks.every(canDownloadSubtitleFromUrl)) {
    for (const track of tracks) {
      const { path: subtitlePath } = await downloadSubtitleFromUrl({
        outputDir,
        prefix: track.id.replace(/[^a-z0-9_-]+/gi, '-'),
        track,
      });
      downloaded.set(track.id, subtitlePath);
    }
    return downloaded;
  }

  const batchArgs = buildDownloadArgs({
    targetUrl,
    outputTemplate: path.join(outputDir, `${YOUTUBE_BATCH_PREFIX}.%(ext)s`),
    sourceLanguages,
    includeAutoSubs: tracks.some((track) => track.kind === 'auto'),
    includeManualSubs: tracks.some((track) => track.kind === 'manual'),
  });
  const result = await runCaptureDetailed('yt-dlp', batchArgs);

  for (const track of tracks) {
    const subtitlePath = pickLatestSubtitleFileForLanguage(
      outputDir,
      YOUTUBE_BATCH_PREFIX,
      track.sourceLanguage,
    );
    if (!subtitlePath) {
      continue;
    }
    normalizeDownloadedAutoSubtitle(subtitlePath, track);
    downloaded.set(track.id, subtitlePath);
  }

  // A failing yt-dlp run is tolerated as long as it produced at least one usable file.
  if (downloaded.size === 0) {
    if (result.code !== 0) {
      throw new Error(result.stderr.trim() || `yt-dlp exited with status ${result.code}`);
    }
    throw new Error(`No subtitle file was downloaded for ${sourceLanguages.join(',')}`);
  }
  return downloaded;
}
|
||||
Reference in New Issue
Block a user