diff --git a/launcher/youtube.ts b/launcher/youtube.ts index 9edd940..aa2770a 100644 --- a/launcher/youtube.ts +++ b/launcher/youtube.ts @@ -1,467 +1 @@ -import fs from 'node:fs'; -import path from 'node:path'; -import os from 'node:os'; -import type { Args, SubtitleCandidate, YoutubeSubgenOutputs } from './types.js'; -import { YOUTUBE_SUB_EXTENSIONS, YOUTUBE_AUDIO_EXTENSIONS } from './types.js'; -import { log } from './log.js'; -import { - resolvePathMaybe, - uniqueNormalizedLangCodes, - escapeRegExp, - normalizeBasename, - runExternalCommand, - commandExists, -} from './util.js'; -import { state } from './mpv.js'; - -function toYtdlpLangPattern(langCodes: string[]): string { - return langCodes.map((lang) => `${lang}.*`).join(','); -} - -function filenameHasLanguageTag(filenameLower: string, langCode: string): boolean { - const escaped = escapeRegExp(langCode); - const pattern = new RegExp(`(^|[._-])${escaped}([._-]|$)`); - return pattern.test(filenameLower); -} - -function classifyLanguage( - filename: string, - primaryLangCodes: string[], - secondaryLangCodes: string[], -): 'primary' | 'secondary' | null { - const lower = filename.toLowerCase(); - const primary = primaryLangCodes.some((code) => filenameHasLanguageTag(lower, code)); - const secondary = secondaryLangCodes.some((code) => filenameHasLanguageTag(lower, code)); - if (primary && !secondary) return 'primary'; - if (secondary && !primary) return 'secondary'; - return null; -} - -function preferredLangLabel(langCodes: string[], fallback: string): string { - return uniqueNormalizedLangCodes(langCodes)[0] || fallback; -} - -function sourceTag(source: SubtitleCandidate['source']): string { - if (source === 'manual' || source === 'auto') return `ytdlp-${source}`; - if (source === 'whisper-translate') return 'whisper-translate'; - return 'whisper'; -} - -function pickBestCandidate(candidates: SubtitleCandidate[]): SubtitleCandidate | null { - if (candidates.length === 0) return null; - const scored = 
[...candidates].sort((a, b) => { - const sourceA = a.source === 'manual' ? 1 : 0; - const sourceB = b.source === 'manual' ? 1 : 0; - if (sourceA !== sourceB) return sourceB - sourceA; - const srtA = a.ext === '.srt' ? 1 : 0; - const srtB = b.ext === '.srt' ? 1 : 0; - if (srtA !== srtB) return srtB - srtA; - return b.size - a.size; - }); - return scored[0] ?? null; -} - -function scanSubtitleCandidates( - tempDir: string, - knownSet: Set, - source: 'manual' | 'auto', - primaryLangCodes: string[], - secondaryLangCodes: string[], -): SubtitleCandidate[] { - const entries = fs.readdirSync(tempDir); - const out: SubtitleCandidate[] = []; - for (const name of entries) { - const fullPath = path.join(tempDir, name); - if (knownSet.has(fullPath)) continue; - let stat: fs.Stats; - try { - stat = fs.statSync(fullPath); - } catch { - continue; - } - if (!stat.isFile()) continue; - const ext = path.extname(fullPath).toLowerCase(); - if (!YOUTUBE_SUB_EXTENSIONS.has(ext)) continue; - const lang = classifyLanguage(name, primaryLangCodes, secondaryLangCodes); - if (!lang) continue; - out.push({ path: fullPath, lang, ext, size: stat.size, source }); - } - return out; -} - -async function convertToSrt( - inputPath: string, - tempDir: string, - langLabel: string, -): Promise { - if (path.extname(inputPath).toLowerCase() === '.srt') return inputPath; - const outputPath = path.join(tempDir, `converted.${langLabel}.srt`); - await runExternalCommand('ffmpeg', ['-y', '-loglevel', 'error', '-i', inputPath, outputPath]); - return outputPath; -} - -function findAudioFile(tempDir: string, preferredExt: string): string | null { - const entries = fs.readdirSync(tempDir); - const audioFiles: Array<{ path: string; ext: string; mtimeMs: number }> = []; - for (const name of entries) { - const fullPath = path.join(tempDir, name); - let stat: fs.Stats; - try { - stat = fs.statSync(fullPath); - } catch { - continue; - } - if (!stat.isFile()) continue; - const ext = path.extname(name).toLowerCase(); - 
if (!YOUTUBE_AUDIO_EXTENSIONS.has(ext)) continue; - audioFiles.push({ path: fullPath, ext, mtimeMs: stat.mtimeMs }); - } - if (audioFiles.length === 0) return null; - const preferred = audioFiles.find((entry) => entry.ext === `.${preferredExt.toLowerCase()}`); - if (preferred) return preferred.path; - audioFiles.sort((a, b) => b.mtimeMs - a.mtimeMs); - return audioFiles[0]?.path ?? null; -} - -async function runWhisper( - whisperBin: string, - modelPath: string, - audioPath: string, - language: string, - translate: boolean, - outputPrefix: string, -): Promise { - const args = [ - '-m', - modelPath, - '-f', - audioPath, - '--output-srt', - '--output-file', - outputPrefix, - '--language', - language, - ]; - if (translate) args.push('--translate'); - await runExternalCommand(whisperBin, args, { - commandLabel: 'whisper', - streamOutput: true, - }); - const outputPath = `${outputPrefix}.srt`; - if (!fs.existsSync(outputPath)) { - throw new Error(`whisper output not found: ${outputPath}`); - } - return outputPath; -} - -async function convertAudioForWhisper(inputPath: string, tempDir: string): Promise { - const wavPath = path.join(tempDir, 'whisper-input.wav'); - await runExternalCommand('ffmpeg', [ - '-y', - '-loglevel', - 'error', - '-i', - inputPath, - '-ar', - '16000', - '-ac', - '1', - '-c:a', - 'pcm_s16le', - wavPath, - ]); - if (!fs.existsSync(wavPath)) { - throw new Error(`Failed to prepare whisper audio input: ${wavPath}`); - } - return wavPath; -} - -export function resolveWhisperBinary(args: Args): string | null { - const explicit = args.whisperBin.trim(); - if (explicit) return resolvePathMaybe(explicit); - if (commandExists('whisper-cli')) return 'whisper-cli'; - return null; -} - -export async function generateYoutubeSubtitles( - target: string, - args: Args, - onReady?: (lang: 'primary' | 'secondary', pathToLoad: string) => Promise, -): Promise { - const outDir = path.resolve(resolvePathMaybe(args.youtubeSubgenOutDir)); - fs.mkdirSync(outDir, { recursive: 
true }); - - const primaryLangCodes = uniqueNormalizedLangCodes(args.youtubePrimarySubLangs); - const secondaryLangCodes = uniqueNormalizedLangCodes(args.youtubeSecondarySubLangs); - const primaryLabel = preferredLangLabel(primaryLangCodes, 'primary'); - const secondaryLabel = preferredLangLabel(secondaryLangCodes, 'secondary'); - const secondaryCanUseWhisperTranslate = - secondaryLangCodes.includes('en') || secondaryLangCodes.includes('eng'); - const ytdlpManualLangs = toYtdlpLangPattern([...primaryLangCodes, ...secondaryLangCodes]); - - const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yt-subgen-')); - const knownFiles = new Set(); - let keepTemp = args.youtubeSubgenKeepTemp; - - const publishTrack = async ( - lang: 'primary' | 'secondary', - source: SubtitleCandidate['source'], - selectedPath: string, - basename: string, - ): Promise => { - const langLabel = lang === 'primary' ? primaryLabel : secondaryLabel; - const taggedPath = path.join(outDir, `${basename}.${langLabel}.${sourceTag(source)}.srt`); - const aliasPath = path.join(outDir, `${basename}.${langLabel}.srt`); - fs.copyFileSync(selectedPath, taggedPath); - fs.copyFileSync(taggedPath, aliasPath); - log('info', args.logLevel, `Generated subtitle (${langLabel}, ${source}) -> ${aliasPath}`); - if (onReady) await onReady(lang, aliasPath); - return aliasPath; - }; - - try { - log('debug', args.logLevel, `YouTube subtitle temp dir: ${tempDir}`); - const meta = await runExternalCommand( - 'yt-dlp', - ['--dump-single-json', '--no-warnings', target], - { - captureStdout: true, - logLevel: args.logLevel, - commandLabel: 'yt-dlp:meta', - }, - state.youtubeSubgenChildren, - ); - const metadata = JSON.parse(meta.stdout) as { id?: string }; - const videoId = metadata.id || `${Date.now()}`; - const basename = normalizeBasename(videoId, videoId); - - await runExternalCommand( - 'yt-dlp', - [ - '--skip-download', - '--no-warnings', - '--write-subs', - '--sub-format', - 'srt/vtt/best', - '--sub-langs', - 
ytdlpManualLangs, - '-o', - path.join(tempDir, '%(id)s.%(ext)s'), - target, - ], - { - allowFailure: true, - logLevel: args.logLevel, - commandLabel: 'yt-dlp:manual-subs', - streamOutput: true, - }, - state.youtubeSubgenChildren, - ); - - const manualSubs = scanSubtitleCandidates( - tempDir, - knownFiles, - 'manual', - primaryLangCodes, - secondaryLangCodes, - ); - for (const sub of manualSubs) knownFiles.add(sub.path); - let primaryCandidates = manualSubs.filter((entry) => entry.lang === 'primary'); - let secondaryCandidates = manualSubs.filter((entry) => entry.lang === 'secondary'); - - const missingAuto: string[] = []; - if (primaryCandidates.length === 0) missingAuto.push(toYtdlpLangPattern(primaryLangCodes)); - if (secondaryCandidates.length === 0) missingAuto.push(toYtdlpLangPattern(secondaryLangCodes)); - - if (missingAuto.length > 0) { - await runExternalCommand( - 'yt-dlp', - [ - '--skip-download', - '--no-warnings', - '--write-auto-subs', - '--sub-format', - 'srt/vtt/best', - '--sub-langs', - missingAuto.join(','), - '-o', - path.join(tempDir, '%(id)s.%(ext)s'), - target, - ], - { - allowFailure: true, - logLevel: args.logLevel, - commandLabel: 'yt-dlp:auto-subs', - streamOutput: true, - }, - state.youtubeSubgenChildren, - ); - - const autoSubs = scanSubtitleCandidates( - tempDir, - knownFiles, - 'auto', - primaryLangCodes, - secondaryLangCodes, - ); - for (const sub of autoSubs) knownFiles.add(sub.path); - primaryCandidates = primaryCandidates.concat( - autoSubs.filter((entry) => entry.lang === 'primary'), - ); - secondaryCandidates = secondaryCandidates.concat( - autoSubs.filter((entry) => entry.lang === 'secondary'), - ); - } - - let primaryAlias = ''; - let secondaryAlias = ''; - const selectedPrimary = pickBestCandidate(primaryCandidates); - const selectedSecondary = pickBestCandidate(secondaryCandidates); - - if (selectedPrimary) { - const srt = await convertToSrt(selectedPrimary.path, tempDir, primaryLabel); - primaryAlias = await 
publishTrack('primary', selectedPrimary.source, srt, basename); - } - if (selectedSecondary) { - const srt = await convertToSrt(selectedSecondary.path, tempDir, secondaryLabel); - secondaryAlias = await publishTrack('secondary', selectedSecondary.source, srt, basename); - } - - const needsPrimaryWhisper = !selectedPrimary; - const needsSecondaryWhisper = !selectedSecondary && secondaryCanUseWhisperTranslate; - if (needsPrimaryWhisper || needsSecondaryWhisper) { - const whisperBin = resolveWhisperBinary(args); - const modelPath = args.whisperModel.trim() - ? path.resolve(resolvePathMaybe(args.whisperModel.trim())) - : ''; - const hasWhisperFallback = !!whisperBin && !!modelPath && fs.existsSync(modelPath); - - if (!hasWhisperFallback) { - log( - 'warn', - args.logLevel, - 'Whisper fallback is not configured; continuing with available subtitle tracks.', - ); - } else { - try { - await runExternalCommand( - 'yt-dlp', - [ - '-f', - 'bestaudio/best', - '--extract-audio', - '--audio-format', - args.youtubeSubgenAudioFormat, - '--no-warnings', - '-o', - path.join(tempDir, '%(id)s.%(ext)s'), - target, - ], - { - logLevel: args.logLevel, - commandLabel: 'yt-dlp:audio', - streamOutput: true, - }, - state.youtubeSubgenChildren, - ); - const audioPath = findAudioFile(tempDir, args.youtubeSubgenAudioFormat); - if (!audioPath) { - throw new Error('Audio extraction succeeded, but no audio file was found.'); - } - const whisperAudioPath = await convertAudioForWhisper(audioPath, tempDir); - - if (needsPrimaryWhisper) { - try { - const primaryPrefix = path.join(tempDir, `${basename}.${primaryLabel}`); - const primarySrt = await runWhisper( - whisperBin!, - modelPath, - whisperAudioPath, - args.youtubeWhisperSourceLanguage, - false, - primaryPrefix, - ); - primaryAlias = await publishTrack('primary', 'whisper', primarySrt, basename); - } catch (error) { - log( - 'warn', - args.logLevel, - `Failed to generate primary subtitle via whisper fallback: ${(error as Error).message}`, - ); - 
} - } - - if (needsSecondaryWhisper) { - try { - const secondaryPrefix = path.join(tempDir, `${basename}.${secondaryLabel}`); - const secondarySrt = await runWhisper( - whisperBin!, - modelPath, - whisperAudioPath, - args.youtubeWhisperSourceLanguage, - true, - secondaryPrefix, - ); - secondaryAlias = await publishTrack( - 'secondary', - 'whisper-translate', - secondarySrt, - basename, - ); - } catch (error) { - log( - 'warn', - args.logLevel, - `Failed to generate secondary subtitle via whisper fallback: ${(error as Error).message}`, - ); - } - } - } catch (error) { - log( - 'warn', - args.logLevel, - `Whisper fallback pipeline failed: ${(error as Error).message}`, - ); - } - } - } - - if (!secondaryCanUseWhisperTranslate && !selectedSecondary) { - log( - 'warn', - args.logLevel, - `Secondary subtitle language (${secondaryLabel}) has no whisper translate fallback; relying on yt-dlp subtitles only.`, - ); - } - - if (!primaryAlias && !secondaryAlias) { - throw new Error('Failed to generate any subtitle tracks.'); - } - if (!primaryAlias || !secondaryAlias) { - log( - 'warn', - args.logLevel, - `Generated partial subtitle result: primary=${primaryAlias ? 'ok' : 'missing'}, secondary=${secondaryAlias ? 
'ok' : 'missing'}`, - ); - } - - return { - basename, - primaryPath: primaryAlias || undefined, - secondaryPath: secondaryAlias || undefined, - }; - } catch (error) { - keepTemp = true; - throw error; - } finally { - if (keepTemp) { - log('warn', args.logLevel, `Keeping subtitle temp dir: ${tempDir}`); - } else { - try { - fs.rmSync(tempDir, { recursive: true, force: true }); - } catch { - // ignore cleanup failures - } - } - } -} +export { generateYoutubeSubtitles, resolveWhisperBinary } from './youtube/orchestrator.js'; diff --git a/launcher/youtube/audio-extraction.ts b/launcher/youtube/audio-extraction.ts new file mode 100644 index 0000000..8a7d938 --- /dev/null +++ b/launcher/youtube/audio-extraction.ts @@ -0,0 +1,84 @@ +import fs from 'node:fs'; +import path from 'node:path'; + +import type { Args } from '../types.js'; +import { YOUTUBE_AUDIO_EXTENSIONS } from '../types.js'; +import { runExternalCommand } from '../util.js'; + +export function findAudioFile(tempDir: string, preferredExt: string): string | null { + const entries = fs.readdirSync(tempDir); + const audioFiles: Array<{ path: string; ext: string; mtimeMs: number }> = []; + for (const name of entries) { + const fullPath = path.join(tempDir, name); + let stat: fs.Stats; + try { + stat = fs.statSync(fullPath); + } catch { + continue; + } + if (!stat.isFile()) continue; + const ext = path.extname(name).toLowerCase(); + if (!YOUTUBE_AUDIO_EXTENSIONS.has(ext)) continue; + audioFiles.push({ path: fullPath, ext, mtimeMs: stat.mtimeMs }); + } + if (audioFiles.length === 0) return null; + const preferred = audioFiles.find((entry) => entry.ext === `.${preferredExt.toLowerCase()}`); + if (preferred) return preferred.path; + audioFiles.sort((a, b) => b.mtimeMs - a.mtimeMs); + return audioFiles[0]?.path ?? 
null; +} + +export async function convertAudioForWhisper(inputPath: string, tempDir: string): Promise { + const wavPath = path.join(tempDir, 'whisper-input.wav'); + await runExternalCommand('ffmpeg', [ + '-y', + '-loglevel', + 'error', + '-i', + inputPath, + '-ar', + '16000', + '-ac', + '1', + '-c:a', + 'pcm_s16le', + wavPath, + ]); + if (!fs.existsSync(wavPath)) { + throw new Error(`Failed to prepare whisper audio input: ${wavPath}`); + } + return wavPath; +} + +export async function downloadYoutubeAudio( + target: string, + args: Args, + tempDir: string, + childTracker?: Set>, +): Promise { + await runExternalCommand( + 'yt-dlp', + [ + '-f', + 'bestaudio/best', + '--extract-audio', + '--audio-format', + args.youtubeSubgenAudioFormat, + '--no-warnings', + '-o', + path.join(tempDir, '%(id)s.%(ext)s'), + target, + ], + { + logLevel: args.logLevel, + commandLabel: 'yt-dlp:audio', + streamOutput: true, + }, + childTracker, + ); + const audioPath = findAudioFile(tempDir, args.youtubeSubgenAudioFormat); + if (!audioPath) { + throw new Error('Audio extraction succeeded, but no audio file was found.'); + } + return audioPath; +} diff --git a/launcher/youtube/manual-subs.ts b/launcher/youtube/manual-subs.ts new file mode 100644 index 0000000..13b22ce --- /dev/null +++ b/launcher/youtube/manual-subs.ts @@ -0,0 +1,99 @@ +import fs from 'node:fs'; +import path from 'node:path'; + +import type { SubtitleCandidate } from '../types.js'; +import { YOUTUBE_SUB_EXTENSIONS } from '../types.js'; +import { escapeRegExp, runExternalCommand } from '../util.js'; + +function filenameHasLanguageTag(filenameLower: string, langCode: string): boolean { + const escaped = escapeRegExp(langCode); + const pattern = new RegExp(`(^|[._-])${escaped}([._-]|$)`); + return pattern.test(filenameLower); +} + +function classifyLanguage( + filename: string, + primaryLangCodes: string[], + secondaryLangCodes: string[], +): 'primary' | 'secondary' | null { + const lower = filename.toLowerCase(); + const 
primary = primaryLangCodes.some((code) => filenameHasLanguageTag(lower, code)); + const secondary = secondaryLangCodes.some((code) => filenameHasLanguageTag(lower, code)); + if (primary && !secondary) return 'primary'; + if (secondary && !primary) return 'secondary'; + return null; +} + +export function toYtdlpLangPattern(langCodes: string[]): string { + return langCodes.map((lang) => `${lang}.*`).join(','); +} + +export function scanSubtitleCandidates( + tempDir: string, + knownSet: Set, + source: SubtitleCandidate['source'], + primaryLangCodes: string[], + secondaryLangCodes: string[], +): SubtitleCandidate[] { + const entries = fs.readdirSync(tempDir); + const out: SubtitleCandidate[] = []; + for (const name of entries) { + const fullPath = path.join(tempDir, name); + if (knownSet.has(fullPath)) continue; + let stat: fs.Stats; + try { + stat = fs.statSync(fullPath); + } catch { + continue; + } + if (!stat.isFile()) continue; + const ext = path.extname(fullPath).toLowerCase(); + if (!YOUTUBE_SUB_EXTENSIONS.has(ext)) continue; + const lang = classifyLanguage(name, primaryLangCodes, secondaryLangCodes); + if (!lang) continue; + out.push({ path: fullPath, lang, ext, size: stat.size, source }); + } + return out; +} + +export function pickBestCandidate(candidates: SubtitleCandidate[]): SubtitleCandidate | null { + if (candidates.length === 0) return null; + const scored = [...candidates].sort((a, b) => { + const srtA = a.ext === '.srt' ? 1 : 0; + const srtB = b.ext === '.srt' ? 1 : 0; + if (srtA !== srtB) return srtB - srtA; + return b.size - a.size; + }); + return scored[0] ?? 
null; +} + +export async function downloadManualSubtitles( + target: string, + tempDir: string, + langPattern: string, + logLevel: import('../types.js').LogLevel, + childTracker?: Set>, +): Promise { + await runExternalCommand( + 'yt-dlp', + [ + '--skip-download', + '--no-warnings', + '--write-subs', + '--sub-format', + 'srt/vtt/best', + '--sub-langs', + langPattern, + '-o', + path.join(tempDir, '%(id)s.%(ext)s'), + target, + ], + { + allowFailure: true, + logLevel, + commandLabel: 'yt-dlp:manual-subs', + streamOutput: true, + }, + childTracker, + ); +} diff --git a/launcher/youtube/orchestrator.test.ts b/launcher/youtube/orchestrator.test.ts new file mode 100644 index 0000000..5384ed2 --- /dev/null +++ b/launcher/youtube/orchestrator.test.ts @@ -0,0 +1,58 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { planYoutubeSubtitleGeneration } from './orchestrator'; + +test('planYoutubeSubtitleGeneration prefers manual subtitles and never schedules auto-subs', () => { + assert.deepEqual( + planYoutubeSubtitleGeneration({ + hasPrimaryManualSubtitle: true, + hasSecondaryManualSubtitle: false, + secondaryCanTranslate: true, + }), + { + fetchManualSubtitles: true, + fetchAutoSubtitles: false, + publishPrimaryManualSubtitle: false, + publishSecondaryManualSubtitle: false, + generatePrimarySubtitle: false, + generateSecondarySubtitle: true, + }, + ); +}); + +test('planYoutubeSubtitleGeneration generates only missing tracks', () => { + assert.deepEqual( + planYoutubeSubtitleGeneration({ + hasPrimaryManualSubtitle: false, + hasSecondaryManualSubtitle: true, + secondaryCanTranslate: true, + }), + { + fetchManualSubtitles: true, + fetchAutoSubtitles: false, + publishPrimaryManualSubtitle: false, + publishSecondaryManualSubtitle: false, + generatePrimarySubtitle: true, + generateSecondarySubtitle: false, + }, + ); +}); + +test('planYoutubeSubtitleGeneration reuses manual tracks already present on the YouTube video', () => { + assert.deepEqual( + 
planYoutubeSubtitleGeneration({ + hasPrimaryManualSubtitle: true, + hasSecondaryManualSubtitle: true, + secondaryCanTranslate: true, + }), + { + fetchManualSubtitles: true, + fetchAutoSubtitles: false, + publishPrimaryManualSubtitle: false, + publishSecondaryManualSubtitle: false, + generatePrimarySubtitle: false, + generateSecondarySubtitle: false, + }, + ); +}); diff --git a/launcher/youtube/orchestrator.ts b/launcher/youtube/orchestrator.ts new file mode 100644 index 0000000..b6702a5 --- /dev/null +++ b/launcher/youtube/orchestrator.ts @@ -0,0 +1,367 @@ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + +import type { Args, SubtitleCandidate, YoutubeSubgenOutputs } from '../types.js'; +import { log } from '../log.js'; +import { + commandExists, + normalizeBasename, + resolvePathMaybe, + runExternalCommand, + uniqueNormalizedLangCodes, +} from '../util.js'; +import { state } from '../mpv.js'; +import { downloadYoutubeAudio, convertAudioForWhisper } from './audio-extraction.js'; +import { + downloadManualSubtitles, + pickBestCandidate, + scanSubtitleCandidates, + toYtdlpLangPattern, +} from './manual-subs.js'; +import { runLoggedYoutubePhase } from './progress.js'; +import { fixSubtitleWithAi } from './subtitle-fix-ai.js'; +import { runWhisper } from './whisper.js'; + +export interface YoutubeSubtitleGenerationPlan { + fetchManualSubtitles: true; + fetchAutoSubtitles: false; + publishPrimaryManualSubtitle: false; + publishSecondaryManualSubtitle: false; + generatePrimarySubtitle: boolean; + generateSecondarySubtitle: boolean; +} + +export function planYoutubeSubtitleGeneration(input: { + hasPrimaryManualSubtitle: boolean; + hasSecondaryManualSubtitle: boolean; + secondaryCanTranslate: boolean; +}): YoutubeSubtitleGenerationPlan { + return { + fetchManualSubtitles: true, + fetchAutoSubtitles: false, + publishPrimaryManualSubtitle: false, + publishSecondaryManualSubtitle: false, + generatePrimarySubtitle: 
!input.hasPrimaryManualSubtitle, + generateSecondarySubtitle: !input.hasSecondaryManualSubtitle && input.secondaryCanTranslate, + }; +} + +function preferredLangLabel(langCodes: string[], fallback: string): string { + return uniqueNormalizedLangCodes(langCodes)[0] || fallback; +} + +function sourceTag(source: SubtitleCandidate['source']): string { + return source; +} + +export function resolveWhisperBinary(args: Args): string | null { + const explicit = args.whisperBin.trim(); + if (explicit) return resolvePathMaybe(explicit); + if (commandExists('whisper-cli')) return 'whisper-cli'; + return null; +} + +async function maybeFixSubtitleWithAi( + selectedPath: string, + args: Args, + expectedLanguage?: string, +): Promise { + if (!args.youtubeFixWithAi || args.aiConfig.enabled !== true) { + return selectedPath; + } + const fixedContent = await runLoggedYoutubePhase( + { + startMessage: `Starting AI subtitle fix: ${path.basename(selectedPath)}`, + finishMessage: `Finished AI subtitle fix: ${path.basename(selectedPath)}`, + failureMessage: `AI subtitle fix failed: ${path.basename(selectedPath)}`, + log: (level, message) => log(level, args.logLevel, message), + }, + async () => { + const originalContent = fs.readFileSync(selectedPath, 'utf8'); + return fixSubtitleWithAi( + originalContent, + args.aiConfig, + (message) => { + log('warn', args.logLevel, message); + }, + expectedLanguage, + ); + }, + ); + if (!fixedContent) { + return selectedPath; + } + + const fixedPath = selectedPath.replace(/\.srt$/i, '.fixed.srt'); + fs.writeFileSync(fixedPath, fixedContent, 'utf8'); + return fixedPath; +} + +export async function generateYoutubeSubtitles( + target: string, + args: Args, + onReady?: (lang: 'primary' | 'secondary', pathToLoad: string) => Promise, +): Promise { + const outDir = path.resolve(resolvePathMaybe(args.youtubeSubgenOutDir)); + fs.mkdirSync(outDir, { recursive: true }); + + const primaryLangCodes = uniqueNormalizedLangCodes(args.youtubePrimarySubLangs); + const 
secondaryLangCodes = uniqueNormalizedLangCodes(args.youtubeSecondarySubLangs); + const primaryLabel = preferredLangLabel(primaryLangCodes, 'primary'); + const secondaryLabel = preferredLangLabel(secondaryLangCodes, 'secondary'); + const secondaryCanUseWhisperTranslate = + secondaryLangCodes.includes('en') || secondaryLangCodes.includes('eng'); + const manualLangs = toYtdlpLangPattern([...primaryLangCodes, ...secondaryLangCodes]); + + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yt-subgen-')); + const knownFiles = new Set(); + let keepTemp = args.youtubeSubgenKeepTemp; + + const publishTrack = async ( + lang: 'primary' | 'secondary', + source: SubtitleCandidate['source'], + selectedPath: string, + basename: string, + ): Promise => { + const langLabel = lang === 'primary' ? primaryLabel : secondaryLabel; + const taggedPath = path.join(outDir, `${basename}.${langLabel}.${sourceTag(source)}.srt`); + const aliasPath = path.join(outDir, `${basename}.${langLabel}.srt`); + fs.copyFileSync(selectedPath, taggedPath); + fs.copyFileSync(taggedPath, aliasPath); + log('info', args.logLevel, `Generated subtitle (${langLabel}, ${source}) -> ${aliasPath}`); + if (onReady) await onReady(lang, aliasPath); + return aliasPath; + }; + + try { + const meta = await runLoggedYoutubePhase( + { + startMessage: 'Starting YouTube metadata probe', + finishMessage: 'Finished YouTube metadata probe', + failureMessage: 'YouTube metadata probe failed', + log: (level, message) => log(level, args.logLevel, message), + }, + () => + runExternalCommand( + 'yt-dlp', + ['--dump-single-json', '--no-warnings', target], + { + captureStdout: true, + logLevel: args.logLevel, + commandLabel: 'yt-dlp:meta', + }, + state.youtubeSubgenChildren, + ), + ); + const metadata = JSON.parse(meta.stdout) as { id?: string }; + const videoId = metadata.id || `${Date.now()}`; + const basename = normalizeBasename(videoId, videoId); + + await runLoggedYoutubePhase( + { + startMessage: `Starting manual 
subtitle probe (${manualLangs || 'requested langs'})`, + finishMessage: 'Finished manual subtitle probe', + failureMessage: 'Manual subtitle probe failed', + log: (level, message) => log(level, args.logLevel, message), + }, + () => + downloadManualSubtitles( + target, + tempDir, + manualLangs, + args.logLevel, + state.youtubeSubgenChildren, + ), + ); + + const manualSubs = scanSubtitleCandidates( + tempDir, + knownFiles, + 'manual', + primaryLangCodes, + secondaryLangCodes, + ); + for (const sub of manualSubs) knownFiles.add(sub.path); + const selectedPrimary = pickBestCandidate( + manualSubs.filter((entry) => entry.lang === 'primary'), + ); + const selectedSecondary = pickBestCandidate( + manualSubs.filter((entry) => entry.lang === 'secondary'), + ); + + const plan = planYoutubeSubtitleGeneration({ + hasPrimaryManualSubtitle: Boolean(selectedPrimary), + hasSecondaryManualSubtitle: Boolean(selectedSecondary), + secondaryCanTranslate: secondaryCanUseWhisperTranslate, + }); + + let primaryAlias = ''; + let secondaryAlias = ''; + + if (selectedPrimary) { + log( + 'info', + args.logLevel, + `Using native YouTube subtitle track for primary (${primaryLabel}); skipping external subtitle copy.`, + ); + } + if (selectedSecondary) { + log( + 'info', + args.logLevel, + `Using native YouTube subtitle track for secondary (${secondaryLabel}); skipping external subtitle copy.`, + ); + } + + if (plan.generatePrimarySubtitle || plan.generateSecondarySubtitle) { + const whisperBin = resolveWhisperBinary(args); + const modelPath = args.whisperModel.trim() + ? 
path.resolve(resolvePathMaybe(args.whisperModel.trim())) + : ''; + const hasWhisperFallback = !!whisperBin && !!modelPath && fs.existsSync(modelPath); + + if (!hasWhisperFallback) { + log( + 'warn', + args.logLevel, + 'Whisper fallback is not configured; continuing with available subtitle tracks.', + ); + } else { + const audioPath = await runLoggedYoutubePhase( + { + startMessage: 'Starting fallback audio extraction for subtitle generation', + finishMessage: 'Finished fallback audio extraction', + failureMessage: 'Fallback audio extraction failed', + log: (level, message) => log(level, args.logLevel, message), + }, + () => + downloadYoutubeAudio(target, args, tempDir, state.youtubeSubgenChildren), + ); + const whisperAudioPath = await runLoggedYoutubePhase( + { + startMessage: 'Starting ffmpeg audio prep for whisper', + finishMessage: 'Finished ffmpeg audio prep for whisper', + failureMessage: 'ffmpeg audio prep for whisper failed', + log: (level, message) => log(level, args.logLevel, message), + }, + () => convertAudioForWhisper(audioPath, tempDir), + ); + + if (plan.generatePrimarySubtitle) { + try { + const primaryPrefix = path.join(tempDir, `${basename}.${primaryLabel}`); + const primarySrt = await runLoggedYoutubePhase( + { + startMessage: `Starting whisper primary subtitle generation (${primaryLabel})`, + finishMessage: `Finished whisper primary subtitle generation (${primaryLabel})`, + failureMessage: `Whisper primary subtitle generation failed (${primaryLabel})`, + log: (level, message) => log(level, args.logLevel, message), + }, + () => + runWhisper(whisperBin!, args, { + modelPath, + audioPath: whisperAudioPath, + language: args.youtubeWhisperSourceLanguage, + translate: false, + outputPrefix: primaryPrefix, + }), + ); + const fixedPrimary = await maybeFixSubtitleWithAi( + primarySrt, + args, + args.youtubeWhisperSourceLanguage, + ); + primaryAlias = await publishTrack( + 'primary', + fixedPrimary === primarySrt ? 
'whisper' : 'whisper-fixed', + fixedPrimary, + basename, + ); + } catch (error) { + log( + 'warn', + args.logLevel, + `Failed to generate primary subtitle via whisper fallback: ${(error as Error).message}`, + ); + } + } + + if (plan.generateSecondarySubtitle) { + try { + const secondaryPrefix = path.join(tempDir, `${basename}.${secondaryLabel}`); + const secondarySrt = await runLoggedYoutubePhase( + { + startMessage: `Starting whisper secondary subtitle generation (${secondaryLabel})`, + finishMessage: `Finished whisper secondary subtitle generation (${secondaryLabel})`, + failureMessage: `Whisper secondary subtitle generation failed (${secondaryLabel})`, + log: (level, message) => log(level, args.logLevel, message), + }, + () => + runWhisper(whisperBin!, args, { + modelPath, + audioPath: whisperAudioPath, + language: args.youtubeWhisperSourceLanguage, + translate: true, + outputPrefix: secondaryPrefix, + }), + ); + const fixedSecondary = await maybeFixSubtitleWithAi(secondarySrt, args); + secondaryAlias = await publishTrack( + 'secondary', + fixedSecondary === secondarySrt ? 'whisper-translate' : 'whisper-translate-fixed', + fixedSecondary, + basename, + ); + } catch (error) { + log( + 'warn', + args.logLevel, + `Failed to generate secondary subtitle via whisper fallback: ${(error as Error).message}`, + ); + } + } + } + } + + if (!secondaryCanUseWhisperTranslate && !selectedSecondary) { + log( + 'warn', + args.logLevel, + `Secondary subtitle language (${secondaryLabel}) has no whisper translate fallback; relying on manual subtitles only.`, + ); + } + + if (!primaryAlias && !secondaryAlias && !selectedPrimary && !selectedSecondary) { + throw new Error('Failed to generate any subtitle tracks.'); + } + if ((!primaryAlias && !selectedPrimary) || (!secondaryAlias && !selectedSecondary)) { + log( + 'warn', + args.logLevel, + `Generated partial subtitle result: primary=${primaryAlias || selectedPrimary ? 
'ok' : 'missing'}, secondary=${secondaryAlias || selectedSecondary ? 'ok' : 'missing'}`, + ); + } + + return { + basename, + primaryPath: primaryAlias || undefined, + secondaryPath: secondaryAlias || undefined, + primaryNative: Boolean(selectedPrimary), + secondaryNative: Boolean(selectedSecondary), + }; + } catch (error) { + keepTemp = true; + throw error; + } finally { + if (keepTemp) { + log('warn', args.logLevel, `Keeping subtitle temp dir: ${tempDir}`); + } else { + try { + fs.rmSync(tempDir, { recursive: true, force: true }); + } catch { + // ignore cleanup failures + } + } + } +} diff --git a/launcher/youtube/progress.test.ts b/launcher/youtube/progress.test.ts new file mode 100644 index 0000000..d76a6f0 --- /dev/null +++ b/launcher/youtube/progress.test.ts @@ -0,0 +1,55 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { runLoggedYoutubePhase } from './progress'; + +test('runLoggedYoutubePhase logs start and finish with elapsed time', async () => { + const entries: Array<{ level: 'info' | 'warn'; message: string }> = []; + let nowMs = 1_000; + + const result = await runLoggedYoutubePhase( + { + startMessage: 'Starting subtitle probe', + finishMessage: 'Finished subtitle probe', + log: (level, message) => entries.push({ level, message }), + now: () => nowMs, + }, + async () => { + nowMs = 2_500; + return 'ok'; + }, + ); + + assert.equal(result, 'ok'); + assert.deepEqual(entries, [ + { level: 'info', message: 'Starting subtitle probe' }, + { level: 'info', message: 'Finished subtitle probe (1.5s)' }, + ]); +}); + +test('runLoggedYoutubePhase logs failure with elapsed time and rethrows', async () => { + const entries: Array<{ level: 'info' | 'warn'; message: string }> = []; + let nowMs = 5_000; + + await assert.rejects( + runLoggedYoutubePhase( + { + startMessage: 'Starting whisper primary', + finishMessage: 'Finished whisper primary', + failureMessage: 'Failed whisper primary', + log: (level, message) => entries.push({ 
level, message }), + now: () => nowMs, + }, + async () => { + nowMs = 8_200; + throw new Error('boom'); + }, + ), + /boom/, + ); + + assert.deepEqual(entries, [ + { level: 'info', message: 'Starting whisper primary' }, + { level: 'warn', message: 'Failed whisper primary after 3.2s: boom' }, + ]); +}); diff --git a/launcher/youtube/progress.ts b/launcher/youtube/progress.ts new file mode 100644 index 0000000..ae15c48 --- /dev/null +++ b/launcher/youtube/progress.ts @@ -0,0 +1,33 @@ +type PhaseLogLevel = 'info' | 'warn'; + +export interface RunLoggedYoutubePhaseOptions { + startMessage: string; + finishMessage: string; + failureMessage?: string; + log: (level: PhaseLogLevel, message: string) => void; + now?: () => number; +} + +function formatElapsedMs(elapsedMs: number): string { + const seconds = Math.max(0, elapsedMs) / 1000; + return `${seconds.toFixed(1)}s`; +} + +export async function runLoggedYoutubePhase( + options: RunLoggedYoutubePhaseOptions, + run: () => Promise, +): Promise { + const now = options.now ?? Date.now; + const startedAt = now(); + options.log('info', options.startMessage); + try { + const result = await run(); + options.log('info', `${options.finishMessage} (${formatElapsedMs(now() - startedAt)})`); + return result; + } catch (error) { + const prefix = options.failureMessage ?? options.finishMessage; + const message = error instanceof Error ? 
error.message : String(error); + options.log('warn', `${prefix} after ${formatElapsedMs(now() - startedAt)}: ${message}`); + throw error; + } +} diff --git a/launcher/youtube/srt.test.ts b/launcher/youtube/srt.test.ts new file mode 100644 index 0000000..8792920 --- /dev/null +++ b/launcher/youtube/srt.test.ts @@ -0,0 +1,32 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { parseSrt, stringifySrt } from './srt'; + +test('parseSrt reads cue numbering timing and text', () => { + const cues = parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんにちは + +2 +00:00:02,500 --> 00:00:03,000 +世界 +`); + + assert.equal(cues.length, 2); + assert.equal(cues[0]?.start, '00:00:01,000'); + assert.equal(cues[0]?.end, '00:00:02,000'); + assert.equal(cues[0]?.text, 'こんにちは'); + assert.equal(cues[1]?.text, '世界'); +}); + +test('stringifySrt preserves parseable cue structure', () => { + const roundTrip = stringifySrt( + parseSrt(`1 +00:00:01,000 --> 00:00:02,000 +こんにちは +`), + ); + + assert.match(roundTrip, /1\n00:00:01,000 --> 00:00:02,000\nこんにちは/); +}); diff --git a/launcher/youtube/srt.ts b/launcher/youtube/srt.ts new file mode 100644 index 0000000..2931054 --- /dev/null +++ b/launcher/youtube/srt.ts @@ -0,0 +1,40 @@ +export interface SrtCue { + index: number; + start: string; + end: string; + text: string; +} + +const TIMING_LINE_PATTERN = + /^(?\d{2}:\d{2}:\d{2},\d{3}) --> (?\d{2}:\d{2}:\d{2},\d{3})$/; + +export function parseSrt(content: string): SrtCue[] { + const normalized = content.replace(/\r\n/g, '\n').trim(); + if (!normalized) return []; + + return normalized + .split(/\n{2,}/) + .map((block) => { + const lines = block.split('\n'); + const index = Number.parseInt(lines[0] || '', 10); + const timingLine = lines[1] || ''; + const timingMatch = TIMING_LINE_PATTERN.exec(timingLine); + if (!Number.isInteger(index) || !timingMatch?.groups) { + throw new Error(`Invalid SRT cue block: ${block}`); + } + return { + index, + start: timingMatch.groups.start!, 
+ end: timingMatch.groups.end!, + text: lines.slice(2).join('\n').trim(), + } satisfies SrtCue; + }) + .filter((cue) => cue.text.length > 0); +} + +export function stringifySrt(cues: SrtCue[]): string { + return cues + .map((cue, idx) => `${idx + 1}\n${cue.start} --> ${cue.end}\n${cue.text.trim()}\n`) + .join('\n') + .trimEnd(); +} diff --git a/launcher/youtube/whisper.test.ts b/launcher/youtube/whisper.test.ts new file mode 100644 index 0000000..bf1098a --- /dev/null +++ b/launcher/youtube/whisper.test.ts @@ -0,0 +1,47 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { buildWhisperArgs } from './whisper'; + +test('buildWhisperArgs includes threads and optional VAD flags', () => { + assert.deepEqual( + buildWhisperArgs({ + modelPath: '/models/ggml-large-v2.bin', + audioPath: '/tmp/input.wav', + outputPrefix: '/tmp/output', + language: 'ja', + translate: false, + threads: 8, + vadModelPath: '/models/vad.bin', + }), + [ + '-m', + '/models/ggml-large-v2.bin', + '-f', + '/tmp/input.wav', + '--output-srt', + '--output-file', + '/tmp/output', + '--language', + 'ja', + '--threads', + '8', + '-vm', + '/models/vad.bin', + '--vad', + ], + ); +}); + +test('buildWhisperArgs includes translate flag when requested', () => { + assert.ok( + buildWhisperArgs({ + modelPath: '/models/base.bin', + audioPath: '/tmp/input.wav', + outputPrefix: '/tmp/output', + language: 'ja', + translate: true, + threads: 4, + }).includes('--translate'), + ); +}); diff --git a/launcher/youtube/whisper.ts b/launcher/youtube/whisper.ts new file mode 100644 index 0000000..16ba77c --- /dev/null +++ b/launcher/youtube/whisper.ts @@ -0,0 +1,60 @@ +import fs from 'node:fs'; + +import type { Args } from '../types.js'; +import { runExternalCommand } from '../util.js'; + +export interface BuildWhisperArgsOptions { + modelPath: string; + audioPath: string; + outputPrefix: string; + language: string; + translate: boolean; + threads: number; + vadModelPath?: string; +} + +export 
function buildWhisperArgs(options: BuildWhisperArgsOptions): string[] {
  /*
   * Builds the argument vector passed to the whisper binary.
   *
   * The model, input audio, SRT output prefix, language and thread count are
   * always emitted, in that order. `--translate` is appended only when
   * `options.translate` is true, and `-vm <path> --vad` only when a VAD model
   * path is supplied.
   *
   * NOTE: the `export` keyword for this declaration sits at the end of the
   * preceding physical line of the file, so it is not repeated here.
   */
  const args: string[] = [
    '-m',
    options.modelPath,
    '-f',
    options.audioPath,
    '--output-srt',
    '--output-file',
    options.outputPrefix,
    '--language',
    options.language,
    '--threads',
    String(options.threads),
  ];
  if (options.translate) args.push('--translate');
  if (options.vadModelPath) {
    args.push('-vm', options.vadModelPath, '--vad');
  }
  return args;
}

/**
 * Runs the whisper binary on an audio file and returns the path of the SRT
 * file it is expected to write.
 *
 * `threads` and `vadModelPath` are not taken from `options`; they are derived
 * from `args` (`whisperThreads`, `whisperVadModel`). The VAD model is only
 * forwarded when the trimmed path is non-empty and exists on disk.
 *
 * @param whisperBin - Executable handed to `runExternalCommand`.
 * @param args - Launcher arguments supplying thread count and VAD model path.
 * @param options - Whisper options other than `threads` / `vadModelPath`.
 * @returns `${options.outputPrefix}.srt`.
 * @throws Error when that file does not exist after the command completes.
 *   Whatever `runExternalCommand` rejects with also propagates unchanged.
 */
export async function runWhisper(
  whisperBin: string,
  args: Args,
  options: Omit<BuildWhisperArgsOptions, 'threads' | 'vadModelPath'>,
): Promise<string> {
  // Trim once; an empty string or a missing file both mean "no VAD".
  const vadModelCandidate = args.whisperVadModel.trim();
  const vadModelPath =
    vadModelCandidate && fs.existsSync(vadModelCandidate) ? vadModelCandidate : undefined;

  const whisperArgs = buildWhisperArgs({
    ...options,
    threads: args.whisperThreads,
    vadModelPath,
  });
  await runExternalCommand(whisperBin, whisperArgs, {
    commandLabel: 'whisper',
    streamOutput: true,
  });

  // `--output-file <prefix>` combined with `--output-srt` is expected to
  // produce `<prefix>.srt`; verify rather than trust the exit status alone.
  const outputPath = `${options.outputPrefix}.srt`;
  if (!fs.existsSync(outputPath)) {
    throw new Error(`whisper output not found: ${outputPath}`);
  }
  return outputPath;
}