import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import type { Args, SubtitleCandidate, YoutubeSubgenOutputs } from './types.js';
import { YOUTUBE_SUB_EXTENSIONS, YOUTUBE_AUDIO_EXTENSIONS } from './types.js';
import { log } from './log.js';
import {
  resolvePathMaybe,
  uniqueNormalizedLangCodes,
  escapeRegExp,
  normalizeBasename,
  runExternalCommand,
  commandExists,
} from './util.js';
import { state } from './mpv.js';

/**
 * Extracts a loggable message from a caught value without assuming it is an
 * `Error` (catch variables are `unknown` under `strict`).
 */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Builds the value passed to yt-dlp's `--sub-langs`: every code becomes
 * `<code>.*` and the results are comma-joined.
 *
 * @returns An empty string when `langCodes` is empty.
 */
function toYtdlpLangPattern(langCodes: string[]): string {
  return langCodes.map((lang) => `${lang}.*`).join(',');
}

/**
 * Tests whether `langCode` appears in `filenameLower` as a whole segment, i.e.
 * bounded on each side by `.`, `_`, `-` or the start/end of the string.
 *
 * NOTE(review): the match is not restricted to the suffix of the name, so a
 * tag-like segment anywhere in the filename (for example inside a video id
 * containing `-en-`) also counts as a hit.
 *
 * @param filenameLower - File name; callers pass it already lower-cased.
 * @param langCode - Language code, regex-escaped before use.
 */
function filenameHasLanguageTag(filenameLower: string, langCode: string): boolean {
  const escaped = escapeRegExp(langCode);
  const pattern = new RegExp(`(^|[._-])${escaped}([._-]|$)`);
  return pattern.test(filenameLower);
}

/**
 * Classifies a subtitle file name as belonging to the primary or secondary
 * language set.
 *
 * @returns `'primary'` or `'secondary'` when exactly one set matches; `null`
 *   when neither matches or when both do (ambiguous).
 */
function classifyLanguage(
  filename: string,
  primaryLangCodes: string[],
  secondaryLangCodes: string[],
): 'primary' | 'secondary' | null {
  const lower = filename.toLowerCase();
  const primary = primaryLangCodes.some((code) => filenameHasLanguageTag(lower, code));
  const secondary = secondaryLangCodes.some((code) => filenameHasLanguageTag(lower, code));
  if (primary && !secondary) return 'primary';
  if (secondary && !primary) return 'secondary';
  return null;
}

/**
 * Returns the first entry of `uniqueNormalizedLangCodes(langCodes)`, or
 * `fallback` when that entry is missing or empty.
 */
function preferredLangLabel(langCodes: string[], fallback: string): string {
  return uniqueNormalizedLangCodes(langCodes)[0] || fallback;
}

/**
 * Maps a candidate source to the tag embedded in published file names:
 * `manual`/`auto` become `ytdlp-manual`/`ytdlp-auto`, `whisper-translate` is
 * kept as is, and every other source is tagged `whisper`.
 */
function sourceTag(source: SubtitleCandidate['source']): string {
  if (source === 'manual' || source === 'auto') return `ytdlp-${source}`;
  if (source === 'whisper-translate') return 'whisper-translate';
  return 'whisper';
}

/**
 * Picks the preferred candidate from a list without mutating it.
 *
 * Ranking, in order: `manual` source before any other source, `.srt`
 * extension before any other extension, then larger file size first.
 *
 * @returns The best candidate, or `null` for an empty list.
 */
function pickBestCandidate(candidates: SubtitleCandidate[]): SubtitleCandidate | null {
  if (candidates.length === 0) return null;
  const scored = [...candidates].sort((a, b) => {
    const sourceA = a.source === 'manual' ? 1 : 0;
    const sourceB = b.source === 'manual' ? 1 : 0;
    if (sourceA !== sourceB) return sourceB - sourceA;
    const srtA = a.ext === '.srt' ? 1 : 0;
    const srtB = b.ext === '.srt' ? 1 : 0;
    if (srtA !== srtB) return srtB - srtA;
    return b.size - a.size;
  });
  return scored[0] ?? null;
}

/**
 * Lists subtitle files in `tempDir` that have not been seen before and can be
 * attributed to exactly one language set.
 *
 * Entries are skipped when their full path is in `knownSet`, when they cannot
 * be stat'ed, when they are not regular files, when their lower-cased
 * extension is not in `YOUTUBE_SUB_EXTENSIONS`, or when `classifyLanguage`
 * returns `null`. `knownSet` is only read here; callers update it.
 *
 * @param source - Recorded verbatim on every returned candidate.
 */
function scanSubtitleCandidates(
  tempDir: string,
  knownSet: Set<string>,
  source: 'manual' | 'auto',
  primaryLangCodes: string[],
  secondaryLangCodes: string[],
): SubtitleCandidate[] {
  const entries = fs.readdirSync(tempDir);
  const out: SubtitleCandidate[] = [];
  for (const name of entries) {
    const fullPath = path.join(tempDir, name);
    if (knownSet.has(fullPath)) continue;
    let stat: fs.Stats;
    try {
      stat = fs.statSync(fullPath);
    } catch {
      // The entry disappeared or is unreadable; skip it.
      continue;
    }
    if (!stat.isFile()) continue;
    const ext = path.extname(fullPath).toLowerCase();
    if (!YOUTUBE_SUB_EXTENSIONS.has(ext)) continue;
    const lang = classifyLanguage(name, primaryLangCodes, secondaryLangCodes);
    if (!lang) continue;
    out.push({ path: fullPath, lang, ext, size: stat.size, source });
  }
  return out;
}

/**
 * Ensures a subtitle file is in SRT format.
 *
 * A path whose extension is already `.srt` (case-insensitive) is returned
 * untouched. Anything else is converted with ffmpeg into
 * `<tempDir>/converted.<langLabel>.srt` (overwriting via `-y`).
 *
 * @returns Path of the SRT file to use.
 */
async function convertToSrt(
  inputPath: string,
  tempDir: string,
  langLabel: string,
): Promise<string> {
  if (path.extname(inputPath).toLowerCase() === '.srt') return inputPath;
  const outputPath = path.join(tempDir, `converted.${langLabel}.srt`);
  await runExternalCommand('ffmpeg', ['-y', '-loglevel', 'error', '-i', inputPath, outputPath]);
  return outputPath;
}

/**
 * Finds an audio file in `tempDir`.
 *
 * Only regular files whose lower-cased extension is in
 * `YOUTUBE_AUDIO_EXTENSIONS` are considered. A file with the preferred
 * extension wins; otherwise the most recently modified file is returned.
 *
 * @param preferredExt - Extension without the leading dot (case-insensitive).
 * @returns The chosen path, or `null` when no audio file exists.
 */
function findAudioFile(tempDir: string, preferredExt: string): string | null {
  const entries = fs.readdirSync(tempDir);
  const audioFiles: Array<{ path: string; ext: string; mtimeMs: number }> = [];
  for (const name of entries) {
    const fullPath = path.join(tempDir, name);
    let stat: fs.Stats;
    try {
      stat = fs.statSync(fullPath);
    } catch {
      // The entry disappeared or is unreadable; skip it.
      continue;
    }
    if (!stat.isFile()) continue;
    const ext = path.extname(name).toLowerCase();
    if (!YOUTUBE_AUDIO_EXTENSIONS.has(ext)) continue;
    audioFiles.push({ path: fullPath, ext, mtimeMs: stat.mtimeMs });
  }
  if (audioFiles.length === 0) return null;
  const preferred = audioFiles.find((entry) => entry.ext === `.${preferredExt.toLowerCase()}`);
  if (preferred) return preferred.path;
  audioFiles.sort((a, b) => b.mtimeMs - a.mtimeMs);
  return audioFiles[0]?.path ?? null;
}

/**
 * Runs the whisper binary on an audio file and returns the SRT it produced.
 *
 * @param whisperBin - Executable to invoke.
 * @param modelPath - Passed as `-m`.
 * @param audioPath - Passed as `-f`.
 * @param language - Passed as `--language`.
 * @param translate - When true, `--translate` is appended.
 * @param outputPrefix - Passed as `--output-file`; the result is expected at
 *   `<outputPrefix>.srt`.
 * @returns Path of the generated `.srt` file.
 * @throws Error when the expected output file does not exist after the run.
 */
async function runWhisper(
  whisperBin: string,
  modelPath: string,
  audioPath: string,
  language: string,
  translate: boolean,
  outputPrefix: string,
): Promise<string> {
  const args = [
    '-m',
    modelPath,
    '-f',
    audioPath,
    '--output-srt',
    '--output-file',
    outputPrefix,
    '--language',
    language,
  ];
  if (translate) args.push('--translate');
  await runExternalCommand(whisperBin, args, {
    commandLabel: 'whisper',
    streamOutput: true,
  });
  const outputPath = `${outputPrefix}.srt`;
  if (!fs.existsSync(outputPath)) {
    throw new Error(`whisper output not found: ${outputPath}`);
  }
  return outputPath;
}

/**
 * Transcodes `inputPath` with ffmpeg into `<tempDir>/whisper-input.wav` as
 * 16 kHz, mono, signed 16-bit PCM.
 *
 * @returns Path of the WAV file.
 * @throws Error when the WAV file does not exist after ffmpeg returns.
 */
async function convertAudioForWhisper(inputPath: string, tempDir: string): Promise<string> {
  const wavPath = path.join(tempDir, 'whisper-input.wav');
  await runExternalCommand('ffmpeg', [
    '-y',
    '-loglevel',
    'error',
    '-i',
    inputPath,
    '-ar',
    '16000',
    '-ac',
    '1',
    '-c:a',
    'pcm_s16le',
    wavPath,
  ]);
  if (!fs.existsSync(wavPath)) {
    throw new Error(`Failed to prepare whisper audio input: ${wavPath}`);
  }
  return wavPath;
}

/**
 * Resolves which whisper executable to use.
 *
 * @returns `resolvePathMaybe(args.whisperBin)` when that option is non-blank;
 *   otherwise `'whisper-cli'` if `commandExists` reports it; otherwise `null`.
 */
export function resolveWhisperBinary(args: Args): string | null {
  const explicit = args.whisperBin.trim();
  if (explicit) return resolvePathMaybe(explicit);
  if (commandExists('whisper-cli')) return 'whisper-cli';
  return null;
}

/**
 * Produces primary and secondary subtitle files for a YouTube target.
 *
 * Steps:
 *  1. Query metadata with yt-dlp to derive the output basename from the video id
 *     (falling back to the current timestamp when no usable id is present).
 *  2. Download manually authored subtitles for both language sets.
 *  3. For each side still missing, download auto-generated subtitles.
 *  4. Publish the best candidate per side (converted to SRT if needed).
 *  5. For sides still missing, fall back to whisper: transcription for the
 *     primary track, `--translate` for the secondary track — the latter only
 *     when the secondary languages include `en` or `eng`.
 *
 * Each published track is written to `args.youtubeSubgenOutDir` twice: as
 * `<basename>.<label>.<sourceTag>.srt` and as the alias
 * `<basename>.<label>.srt`.
 *
 * The temp directory is removed afterwards unless
 * `args.youtubeSubgenKeepTemp` is set or an error is thrown, in which case it
 * is kept and its location is logged.
 *
 * @param target - Passed to yt-dlp as the video to process.
 * @param args - Runtime options (languages, output dir, whisper settings, ...).
 * @param onReady - Optional callback awaited right after each track's alias
 *   file is written.
 * @returns The basename and the alias paths of the tracks that were produced
 *   (`undefined` for a side that could not be generated).
 * @throws Error when neither track could be generated; errors from the
 *   metadata query and from `onReady` also propagate.
 */
export async function generateYoutubeSubtitles(
  target: string,
  args: Args,
  onReady?: (lang: 'primary' | 'secondary', pathToLoad: string) => Promise<void>,
): Promise<YoutubeSubgenOutputs> {
  const outDir = path.resolve(resolvePathMaybe(args.youtubeSubgenOutDir));
  fs.mkdirSync(outDir, { recursive: true });

  const primaryLangCodes = uniqueNormalizedLangCodes(args.youtubePrimarySubLangs);
  const secondaryLangCodes = uniqueNormalizedLangCodes(args.youtubeSecondarySubLangs);
  const primaryLabel = preferredLangLabel(primaryLangCodes, 'primary');
  const secondaryLabel = preferredLangLabel(secondaryLangCodes, 'secondary');
  // The whisper fallback for the secondary track uses `--translate`, so it is
  // only enabled when the secondary languages include English.
  const secondaryCanUseWhisperTranslate =
    secondaryLangCodes.includes('en') || secondaryLangCodes.includes('eng');
  const ytdlpManualLangs = toYtdlpLangPattern([...primaryLangCodes, ...secondaryLangCodes]);

  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yt-subgen-'));
  // Paths already attributed to a scan, so the auto-subs scan does not
  // re-report files found by the manual-subs scan.
  const knownFiles = new Set<string>();
  let keepTemp = args.youtubeSubgenKeepTemp;

  /** Copies a finished SRT into the output dir (tagged + alias) and notifies `onReady`. */
  const publishTrack = async (
    lang: 'primary' | 'secondary',
    source: SubtitleCandidate['source'],
    selectedPath: string,
    basename: string,
  ): Promise<string> => {
    const langLabel = lang === 'primary' ? primaryLabel : secondaryLabel;
    const taggedPath = path.join(outDir, `${basename}.${langLabel}.${sourceTag(source)}.srt`);
    const aliasPath = path.join(outDir, `${basename}.${langLabel}.srt`);
    fs.copyFileSync(selectedPath, taggedPath);
    fs.copyFileSync(taggedPath, aliasPath);
    log('info', args.logLevel, `Generated subtitle (${langLabel}, ${source}) -> ${aliasPath}`);
    if (onReady) await onReady(lang, aliasPath);
    return aliasPath;
  };

  try {
    log('debug', args.logLevel, `YouTube subtitle temp dir: ${tempDir}`);

    const meta = await runExternalCommand(
      'yt-dlp',
      ['--dump-single-json', '--no-warnings', target],
      {
        captureStdout: true,
        logLevel: args.logLevel,
        commandLabel: 'yt-dlp:meta',
      },
      state.youtubeSubgenChildren,
    );
    // Narrow the parsed JSON instead of trusting its shape.
    const metadata: unknown = JSON.parse(meta.stdout);
    const rawId =
      typeof metadata === 'object' && metadata !== null
        ? (metadata as { id?: unknown }).id
        : undefined;
    const videoId = typeof rawId === 'string' && rawId ? rawId : `${Date.now()}`;
    const basename = normalizeBasename(videoId, videoId);

    // Manually authored subtitles; `allowFailure` is set so a failure here
    // still lets the later fallbacks run.
    await runExternalCommand(
      'yt-dlp',
      [
        '--skip-download',
        '--no-warnings',
        '--write-subs',
        '--sub-format',
        'srt/vtt/best',
        '--sub-langs',
        ytdlpManualLangs,
        '-o',
        path.join(tempDir, '%(id)s.%(ext)s'),
        target,
      ],
      {
        allowFailure: true,
        logLevel: args.logLevel,
        commandLabel: 'yt-dlp:manual-subs',
        streamOutput: true,
      },
      state.youtubeSubgenChildren,
    );

    const manualSubs = scanSubtitleCandidates(
      tempDir,
      knownFiles,
      'manual',
      primaryLangCodes,
      secondaryLangCodes,
    );
    for (const sub of manualSubs) knownFiles.add(sub.path);

    let primaryCandidates = manualSubs.filter((entry) => entry.lang === 'primary');
    let secondaryCandidates = manualSubs.filter((entry) => entry.lang === 'secondary');

    // Request auto-generated subtitles only for the sides that are still empty.
    const missingAuto: string[] = [];
    if (primaryCandidates.length === 0) missingAuto.push(toYtdlpLangPattern(primaryLangCodes));
    if (secondaryCandidates.length === 0) {
      missingAuto.push(toYtdlpLangPattern(secondaryLangCodes));
    }

    if (missingAuto.length > 0) {
      await runExternalCommand(
        'yt-dlp',
        [
          '--skip-download',
          '--no-warnings',
          '--write-auto-subs',
          '--sub-format',
          'srt/vtt/best',
          '--sub-langs',
          missingAuto.join(','),
          '-o',
          path.join(tempDir, '%(id)s.%(ext)s'),
          target,
        ],
        {
          allowFailure: true,
          logLevel: args.logLevel,
          commandLabel: 'yt-dlp:auto-subs',
          streamOutput: true,
        },
        state.youtubeSubgenChildren,
      );

      const autoSubs = scanSubtitleCandidates(
        tempDir,
        knownFiles,
        'auto',
        primaryLangCodes,
        secondaryLangCodes,
      );
      for (const sub of autoSubs) knownFiles.add(sub.path);
      primaryCandidates = primaryCandidates.concat(
        autoSubs.filter((entry) => entry.lang === 'primary'),
      );
      secondaryCandidates = secondaryCandidates.concat(
        autoSubs.filter((entry) => entry.lang === 'secondary'),
      );
    }

    // Empty string means "not produced"; converted to `undefined` on return.
    let primaryAlias = '';
    let secondaryAlias = '';

    const selectedPrimary = pickBestCandidate(primaryCandidates);
    const selectedSecondary = pickBestCandidate(secondaryCandidates);

    if (selectedPrimary) {
      const srt = await convertToSrt(selectedPrimary.path, tempDir, primaryLabel);
      primaryAlias = await publishTrack('primary', selectedPrimary.source, srt, basename);
    }
    if (selectedSecondary) {
      const srt = await convertToSrt(selectedSecondary.path, tempDir, secondaryLabel);
      secondaryAlias = await publishTrack('secondary', selectedSecondary.source, srt, basename);
    }

    const needsPrimaryWhisper = !selectedPrimary;
    const needsSecondaryWhisper = !selectedSecondary && secondaryCanUseWhisperTranslate;

    if (needsPrimaryWhisper || needsSecondaryWhisper) {
      const whisperBin = resolveWhisperBinary(args);
      const modelPath = args.whisperModel.trim()
        ? path.resolve(resolvePathMaybe(args.whisperModel.trim()))
        : '';

      // Guarding on `whisperBin` directly narrows it to `string` below.
      if (!whisperBin || !modelPath || !fs.existsSync(modelPath)) {
        log(
          'warn',
          args.logLevel,
          'Whisper fallback is not configured; continuing with available subtitle tracks.',
        );
      } else {
        // Failures in the shared audio preparation are logged, not thrown, so
        // tracks already published from yt-dlp are still returned.
        try {
          await runExternalCommand(
            'yt-dlp',
            [
              '-f',
              'bestaudio/best',
              '--extract-audio',
              '--audio-format',
              args.youtubeSubgenAudioFormat,
              '--no-warnings',
              '-o',
              path.join(tempDir, '%(id)s.%(ext)s'),
              target,
            ],
            {
              logLevel: args.logLevel,
              commandLabel: 'yt-dlp:audio',
              streamOutput: true,
            },
            state.youtubeSubgenChildren,
          );

          const audioPath = findAudioFile(tempDir, args.youtubeSubgenAudioFormat);
          if (!audioPath) {
            throw new Error('Audio extraction succeeded, but no audio file was found.');
          }
          const whisperAudioPath = await convertAudioForWhisper(audioPath, tempDir);

          // Each track has its own try/catch so one failing does not prevent
          // the other from being attempted.
          if (needsPrimaryWhisper) {
            try {
              const primaryPrefix = path.join(tempDir, `${basename}.${primaryLabel}`);
              const primarySrt = await runWhisper(
                whisperBin,
                modelPath,
                whisperAudioPath,
                args.youtubeWhisperSourceLanguage,
                false,
                primaryPrefix,
              );
              primaryAlias = await publishTrack('primary', 'whisper', primarySrt, basename);
            } catch (error) {
              log(
                'warn',
                args.logLevel,
                `Failed to generate primary subtitle via whisper fallback: ${errorMessage(error)}`,
              );
            }
          }

          if (needsSecondaryWhisper) {
            try {
              const secondaryPrefix = path.join(tempDir, `${basename}.${secondaryLabel}`);
              const secondarySrt = await runWhisper(
                whisperBin,
                modelPath,
                whisperAudioPath,
                args.youtubeWhisperSourceLanguage,
                true,
                secondaryPrefix,
              );
              secondaryAlias = await publishTrack(
                'secondary',
                'whisper-translate',
                secondarySrt,
                basename,
              );
            } catch (error) {
              log(
                'warn',
                args.logLevel,
                `Failed to generate secondary subtitle via whisper fallback: ${errorMessage(error)}`,
              );
            }
          }
        } catch (error) {
          log('warn', args.logLevel, `Whisper fallback pipeline failed: ${errorMessage(error)}`);
        }
      }
    }

    if (!secondaryCanUseWhisperTranslate && !selectedSecondary) {
      log(
        'warn',
        args.logLevel,
        `Secondary subtitle language (${secondaryLabel}) has no whisper translate fallback; relying on yt-dlp subtitles only.`,
      );
    }

    if (!primaryAlias && !secondaryAlias) {
      throw new Error('Failed to generate any subtitle tracks.');
    }
    if (!primaryAlias || !secondaryAlias) {
      log(
        'warn',
        args.logLevel,
        `Generated partial subtitle result: primary=${primaryAlias ? 'ok' : 'missing'}, secondary=${secondaryAlias ? 'ok' : 'missing'}`,
      );
    }

    return {
      basename,
      primaryPath: primaryAlias || undefined,
      secondaryPath: secondaryAlias || undefined,
    };
  } catch (error) {
    // Keep intermediate files around for inspection whenever the run fails.
    keepTemp = true;
    throw error;
  } finally {
    if (keepTemp) {
      log('warn', args.logLevel, `Keeping subtitle temp dir: ${tempDir}`);
    } else {
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
      } catch {
        // ignore cleanup failures
      }
    }
  }
}