refactor(youtube): extract subtitle generation pipeline

This commit is contained in:
2026-03-08 16:10:56 -07:00
parent 9e46176519
commit 4c0575afe0
11 changed files with 876 additions and 467 deletions

View File

@@ -0,0 +1,367 @@
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import type { Args, SubtitleCandidate, YoutubeSubgenOutputs } from '../types.js';
import { log } from '../log.js';
import {
commandExists,
normalizeBasename,
resolvePathMaybe,
runExternalCommand,
uniqueNormalizedLangCodes,
} from '../util.js';
import { state } from '../mpv.js';
import { downloadYoutubeAudio, convertAudioForWhisper } from './audio-extraction.js';
import {
downloadManualSubtitles,
pickBestCandidate,
scanSubtitleCandidates,
toYtdlpLangPattern,
} from './manual-subs.js';
import { runLoggedYoutubePhase } from './progress.js';
import { fixSubtitleWithAi } from './subtitle-fix-ai.js';
import { runWhisper } from './whisper.js';
/**
 * Describes which subtitle steps a YouTube subtitle generation run performs.
 *
 * The four fetch/publish members are typed as boolean literals, so every valid plan
 * carries the same values for them; only the two `generate*` flags vary per run.
 */
export interface YoutubeSubtitleGenerationPlan {
// Always `true` (literal type).
fetchManualSubtitles: true;
// Always `false` (literal type).
fetchAutoSubtitles: false;
// Always `false` (literal type).
publishPrimaryManualSubtitle: false;
// Always `false` (literal type).
publishSecondaryManualSubtitle: false;
// Whether a primary subtitle track should be generated rather than taken from a manual track.
generatePrimarySubtitle: boolean;
// Whether a secondary subtitle track should be generated rather than taken from a manual track.
generateSecondarySubtitle: boolean;
}
/**
 * Derives the subtitle generation plan from the outcome of the manual subtitle probe.
 *
 * @param input.hasPrimaryManualSubtitle Whether a manual primary subtitle was selected.
 * @param input.hasSecondaryManualSubtitle Whether a manual secondary subtitle was selected.
 * @param input.secondaryCanTranslate Whether the secondary track can be produced by translation.
 * @returns A plan whose fetch/publish members are constant; the primary track is generated
 * when no manual primary exists, and the secondary track is generated only when no manual
 * secondary exists AND translation is possible.
 */
export function planYoutubeSubtitleGeneration(input: {
  hasPrimaryManualSubtitle: boolean;
  hasSecondaryManualSubtitle: boolean;
  secondaryCanTranslate: boolean;
}): YoutubeSubtitleGenerationPlan {
  const { hasPrimaryManualSubtitle, hasSecondaryManualSubtitle, secondaryCanTranslate } = input;

  const needsPrimary = !hasPrimaryManualSubtitle;
  const needsSecondary = !hasSecondaryManualSubtitle && secondaryCanTranslate;

  const plan: YoutubeSubtitleGenerationPlan = {
    fetchManualSubtitles: true,
    fetchAutoSubtitles: false,
    publishPrimaryManualSubtitle: false,
    publishSecondaryManualSubtitle: false,
    generatePrimarySubtitle: needsPrimary,
    generateSecondarySubtitle: needsSecondary,
  };
  return plan;
}
/**
 * Picks the label used for a subtitle language in file names and log messages.
 *
 * @param langCodes Requested language codes, in preference order.
 * @param fallback Label to use when normalisation yields no usable first code.
 * @returns The first entry of `uniqueNormalizedLangCodes(langCodes)`, or `fallback`
 * when that entry is missing or empty.
 */
function preferredLangLabel(langCodes: string[], fallback: string): string {
  const normalizedCodes = uniqueNormalizedLangCodes(langCodes);
  const firstCode = normalizedCodes[0];
  if (firstCode) {
    return firstCode;
  }
  return fallback;
}
/**
 * Maps a subtitle candidate source to the tag embedded in published file names.
 * The tag is the source value itself.
 */
function sourceTag(source: SubtitleCandidate['source']): string {
  const tag: string = source;
  return tag;
}
/**
 * Determines which whisper executable to run.
 *
 * @param args Run configuration; `args.whisperBin` is an optional explicit binary.
 * @returns `resolvePathMaybe` applied to the trimmed explicit binary when one is
 * configured; otherwise the string `'whisper-cli'` if `commandExists` reports it is
 * available; otherwise `null`.
 */
export function resolveWhisperBinary(args: Args): string | null {
  const configuredBinary = args.whisperBin.trim();
  if (configuredBinary.length > 0) {
    // An explicit setting wins and is not checked for existence here.
    return resolvePathMaybe(configuredBinary);
  }

  const fallbackBinary = 'whisper-cli';
  return commandExists(fallbackBinary) ? fallbackBinary : null;
}
/**
 * Optionally runs the AI subtitle fixer over a subtitle file and stores the result
 * next to the original.
 *
 * The fixer only runs when `args.youtubeFixWithAi` is truthy and
 * `args.aiConfig.enabled` is exactly `true`.
 *
 * @param selectedPath Path of the subtitle file to fix; it is read as UTF-8.
 * @param args Run configuration (AI settings and log level).
 * @param expectedLanguage Optional language hint forwarded to `fixSubtitleWithAi`.
 * @returns The path of the newly written fixed file, or `selectedPath` unchanged when
 * fixing is disabled or the fixer produced no content. Callers compare the returned
 * path with `selectedPath` to detect whether a fix was applied, so the fixed path is
 * always different from `selectedPath`.
 */
async function maybeFixSubtitleWithAi(
  selectedPath: string,
  args: Args,
  expectedLanguage?: string,
): Promise<string> {
  if (!args.youtubeFixWithAi || args.aiConfig.enabled !== true) {
    return selectedPath;
  }

  const subtitleName = path.basename(selectedPath);
  const fixedContent = await runLoggedYoutubePhase(
    {
      startMessage: `Starting AI subtitle fix: ${subtitleName}`,
      finishMessage: `Finished AI subtitle fix: ${subtitleName}`,
      failureMessage: `AI subtitle fix failed: ${subtitleName}`,
      log: (level, message) => log(level, args.logLevel, message),
    },
    async () => {
      const originalContent = fs.readFileSync(selectedPath, 'utf8');
      return fixSubtitleWithAi(
        originalContent,
        args.aiConfig,
        (message) => {
          log('warn', args.logLevel, message);
        },
        expectedLanguage,
      );
    },
  );
  if (!fixedContent) {
    return selectedPath;
  }

  const srtSuffix = /\.srt$/i;
  let fixedPath: string;
  if (srtSuffix.test(selectedPath)) {
    fixedPath = selectedPath.replace(srtSuffix, '.fixed.srt');
  } else {
    // Without a `.srt` suffix the replace above would be a no-op: the fixed content
    // would overwrite the source file and the caller could not tell a fix happened.
    // Insert `.fixed` before whatever extension the file has instead.
    const parsed = path.parse(selectedPath);
    fixedPath = path.format({ ...parsed, base: `${parsed.name}.fixed${parsed.ext}` });
  }
  fs.writeFileSync(fixedPath, fixedContent, 'utf8');
  return fixedPath;
}
/**
 * Produces subtitle tracks for a YouTube target, preferring manual subtitles found by
 * the manual-subtitle probe and falling back to whisper for tracks that are missing.
 *
 * Steps, in order:
 *  1. Probe metadata with `yt-dlp --dump-single-json` to obtain the video id.
 *  2. Download manual subtitles into a fresh temp dir and pick the best primary and
 *     secondary candidates.
 *  3. For each track the plan says to generate, and only if a whisper binary and an
 *     existing model file are configured: extract audio once, convert it for whisper,
 *     run whisper (transcribe for primary, translate for secondary), optionally run the
 *     AI fixer, then copy the result into the output dir.
 *
 * When a manual candidate is selected for a track, nothing is copied to the output dir
 * for it; the result only flags the track as native.
 *
 * @param target Passed verbatim to yt-dlp and to the download helpers.
 * @param args Run configuration (output dir, languages, whisper and AI settings, log level).
 * @param onReady Optional callback awaited right after each generated track has been
 * copied into the output dir; it receives the track kind and the alias path.
 * @returns The basename used for output files, the alias paths of generated tracks
 * (`undefined` when a track was not generated), and whether each track was satisfied
 * by a manual subtitle.
 * @throws Error with message 'Failed to generate any subtitle tracks.' when no manual
 * candidate was selected and no track was generated. Errors raised outside the
 * per-track whisper try/catch blocks (metadata probe, JSON parsing, manual download,
 * audio extraction/conversion) are rethrown after forcing the temp dir to be kept.
 */
export async function generateYoutubeSubtitles(
target: string,
args: Args,
onReady?: (lang: 'primary' | 'secondary', pathToLoad: string) => Promise<void>,
): Promise<YoutubeSubgenOutputs> {
// Output directory is made absolute and created up front (including parents).
const outDir = path.resolve(resolvePathMaybe(args.youtubeSubgenOutDir));
fs.mkdirSync(outDir, { recursive: true });
const primaryLangCodes = uniqueNormalizedLangCodes(args.youtubePrimarySubLangs);
const secondaryLangCodes = uniqueNormalizedLangCodes(args.youtubeSecondarySubLangs);
// Labels appear in output file names and log messages.
const primaryLabel = preferredLangLabel(primaryLangCodes, 'primary');
const secondaryLabel = preferredLangLabel(secondaryLangCodes, 'secondary');
// The secondary track is only generated when English ('en'/'eng') is among the requested
// secondary languages. NOTE(review): presumably because whisper's translate mode targets
// English only; confirm against the whisper wrapper.
const secondaryCanUseWhisperTranslate =
secondaryLangCodes.includes('en') || secondaryLangCodes.includes('eng');
const manualLangs = toYtdlpLangPattern([...primaryLangCodes, ...secondaryLangCodes]);
// All intermediate files (downloaded subtitles, audio, whisper output) live in this temp dir.
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yt-subgen-'));
const knownFiles = new Set<string>();
// Starts from the user setting; forced to true in the catch block below when the run fails.
let keepTemp = args.youtubeSubgenKeepTemp;
// Copies a finished subtitle into outDir under a source-tagged name and a plain
// `<basename>.<lang>.srt` alias, logs it, notifies onReady, and returns the alias path.
const publishTrack = async (
lang: 'primary' | 'secondary',
source: SubtitleCandidate['source'],
selectedPath: string,
basename: string,
): Promise<string> => {
const langLabel = lang === 'primary' ? primaryLabel : secondaryLabel;
const taggedPath = path.join(outDir, `${basename}.${langLabel}.${sourceTag(source)}.srt`);
const aliasPath = path.join(outDir, `${basename}.${langLabel}.srt`);
fs.copyFileSync(selectedPath, taggedPath);
fs.copyFileSync(taggedPath, aliasPath);
log('info', args.logLevel, `Generated subtitle (${langLabel}, ${source}) -> ${aliasPath}`);
if (onReady) await onReady(lang, aliasPath);
return aliasPath;
};
try {
// Phase 1: metadata probe; stdout is captured and parsed as JSON below.
const meta = await runLoggedYoutubePhase(
{
startMessage: 'Starting YouTube metadata probe',
finishMessage: 'Finished YouTube metadata probe',
failureMessage: 'YouTube metadata probe failed',
log: (level, message) => log(level, args.logLevel, message),
},
() =>
runExternalCommand(
'yt-dlp',
['--dump-single-json', '--no-warnings', target],
{
captureStdout: true,
logLevel: args.logLevel,
commandLabel: 'yt-dlp:meta',
},
state.youtubeSubgenChildren,
),
);
// Only `id` is read from the metadata; a timestamp stands in when it is absent or empty.
const metadata = JSON.parse(meta.stdout) as { id?: string };
const videoId = metadata.id || `${Date.now()}`;
const basename = normalizeBasename(videoId, videoId);
// Phase 2: download manual subtitles for all requested languages into the temp dir.
await runLoggedYoutubePhase(
{
startMessage: `Starting manual subtitle probe (${manualLangs || 'requested langs'})`,
finishMessage: 'Finished manual subtitle probe',
failureMessage: 'Manual subtitle probe failed',
log: (level, message) => log(level, args.logLevel, message),
},
() =>
downloadManualSubtitles(
target,
tempDir,
manualLangs,
args.logLevel,
state.youtubeSubgenChildren,
),
);
const manualSubs = scanSubtitleCandidates(
tempDir,
knownFiles,
'manual',
primaryLangCodes,
secondaryLangCodes,
);
// Record the scanned paths; knownFiles is not read again within this function.
for (const sub of manualSubs) knownFiles.add(sub.path);
const selectedPrimary = pickBestCandidate(
manualSubs.filter((entry) => entry.lang === 'primary'),
);
const selectedSecondary = pickBestCandidate(
manualSubs.filter((entry) => entry.lang === 'secondary'),
);
const plan = planYoutubeSubtitleGeneration({
hasPrimaryManualSubtitle: Boolean(selectedPrimary),
hasSecondaryManualSubtitle: Boolean(selectedSecondary),
secondaryCanTranslate: secondaryCanUseWhisperTranslate,
});
// Alias paths of generated tracks; they stay empty for tracks served by manual subtitles.
let primaryAlias = '';
let secondaryAlias = '';
// Manual candidates are only logged here: publishTrack is not called for them.
if (selectedPrimary) {
log(
'info',
args.logLevel,
`Using native YouTube subtitle track for primary (${primaryLabel}); skipping external subtitle copy.`,
);
}
if (selectedSecondary) {
log(
'info',
args.logLevel,
`Using native YouTube subtitle track for secondary (${secondaryLabel}); skipping external subtitle copy.`,
);
}
// Phase 3: whisper fallback for any track the plan marks for generation.
if (plan.generatePrimarySubtitle || plan.generateSecondarySubtitle) {
const whisperBin = resolveWhisperBinary(args);
const modelPath = args.whisperModel.trim()
? path.resolve(resolvePathMaybe(args.whisperModel.trim()))
: '';
// Whisper is usable only with a binary, a configured model path, and an existing model file.
const hasWhisperFallback = !!whisperBin && !!modelPath && fs.existsSync(modelPath);
if (!hasWhisperFallback) {
log(
'warn',
args.logLevel,
'Whisper fallback is not configured; continuing with available subtitle tracks.',
);
} else {
// Audio is extracted and converted once and shared by both whisper runs. These two
// phases sit outside the per-track try/catch, so a failure here aborts the whole run.
const audioPath = await runLoggedYoutubePhase(
{
startMessage: 'Starting fallback audio extraction for subtitle generation',
finishMessage: 'Finished fallback audio extraction',
failureMessage: 'Fallback audio extraction failed',
log: (level, message) => log(level, args.logLevel, message),
},
() =>
downloadYoutubeAudio(target, args, tempDir, state.youtubeSubgenChildren),
);
const whisperAudioPath = await runLoggedYoutubePhase(
{
startMessage: 'Starting ffmpeg audio prep for whisper',
finishMessage: 'Finished ffmpeg audio prep for whisper',
failureMessage: 'ffmpeg audio prep for whisper failed',
log: (level, message) => log(level, args.logLevel, message),
},
() => convertAudioForWhisper(audioPath, tempDir),
);
if (plan.generatePrimarySubtitle) {
// A primary failure is logged as a warning and does not prevent the secondary attempt.
try {
const primaryPrefix = path.join(tempDir, `${basename}.${primaryLabel}`);
const primarySrt = await runLoggedYoutubePhase(
{
startMessage: `Starting whisper primary subtitle generation (${primaryLabel})`,
finishMessage: `Finished whisper primary subtitle generation (${primaryLabel})`,
failureMessage: `Whisper primary subtitle generation failed (${primaryLabel})`,
log: (level, message) => log(level, args.logLevel, message),
},
() =>
// whisperBin is non-null here because hasWhisperFallback was true.
runWhisper(whisperBin!, args, {
modelPath,
audioPath: whisperAudioPath,
language: args.youtubeWhisperSourceLanguage,
translate: false,
outputPrefix: primaryPrefix,
}),
);
const fixedPrimary = await maybeFixSubtitleWithAi(
primarySrt,
args,
args.youtubeWhisperSourceLanguage,
);
// A different returned path means the AI fixer wrote a new file, hence the '-fixed' tag.
primaryAlias = await publishTrack(
'primary',
fixedPrimary === primarySrt ? 'whisper' : 'whisper-fixed',
fixedPrimary,
basename,
);
} catch (error) {
log(
'warn',
args.logLevel,
`Failed to generate primary subtitle via whisper fallback: ${(error as Error).message}`,
);
}
}
if (plan.generateSecondarySubtitle) {
// Same flow as primary, but whisper runs with translate enabled.
try {
const secondaryPrefix = path.join(tempDir, `${basename}.${secondaryLabel}`);
const secondarySrt = await runLoggedYoutubePhase(
{
startMessage: `Starting whisper secondary subtitle generation (${secondaryLabel})`,
finishMessage: `Finished whisper secondary subtitle generation (${secondaryLabel})`,
failureMessage: `Whisper secondary subtitle generation failed (${secondaryLabel})`,
log: (level, message) => log(level, args.logLevel, message),
},
() =>
runWhisper(whisperBin!, args, {
modelPath,
audioPath: whisperAudioPath,
language: args.youtubeWhisperSourceLanguage,
translate: true,
outputPrefix: secondaryPrefix,
}),
);
// No expected language is passed to the AI fixer for the translated track.
const fixedSecondary = await maybeFixSubtitleWithAi(secondarySrt, args);
secondaryAlias = await publishTrack(
'secondary',
fixedSecondary === secondarySrt ? 'whisper-translate' : 'whisper-translate-fixed',
fixedSecondary,
basename,
);
} catch (error) {
log(
'warn',
args.logLevel,
`Failed to generate secondary subtitle via whisper fallback: ${(error as Error).message}`,
);
}
}
}
}
// Explain a missing secondary track when translation was never an option for it.
if (!secondaryCanUseWhisperTranslate && !selectedSecondary) {
log(
'warn',
args.logLevel,
`Secondary subtitle language (${secondaryLabel}) has no whisper translate fallback; relying on manual subtitles only.`,
);
}
// Nothing manual and nothing generated: treat the run as failed.
if (!primaryAlias && !secondaryAlias && !selectedPrimary && !selectedSecondary) {
throw new Error('Failed to generate any subtitle tracks.');
}
// Exactly one of the two tracks is unavailable: report a partial result.
if ((!primaryAlias && !selectedPrimary) || (!secondaryAlias && !selectedSecondary)) {
log(
'warn',
args.logLevel,
`Generated partial subtitle result: primary=${primaryAlias || selectedPrimary ? 'ok' : 'missing'}, secondary=${secondaryAlias || selectedSecondary ? 'ok' : 'missing'}`,
);
}
return {
basename,
primaryPath: primaryAlias || undefined,
secondaryPath: secondaryAlias || undefined,
primaryNative: Boolean(selectedPrimary),
secondaryNative: Boolean(selectedSecondary),
};
} catch (error) {
// Keep the temp dir on any failure, regardless of the user setting, then rethrow.
keepTemp = true;
throw error;
} finally {
if (keepTemp) {
log('warn', args.logLevel, `Keeping subtitle temp dir: ${tempDir}`);
} else {
try {
fs.rmSync(tempDir, { recursive: true, force: true });
} catch {
// ignore cleanup failures
}
}
}
}