This commit is contained in:
2026-02-17 22:50:57 -08:00
parent ffeef9c136
commit f20d019c11
315 changed files with 9876 additions and 12537 deletions

View File

@@ -1,17 +1,21 @@
import fs from "node:fs";
import path from "node:path";
import os from "node:os";
import type { Args, SubtitleCandidate, YoutubeSubgenOutputs } from "./types.js";
import { YOUTUBE_SUB_EXTENSIONS, YOUTUBE_AUDIO_EXTENSIONS } from "./types.js";
import { log } from "./log.js";
import fs from 'node:fs';
import path from 'node:path';
import os from 'node:os';
import type { Args, SubtitleCandidate, YoutubeSubgenOutputs } from './types.js';
import { YOUTUBE_SUB_EXTENSIONS, YOUTUBE_AUDIO_EXTENSIONS } from './types.js';
import { log } from './log.js';
import {
resolvePathMaybe, uniqueNormalizedLangCodes,
escapeRegExp, normalizeBasename, runExternalCommand, commandExists,
} from "./util.js";
import { state } from "./mpv.js";
resolvePathMaybe,
uniqueNormalizedLangCodes,
escapeRegExp,
normalizeBasename,
runExternalCommand,
commandExists,
} from './util.js';
import { state } from './mpv.js';
/**
 * Builds the comma-separated language selector passed to yt-dlp's `--sub-langs`.
 *
 * Every language code is turned into the pattern `<code>.*` and the patterns
 * are joined with commas. The trailing `.*` presumably lets regional variants
 * (e.g. `en-US`) match as well — confirm against the yt-dlp documentation.
 *
 * @param langCodes Language codes to request; an empty list yields `''`.
 * @returns The joined selector, e.g. `ja.*,en.*`.
 */
function toYtdlpLangPattern(langCodes: string[]): string {
  return langCodes.map((lang) => `${lang}.*`).join(',');
}
function filenameHasLanguageTag(filenameLower: string, langCode: string): boolean {
@@ -24,16 +28,12 @@ function classifyLanguage(
filename: string,
primaryLangCodes: string[],
secondaryLangCodes: string[],
): "primary" | "secondary" | null {
): 'primary' | 'secondary' | null {
const lower = filename.toLowerCase();
const primary = primaryLangCodes.some((code) =>
filenameHasLanguageTag(lower, code),
);
const secondary = secondaryLangCodes.some((code) =>
filenameHasLanguageTag(lower, code),
);
if (primary && !secondary) return "primary";
if (secondary && !primary) return "secondary";
const primary = primaryLangCodes.some((code) => filenameHasLanguageTag(lower, code));
const secondary = secondaryLangCodes.some((code) => filenameHasLanguageTag(lower, code));
if (primary && !secondary) return 'primary';
if (secondary && !primary) return 'secondary';
return null;
}
@@ -41,20 +41,20 @@ function preferredLangLabel(langCodes: string[], fallback: string): string {
return uniqueNormalizedLangCodes(langCodes)[0] || fallback;
}
/**
 * Maps the origin of a subtitle candidate to the tag used in published file names.
 *
 * - `manual` / `auto` (tracks fetched through yt-dlp) become `ytdlp-manual` / `ytdlp-auto`.
 * - `whisper-translate` is kept verbatim.
 * - Any other value falls back to `whisper`.
 *
 * @param source Origin marker of the subtitle candidate.
 * @returns The tag string to embed in the output file name.
 */
function sourceTag(source: SubtitleCandidate['source']): string {
  if (source === 'manual' || source === 'auto') return `ytdlp-${source}`;
  if (source === 'whisper-translate') return 'whisper-translate';
  return 'whisper';
}
function pickBestCandidate(candidates: SubtitleCandidate[]): SubtitleCandidate | null {
if (candidates.length === 0) return null;
const scored = [...candidates].sort((a, b) => {
const sourceA = a.source === "manual" ? 1 : 0;
const sourceB = b.source === "manual" ? 1 : 0;
const sourceA = a.source === 'manual' ? 1 : 0;
const sourceB = b.source === 'manual' ? 1 : 0;
if (sourceA !== sourceB) return sourceB - sourceA;
const srtA = a.ext === ".srt" ? 1 : 0;
const srtB = b.ext === ".srt" ? 1 : 0;
const srtA = a.ext === '.srt' ? 1 : 0;
const srtB = b.ext === '.srt' ? 1 : 0;
if (srtA !== srtB) return srtB - srtA;
return b.size - a.size;
});
@@ -64,7 +64,7 @@ function pickBestCandidate(candidates: SubtitleCandidate[]): SubtitleCandidate |
function scanSubtitleCandidates(
tempDir: string,
knownSet: Set<string>,
source: "manual" | "auto",
source: 'manual' | 'auto',
primaryLangCodes: string[],
secondaryLangCodes: string[],
): SubtitleCandidate[] {
@@ -94,9 +94,9 @@ async function convertToSrt(
tempDir: string,
langLabel: string,
): Promise<string> {
if (path.extname(inputPath).toLowerCase() === ".srt") return inputPath;
if (path.extname(inputPath).toLowerCase() === '.srt') return inputPath;
const outputPath = path.join(tempDir, `converted.${langLabel}.srt`);
await runExternalCommand("ffmpeg", ["-y", "-loglevel", "error", "-i", inputPath, outputPath]);
await runExternalCommand('ffmpeg', ['-y', '-loglevel', 'error', '-i', inputPath, outputPath]);
return outputPath;
}
@@ -132,19 +132,19 @@ async function runWhisper(
outputPrefix: string,
): Promise<string> {
const args = [
"-m",
'-m',
modelPath,
"-f",
'-f',
audioPath,
"--output-srt",
"--output-file",
'--output-srt',
'--output-file',
outputPrefix,
"--language",
'--language',
language,
];
if (translate) args.push("--translate");
if (translate) args.push('--translate');
await runExternalCommand(whisperBin, args, {
commandLabel: "whisper",
commandLabel: 'whisper',
streamOutput: true,
});
const outputPath = `${outputPrefix}.srt`;
@@ -155,19 +155,19 @@ async function runWhisper(
}
async function convertAudioForWhisper(inputPath: string, tempDir: string): Promise<string> {
const wavPath = path.join(tempDir, "whisper-input.wav");
await runExternalCommand("ffmpeg", [
"-y",
"-loglevel",
"error",
"-i",
const wavPath = path.join(tempDir, 'whisper-input.wav');
await runExternalCommand('ffmpeg', [
'-y',
'-loglevel',
'error',
'-i',
inputPath,
"-ar",
"16000",
"-ac",
"1",
"-c:a",
"pcm_s16le",
'-ar',
'16000',
'-ac',
'1',
'-c:a',
'pcm_s16le',
wavPath,
]);
if (!fs.existsSync(wavPath)) {
@@ -179,65 +179,55 @@ async function convertAudioForWhisper(inputPath: string, tempDir: string): Promi
/**
 * Determines which whisper executable to use for the subtitle fallback.
 *
 * Resolution order:
 * 1. A non-empty `args.whisperBin` (after trimming) is passed through
 *    `resolvePathMaybe` and returned.
 * 2. Otherwise `whisper-cli` is returned when `commandExists` reports it.
 * 3. Otherwise `null`, meaning no whisper binary is available.
 *
 * @param args Parsed launcher arguments; only `whisperBin` is read here.
 * @returns The binary path or command name, or `null` when none was found.
 */
export function resolveWhisperBinary(args: Args): string | null {
  const explicit = args.whisperBin.trim();
  if (explicit) return resolvePathMaybe(explicit);
  // Probe for the default command name once; a second identical probe adds nothing.
  if (commandExists('whisper-cli')) return 'whisper-cli';
  return null;
}
export async function generateYoutubeSubtitles(
target: string,
args: Args,
onReady?: (lang: "primary" | "secondary", pathToLoad: string) => Promise<void>,
onReady?: (lang: 'primary' | 'secondary', pathToLoad: string) => Promise<void>,
): Promise<YoutubeSubgenOutputs> {
const outDir = path.resolve(resolvePathMaybe(args.youtubeSubgenOutDir));
fs.mkdirSync(outDir, { recursive: true });
const primaryLangCodes = uniqueNormalizedLangCodes(args.youtubePrimarySubLangs);
const secondaryLangCodes = uniqueNormalizedLangCodes(args.youtubeSecondarySubLangs);
const primaryLabel = preferredLangLabel(primaryLangCodes, "primary");
const secondaryLabel = preferredLangLabel(secondaryLangCodes, "secondary");
const primaryLabel = preferredLangLabel(primaryLangCodes, 'primary');
const secondaryLabel = preferredLangLabel(secondaryLangCodes, 'secondary');
const secondaryCanUseWhisperTranslate =
secondaryLangCodes.includes("en") || secondaryLangCodes.includes("eng");
const ytdlpManualLangs = toYtdlpLangPattern([
...primaryLangCodes,
...secondaryLangCodes,
]);
secondaryLangCodes.includes('en') || secondaryLangCodes.includes('eng');
const ytdlpManualLangs = toYtdlpLangPattern([...primaryLangCodes, ...secondaryLangCodes]);
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "subminer-yt-subgen-"));
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yt-subgen-'));
const knownFiles = new Set<string>();
let keepTemp = args.youtubeSubgenKeepTemp;
const publishTrack = async (
lang: "primary" | "secondary",
source: SubtitleCandidate["source"],
lang: 'primary' | 'secondary',
source: SubtitleCandidate['source'],
selectedPath: string,
basename: string,
): Promise<string> => {
const langLabel = lang === "primary" ? primaryLabel : secondaryLabel;
const taggedPath = path.join(
outDir,
`${basename}.${langLabel}.${sourceTag(source)}.srt`,
);
const langLabel = lang === 'primary' ? primaryLabel : secondaryLabel;
const taggedPath = path.join(outDir, `${basename}.${langLabel}.${sourceTag(source)}.srt`);
const aliasPath = path.join(outDir, `${basename}.${langLabel}.srt`);
fs.copyFileSync(selectedPath, taggedPath);
fs.copyFileSync(taggedPath, aliasPath);
log(
"info",
args.logLevel,
`Generated subtitle (${langLabel}, ${source}) -> ${aliasPath}`,
);
log('info', args.logLevel, `Generated subtitle (${langLabel}, ${source}) -> ${aliasPath}`);
if (onReady) await onReady(lang, aliasPath);
return aliasPath;
};
try {
log("debug", args.logLevel, `YouTube subtitle temp dir: ${tempDir}`);
log('debug', args.logLevel, `YouTube subtitle temp dir: ${tempDir}`);
const meta = await runExternalCommand(
"yt-dlp",
["--dump-single-json", "--no-warnings", target],
'yt-dlp',
['--dump-single-json', '--no-warnings', target],
{
captureStdout: true,
logLevel: args.logLevel,
commandLabel: "yt-dlp:meta",
commandLabel: 'yt-dlp:meta',
},
state.youtubeSubgenChildren,
);
@@ -246,23 +236,23 @@ export async function generateYoutubeSubtitles(
const basename = normalizeBasename(videoId, videoId);
await runExternalCommand(
"yt-dlp",
'yt-dlp',
[
"--skip-download",
"--no-warnings",
"--write-subs",
"--sub-format",
"srt/vtt/best",
"--sub-langs",
'--skip-download',
'--no-warnings',
'--write-subs',
'--sub-format',
'srt/vtt/best',
'--sub-langs',
ytdlpManualLangs,
"-o",
path.join(tempDir, "%(id)s.%(ext)s"),
'-o',
path.join(tempDir, '%(id)s.%(ext)s'),
target,
],
{
allowFailure: true,
logLevel: args.logLevel,
commandLabel: "yt-dlp:manual-subs",
commandLabel: 'yt-dlp:manual-subs',
streamOutput: true,
},
state.youtubeSubgenChildren,
@@ -271,41 +261,37 @@ export async function generateYoutubeSubtitles(
const manualSubs = scanSubtitleCandidates(
tempDir,
knownFiles,
"manual",
'manual',
primaryLangCodes,
secondaryLangCodes,
);
for (const sub of manualSubs) knownFiles.add(sub.path);
let primaryCandidates = manualSubs.filter((entry) => entry.lang === "primary");
let secondaryCandidates = manualSubs.filter(
(entry) => entry.lang === "secondary",
);
let primaryCandidates = manualSubs.filter((entry) => entry.lang === 'primary');
let secondaryCandidates = manualSubs.filter((entry) => entry.lang === 'secondary');
const missingAuto: string[] = [];
if (primaryCandidates.length === 0)
missingAuto.push(toYtdlpLangPattern(primaryLangCodes));
if (secondaryCandidates.length === 0)
missingAuto.push(toYtdlpLangPattern(secondaryLangCodes));
if (primaryCandidates.length === 0) missingAuto.push(toYtdlpLangPattern(primaryLangCodes));
if (secondaryCandidates.length === 0) missingAuto.push(toYtdlpLangPattern(secondaryLangCodes));
if (missingAuto.length > 0) {
await runExternalCommand(
"yt-dlp",
'yt-dlp',
[
"--skip-download",
"--no-warnings",
"--write-auto-subs",
"--sub-format",
"srt/vtt/best",
"--sub-langs",
missingAuto.join(","),
"-o",
path.join(tempDir, "%(id)s.%(ext)s"),
'--skip-download',
'--no-warnings',
'--write-auto-subs',
'--sub-format',
'srt/vtt/best',
'--sub-langs',
missingAuto.join(','),
'-o',
path.join(tempDir, '%(id)s.%(ext)s'),
target,
],
{
allowFailure: true,
logLevel: args.logLevel,
commandLabel: "yt-dlp:auto-subs",
commandLabel: 'yt-dlp:auto-subs',
streamOutput: true,
},
state.youtubeSubgenChildren,
@@ -314,45 +300,31 @@ export async function generateYoutubeSubtitles(
const autoSubs = scanSubtitleCandidates(
tempDir,
knownFiles,
"auto",
'auto',
primaryLangCodes,
secondaryLangCodes,
);
for (const sub of autoSubs) knownFiles.add(sub.path);
primaryCandidates = primaryCandidates.concat(
autoSubs.filter((entry) => entry.lang === "primary"),
autoSubs.filter((entry) => entry.lang === 'primary'),
);
secondaryCandidates = secondaryCandidates.concat(
autoSubs.filter((entry) => entry.lang === "secondary"),
autoSubs.filter((entry) => entry.lang === 'secondary'),
);
}
let primaryAlias = "";
let secondaryAlias = "";
let primaryAlias = '';
let secondaryAlias = '';
const selectedPrimary = pickBestCandidate(primaryCandidates);
const selectedSecondary = pickBestCandidate(secondaryCandidates);
if (selectedPrimary) {
const srt = await convertToSrt(selectedPrimary.path, tempDir, primaryLabel);
primaryAlias = await publishTrack(
"primary",
selectedPrimary.source,
srt,
basename,
);
primaryAlias = await publishTrack('primary', selectedPrimary.source, srt, basename);
}
if (selectedSecondary) {
const srt = await convertToSrt(
selectedSecondary.path,
tempDir,
secondaryLabel,
);
secondaryAlias = await publishTrack(
"secondary",
selectedSecondary.source,
srt,
basename,
);
const srt = await convertToSrt(selectedSecondary.path, tempDir, secondaryLabel);
secondaryAlias = await publishTrack('secondary', selectedSecondary.source, srt, basename);
}
const needsPrimaryWhisper = !selectedPrimary;
@@ -361,40 +333,40 @@ export async function generateYoutubeSubtitles(
const whisperBin = resolveWhisperBinary(args);
const modelPath = args.whisperModel.trim()
? path.resolve(resolvePathMaybe(args.whisperModel.trim()))
: "";
: '';
const hasWhisperFallback = !!whisperBin && !!modelPath && fs.existsSync(modelPath);
if (!hasWhisperFallback) {
log(
"warn",
'warn',
args.logLevel,
"Whisper fallback is not configured; continuing with available subtitle tracks.",
'Whisper fallback is not configured; continuing with available subtitle tracks.',
);
} else {
try {
await runExternalCommand(
"yt-dlp",
'yt-dlp',
[
"-f",
"bestaudio/best",
"--extract-audio",
"--audio-format",
'-f',
'bestaudio/best',
'--extract-audio',
'--audio-format',
args.youtubeSubgenAudioFormat,
"--no-warnings",
"-o",
path.join(tempDir, "%(id)s.%(ext)s"),
'--no-warnings',
'-o',
path.join(tempDir, '%(id)s.%(ext)s'),
target,
],
{
logLevel: args.logLevel,
commandLabel: "yt-dlp:audio",
commandLabel: 'yt-dlp:audio',
streamOutput: true,
},
state.youtubeSubgenChildren,
);
const audioPath = findAudioFile(tempDir, args.youtubeSubgenAudioFormat);
if (!audioPath) {
throw new Error("Audio extraction succeeded, but no audio file was found.");
throw new Error('Audio extraction succeeded, but no audio file was found.');
}
const whisperAudioPath = await convertAudioForWhisper(audioPath, tempDir);
@@ -409,15 +381,10 @@ export async function generateYoutubeSubtitles(
false,
primaryPrefix,
);
primaryAlias = await publishTrack(
"primary",
"whisper",
primarySrt,
basename,
);
primaryAlias = await publishTrack('primary', 'whisper', primarySrt, basename);
} catch (error) {
log(
"warn",
'warn',
args.logLevel,
`Failed to generate primary subtitle via whisper fallback: ${(error as Error).message}`,
);
@@ -426,10 +393,7 @@ export async function generateYoutubeSubtitles(
if (needsSecondaryWhisper) {
try {
const secondaryPrefix = path.join(
tempDir,
`${basename}.${secondaryLabel}`,
);
const secondaryPrefix = path.join(tempDir, `${basename}.${secondaryLabel}`);
const secondarySrt = await runWhisper(
whisperBin!,
modelPath,
@@ -439,14 +403,14 @@ export async function generateYoutubeSubtitles(
secondaryPrefix,
);
secondaryAlias = await publishTrack(
"secondary",
"whisper-translate",
'secondary',
'whisper-translate',
secondarySrt,
basename,
);
} catch (error) {
log(
"warn",
'warn',
args.logLevel,
`Failed to generate secondary subtitle via whisper fallback: ${(error as Error).message}`,
);
@@ -454,7 +418,7 @@ export async function generateYoutubeSubtitles(
}
} catch (error) {
log(
"warn",
'warn',
args.logLevel,
`Whisper fallback pipeline failed: ${(error as Error).message}`,
);
@@ -464,20 +428,20 @@ export async function generateYoutubeSubtitles(
if (!secondaryCanUseWhisperTranslate && !selectedSecondary) {
log(
"warn",
'warn',
args.logLevel,
`Secondary subtitle language (${secondaryLabel}) has no whisper translate fallback; relying on yt-dlp subtitles only.`,
);
}
if (!primaryAlias && !secondaryAlias) {
throw new Error("Failed to generate any subtitle tracks.");
throw new Error('Failed to generate any subtitle tracks.');
}
if (!primaryAlias || !secondaryAlias) {
log(
"warn",
'warn',
args.logLevel,
`Generated partial subtitle result: primary=${primaryAlias ? "ok" : "missing"}, secondary=${secondaryAlias ? "ok" : "missing"}`,
`Generated partial subtitle result: primary=${primaryAlias ? 'ok' : 'missing'}, secondary=${secondaryAlias ? 'ok' : 'missing'}`,
);
}
@@ -491,7 +455,7 @@ export async function generateYoutubeSubtitles(
throw error;
} finally {
if (keepTemp) {
log("warn", args.logLevel, `Keeping subtitle temp dir: ${tempDir}`);
log('warn', args.logLevel, `Keeping subtitle temp dir: ${tempDir}`);
} else {
try {
fs.rmSync(tempDir, { recursive: true, force: true });