fix: address CodeRabbit review feedback

This commit is contained in:
2026-03-22 19:37:49 -07:00
parent 8da3a26855
commit d65575c80d
33 changed files with 678 additions and 67 deletions

View File

@@ -0,0 +1 @@
export type YoutubeTrackKind = 'manual' | 'auto';

View File

@@ -1,4 +1,6 @@
export type YoutubeTrackKind = 'manual' | 'auto';
import type { YoutubeTrackKind } from './kinds';
export type { YoutubeTrackKind };
export function normalizeYoutubeLangCode(value: string): string {
return value.trim().toLowerCase().replace(/_/g, '-').replace(/[^a-z0-9-]+/g, '');
@@ -37,4 +39,3 @@ export function formatYoutubeTrackLabel(input: {
const base = input.title?.trim() || language;
return `${base} (${input.kind})`;
}

View File

@@ -6,10 +6,6 @@ import path from 'node:path';
import { retimeYoutubeSubtitle } from './retime';
test('retimeYoutubeSubtitle uses the downloaded subtitle path as-is', async () => {
if (process.platform === 'win32') {
return;
}
const root = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-youtube-retime-'));
try {
const primaryPath = path.join(root, 'primary.vtt');

View File

@@ -1,7 +1,7 @@
export async function retimeYoutubeSubtitle(input: {
primaryPath: string;
secondaryPath: string | null;
}): Promise<{ ok: boolean; path: string; strategy: 'none'; message: string }> {
}): Promise<{ ok: boolean; path: string; strategy: 'none' | 'alass' | 'ffsubsync'; message: string }> {
return {
ok: true,
path: input.primaryPath,

View File

@@ -74,16 +74,31 @@ export function convertYoutubeTimedTextToVtt(xml: string): string {
return 'WEBVTT\n';
}
const blocks = rows.map((row, index) => {
const blocks: string[] = [];
let previousText = '';
for (let index = 0; index < rows.length; index += 1) {
const row = rows[index]!;
const nextRow = rows[index + 1];
const unclampedEnd = row.startMs + row.durationMs;
const clampedEnd =
nextRow && unclampedEnd > nextRow.startMs
? Math.max(row.startMs, nextRow.startMs - 1)
: unclampedEnd;
if (clampedEnd <= row.startMs) {
previousText = row.text;
continue;
}
return `${formatVttTimestamp(row.startMs)} --> ${formatVttTimestamp(clampedEnd)}\n${row.text}`;
});
const text =
previousText && row.text.startsWith(previousText)
? row.text.slice(previousText.length).trimStart()
: row.text;
previousText = row.text;
if (!text) {
continue;
}
blocks.push(`${formatVttTimestamp(row.startMs)} --> ${formatVttTimestamp(clampedEnd)}\n${text}`);
}
return `WEBVTT\n\n${blocks.join('\n\n')}\n`;
}

View File

@@ -470,3 +470,48 @@ test('downloadYoutubeSubtitleTracks prefers direct download URLs when available'
);
});
});
test('downloadYoutubeSubtitleTracks keeps duplicate source-language direct downloads distinct', async () => {
await withTempDir(async (root) => {
const seen: string[] = [];
await withStubFetch(
async (url) => {
seen.push(url);
return new Response(`WEBVTT\n${url}\n`, { status: 200 });
},
async () => {
const result = await downloadYoutubeSubtitleTracks({
targetUrl: 'https://www.youtube.com/watch?v=abc123',
outputDir: path.join(root, 'out'),
tracks: [
{
id: 'auto:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'auto',
label: 'Japanese (auto)',
downloadUrl: 'https://example.com/subs/ja-auto.vtt',
fileExtension: 'vtt',
},
{
id: 'manual:ja-orig',
language: 'ja',
sourceLanguage: 'ja-orig',
kind: 'manual',
label: 'Japanese (manual)',
downloadUrl: 'https://example.com/subs/ja-manual.vtt',
fileExtension: 'vtt',
},
],
mode: 'download',
});
assert.deepEqual(seen, [
'https://example.com/subs/ja-auto.vtt',
'https://example.com/subs/ja-manual.vtt',
]);
assert.notEqual(result.get('auto:ja-orig'), result.get('manual:ja-orig'));
},
);
});
});

View File

@@ -7,12 +7,28 @@ import { convertYoutubeTimedTextToVtt, isYoutubeTimedTextExtension } from './tim
const YOUTUBE_SUBTITLE_EXTENSIONS = new Set(['.srt', '.vtt', '.ass']);
const YOUTUBE_BATCH_PREFIX = 'youtube-batch';
const YOUTUBE_DOWNLOAD_TIMEOUT_MS = 15_000;
function runCapture(command: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
function createFetchTimeoutSignal(timeoutMs: number): AbortSignal | undefined {
if (typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function') {
return AbortSignal.timeout(timeoutMs);
}
return undefined;
}
function runCapture(
command: string,
args: string[],
timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string }> {
return new Promise((resolve, reject) => {
const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
let stdout = '';
let stderr = '';
const timer = setTimeout(() => {
proc.kill();
reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
}, timeoutMs);
proc.stdout.setEncoding('utf8');
proc.stderr.setEncoding('utf8');
proc.stdout.on('data', (chunk) => {
@@ -21,8 +37,12 @@ function runCapture(command: string, args: string[]): Promise<{ stdout: string;
proc.stderr.on('data', (chunk) => {
stderr += String(chunk);
});
proc.once('error', reject);
proc.once('error', (error) => {
clearTimeout(timer);
reject(error);
});
proc.once('close', (code) => {
clearTimeout(timer);
if (code === 0) {
resolve({ stdout, stderr });
return;
@@ -35,11 +55,16 @@ function runCapture(command: string, args: string[]): Promise<{ stdout: string;
function runCaptureDetailed(
command: string,
args: string[],
timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
): Promise<{ stdout: string; stderr: string; code: number }> {
return new Promise((resolve, reject) => {
const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
let stdout = '';
let stderr = '';
const timer = setTimeout(() => {
proc.kill();
reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
}, timeoutMs);
proc.stdout.setEncoding('utf8');
proc.stderr.setEncoding('utf8');
proc.stdout.on('data', (chunk) => {
@@ -48,8 +73,12 @@ function runCaptureDetailed(
proc.stderr.on('data', (chunk) => {
stderr += String(chunk);
});
proc.once('error', reject);
proc.once('error', (error) => {
clearTimeout(timer);
reject(error);
});
proc.once('close', (code) => {
clearTimeout(timer);
resolve({ stdout, stderr, code: code ?? 1 });
});
});
@@ -125,8 +154,13 @@ async function downloadSubtitleFromUrl(input: {
: YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`)
? ext
: 'vtt';
const targetPath = path.join(input.outputDir, `${input.prefix}.${input.track.sourceLanguage}.${safeExt}`);
const response = await fetch(input.track.downloadUrl);
const targetPath = path.join(
input.outputDir,
`${input.prefix}.${input.track.sourceLanguage}.${safeExt}`,
);
const response = await fetch(input.track.downloadUrl, {
signal: createFetchTimeoutSignal(YOUTUBE_DOWNLOAD_TIMEOUT_MS),
});
if (!response.ok) {
throw new Error(`HTTP ${response.status} while downloading ${input.track.sourceLanguage}`);
}
@@ -195,6 +229,8 @@ export async function downloadYoutubeSubtitleTracks(input: {
mode: YoutubeFlowMode;
}): Promise<Map<string, string>> {
fs.mkdirSync(input.outputDir, { recursive: true });
const hasDuplicateSourceLanguages =
new Set(input.tracks.map((track) => track.sourceLanguage)).size !== input.tracks.length;
for (const name of fs.readdirSync(input.outputDir)) {
if (name.startsWith(`${YOUTUBE_BATCH_PREFIX}.`)) {
try {
@@ -204,12 +240,12 @@ export async function downloadYoutubeSubtitleTracks(input: {
}
}
}
if (input.tracks.every(canDownloadSubtitleFromUrl)) {
if (hasDuplicateSourceLanguages || input.tracks.every(canDownloadSubtitleFromUrl)) {
const results = new Map<string, string>();
for (const track of input.tracks) {
const download = await downloadSubtitleFromUrl({
outputDir: input.outputDir,
prefix: YOUTUBE_BATCH_PREFIX,
prefix: track.id.replace(/[^a-z0-9_-]+/gi, '-'),
track,
});
results.set(track.id, download.path);

View File

@@ -16,11 +16,15 @@ async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
function makeFakeYtDlpScript(dir: string, payload: unknown): void {
const scriptPath = path.join(dir, 'yt-dlp');
const stdoutBody = typeof payload === 'string' ? payload : JSON.stringify(payload);
const script = `#!/usr/bin/env node
process.stdout.write(${JSON.stringify(JSON.stringify(payload))});
process.stdout.write(${JSON.stringify(stdoutBody)});
`;
fs.writeFileSync(scriptPath, script, 'utf8');
fs.chmodSync(scriptPath, 0o755);
if (process.platform !== 'win32') {
fs.chmodSync(scriptPath, 0o755);
}
fs.writeFileSync(scriptPath + '.cmd', `@echo off\r\nnode "${scriptPath}"\r\n`, 'utf8');
}
async function withFakeYtDlp<T>(payload: unknown, fn: () => Promise<T>): Promise<T> {
@@ -78,3 +82,12 @@ test('probeYoutubeTracks keeps preferring srt for manual captions', async () =>
},
);
});
test('probeYoutubeTracks reports malformed yt-dlp JSON with context', async () => {
await withFakeYtDlp('not-json', async () => {
await assert.rejects(
async () => await probeYoutubeTracks('https://www.youtube.com/watch?v=abc123'),
/Failed to parse yt-dlp output as JSON/,
);
});
});

View File

@@ -1,10 +1,6 @@
import { spawn } from 'node:child_process';
import type { YoutubeTrackOption } from '../../../types';
import {
formatYoutubeTrackLabel,
normalizeYoutubeLangCode,
type YoutubeTrackKind,
} from './labels';
import { formatYoutubeTrackLabel, normalizeYoutubeLangCode, type YoutubeTrackKind } from './labels';
export type YoutubeTrackProbeResult = {
videoId: string;
@@ -102,7 +98,21 @@ export type { YoutubeTrackOption };
export async function probeYoutubeTracks(targetUrl: string): Promise<YoutubeTrackProbeResult> {
const { stdout } = await runCapture('yt-dlp', ['--dump-single-json', '--no-warnings', targetUrl]);
const info = JSON.parse(stdout) as YtDlpInfo;
const trimmedStdout = stdout.trim();
if (!trimmedStdout) {
throw new Error('yt-dlp returned empty output while probing subtitle tracks');
}
let info: YtDlpInfo;
try {
info = JSON.parse(trimmedStdout) as YtDlpInfo;
} catch (error) {
const snippet = trimmedStdout.slice(0, 200);
throw new Error(
`Failed to parse yt-dlp output as JSON: ${
error instanceof Error ? error.message : String(error)
}${snippet ? `; stdout=${snippet}` : ''}`,
);
}
const tracks = [...toTracks(info.subtitles, 'manual'), ...toTracks(info.automatic_captions, 'auto')];
return {
videoId: info.id || '',