mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-23 00:11:28 -07:00
fix: address CodeRabbit review feedback
This commit is contained in:
1
src/core/services/youtube/kinds.ts
Normal file
1
src/core/services/youtube/kinds.ts
Normal file
@@ -0,0 +1 @@
|
||||
/**
 * Origin of a YouTube subtitle track. The track probe tags entries from yt-dlp's `subtitles`
 * as `'manual'` and entries from `automatic_captions` as `'auto'`.
 */
export type YoutubeTrackKind = 'manual' | 'auto';
|
||||
@@ -1,4 +1,6 @@
|
||||
export type YoutubeTrackKind = 'manual' | 'auto';
|
||||
import type { YoutubeTrackKind } from './kinds';
|
||||
|
||||
export type { YoutubeTrackKind };
|
||||
|
||||
export function normalizeYoutubeLangCode(value: string): string {
|
||||
return value.trim().toLowerCase().replace(/_/g, '-').replace(/[^a-z0-9-]+/g, '');
|
||||
@@ -37,4 +39,3 @@ export function formatYoutubeTrackLabel(input: {
|
||||
const base = input.title?.trim() || language;
|
||||
return `${base} (${input.kind})`;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,10 +6,6 @@ import path from 'node:path';
|
||||
import { retimeYoutubeSubtitle } from './retime';
|
||||
|
||||
test('retimeYoutubeSubtitle uses the downloaded subtitle path as-is', async () => {
|
||||
if (process.platform === 'win32') {
|
||||
return;
|
||||
}
|
||||
|
||||
const root = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-youtube-retime-'));
|
||||
try {
|
||||
const primaryPath = path.join(root, 'primary.vtt');
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
export async function retimeYoutubeSubtitle(input: {
|
||||
primaryPath: string;
|
||||
secondaryPath: string | null;
|
||||
}): Promise<{ ok: boolean; path: string; strategy: 'none'; message: string }> {
|
||||
}): Promise<{ ok: boolean; path: string; strategy: 'none' | 'alass' | 'ffsubsync'; message: string }> {
|
||||
return {
|
||||
ok: true,
|
||||
path: input.primaryPath,
|
||||
|
||||
@@ -74,16 +74,31 @@ export function convertYoutubeTimedTextToVtt(xml: string): string {
|
||||
return 'WEBVTT\n';
|
||||
}
|
||||
|
||||
const blocks = rows.map((row, index) => {
|
||||
const blocks: string[] = [];
|
||||
let previousText = '';
|
||||
for (let index = 0; index < rows.length; index += 1) {
|
||||
const row = rows[index]!;
|
||||
const nextRow = rows[index + 1];
|
||||
const unclampedEnd = row.startMs + row.durationMs;
|
||||
const clampedEnd =
|
||||
nextRow && unclampedEnd > nextRow.startMs
|
||||
? Math.max(row.startMs, nextRow.startMs - 1)
|
||||
: unclampedEnd;
|
||||
if (clampedEnd <= row.startMs) {
|
||||
previousText = row.text;
|
||||
continue;
|
||||
}
|
||||
|
||||
return `${formatVttTimestamp(row.startMs)} --> ${formatVttTimestamp(clampedEnd)}\n${row.text}`;
|
||||
});
|
||||
const text =
|
||||
previousText && row.text.startsWith(previousText)
|
||||
? row.text.slice(previousText.length).trimStart()
|
||||
: row.text;
|
||||
previousText = row.text;
|
||||
if (!text) {
|
||||
continue;
|
||||
}
|
||||
blocks.push(`${formatVttTimestamp(row.startMs)} --> ${formatVttTimestamp(clampedEnd)}\n${text}`);
|
||||
}
|
||||
|
||||
return `WEBVTT\n\n${blocks.join('\n\n')}\n`;
|
||||
}
|
||||
|
||||
@@ -470,3 +470,48 @@ test('downloadYoutubeSubtitleTracks prefers direct download URLs when available'
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// Two tracks share the same `language` and `sourceLanguage` but differ in id, kind and
// download URL. Each must be fetched from its own URL, in order, and the resulting map
// must not point both track ids at the same value.
test('downloadYoutubeSubtitleTracks keeps duplicate source-language direct downloads distinct', async () => {
  const autoUrl = 'https://example.com/subs/ja-auto.vtt';
  const manualUrl = 'https://example.com/subs/ja-manual.vtt';

  await withTempDir(async (root) => {
    // Every URL handed to the stubbed fetch, in call order.
    const requestedUrls: string[] = [];

    await withStubFetch(
      async (url) => {
        requestedUrls.push(url);
        // Echo the URL into the body so each download has distinct content.
        return new Response(`WEBVTT\n${url}\n`, { status: 200 });
      },
      async () => {
        const outputDir = path.join(root, 'out');
        const downloaded = await downloadYoutubeSubtitleTracks({
          targetUrl: 'https://www.youtube.com/watch?v=abc123',
          outputDir,
          tracks: [
            {
              id: 'auto:ja-orig',
              language: 'ja',
              sourceLanguage: 'ja-orig',
              kind: 'auto',
              label: 'Japanese (auto)',
              downloadUrl: autoUrl,
              fileExtension: 'vtt',
            },
            {
              id: 'manual:ja-orig',
              language: 'ja',
              sourceLanguage: 'ja-orig',
              kind: 'manual',
              label: 'Japanese (manual)',
              downloadUrl: manualUrl,
              fileExtension: 'vtt',
            },
          ],
          mode: 'download',
        });

        assert.deepEqual(requestedUrls, [autoUrl, manualUrl]);
        assert.notEqual(downloaded.get('auto:ja-orig'), downloaded.get('manual:ja-orig'));
      },
    );
  });
});
|
||||
|
||||
@@ -7,12 +7,28 @@ import { convertYoutubeTimedTextToVtt, isYoutubeTimedTextExtension } from './tim
|
||||
|
||||
const YOUTUBE_SUBTITLE_EXTENSIONS = new Set(['.srt', '.vtt', '.ass']);
|
||||
const YOUTUBE_BATCH_PREFIX = 'youtube-batch';
|
||||
const YOUTUBE_DOWNLOAD_TIMEOUT_MS = 15_000;
|
||||
|
||||
function runCapture(command: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
|
||||
/**
 * Builds an abort signal that fires after `timeoutMs` milliseconds.
 *
 * @param timeoutMs Delay, in milliseconds, passed to `AbortSignal.timeout`.
 * @returns The timeout signal, or `undefined` when the runtime has no global `AbortSignal`
 *   or no `AbortSignal.timeout` function.
 */
function createFetchTimeoutSignal(timeoutMs: number): AbortSignal | undefined {
  const timeoutSupported =
    typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function';
  return timeoutSupported ? AbortSignal.timeout(timeoutMs) : undefined;
}
|
||||
|
||||
function runCapture(
|
||||
command: string,
|
||||
args: string[],
|
||||
timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
|
||||
): Promise<{ stdout: string; stderr: string }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
let stdout = '';
|
||||
let stderr = '';
|
||||
const timer = setTimeout(() => {
|
||||
proc.kill();
|
||||
reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
proc.stdout.setEncoding('utf8');
|
||||
proc.stderr.setEncoding('utf8');
|
||||
proc.stdout.on('data', (chunk) => {
|
||||
@@ -21,8 +37,12 @@ function runCapture(command: string, args: string[]): Promise<{ stdout: string;
|
||||
proc.stderr.on('data', (chunk) => {
|
||||
stderr += String(chunk);
|
||||
});
|
||||
proc.once('error', reject);
|
||||
proc.once('error', (error) => {
|
||||
clearTimeout(timer);
|
||||
reject(error);
|
||||
});
|
||||
proc.once('close', (code) => {
|
||||
clearTimeout(timer);
|
||||
if (code === 0) {
|
||||
resolve({ stdout, stderr });
|
||||
return;
|
||||
@@ -35,11 +55,16 @@ function runCapture(command: string, args: string[]): Promise<{ stdout: string;
|
||||
function runCaptureDetailed(
|
||||
command: string,
|
||||
args: string[],
|
||||
timeoutMs = YOUTUBE_DOWNLOAD_TIMEOUT_MS,
|
||||
): Promise<{ stdout: string; stderr: string; code: number }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
|
||||
let stdout = '';
|
||||
let stderr = '';
|
||||
const timer = setTimeout(() => {
|
||||
proc.kill();
|
||||
reject(new Error(`yt-dlp timed out after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
proc.stdout.setEncoding('utf8');
|
||||
proc.stderr.setEncoding('utf8');
|
||||
proc.stdout.on('data', (chunk) => {
|
||||
@@ -48,8 +73,12 @@ function runCaptureDetailed(
|
||||
proc.stderr.on('data', (chunk) => {
|
||||
stderr += String(chunk);
|
||||
});
|
||||
proc.once('error', reject);
|
||||
proc.once('error', (error) => {
|
||||
clearTimeout(timer);
|
||||
reject(error);
|
||||
});
|
||||
proc.once('close', (code) => {
|
||||
clearTimeout(timer);
|
||||
resolve({ stdout, stderr, code: code ?? 1 });
|
||||
});
|
||||
});
|
||||
@@ -125,8 +154,13 @@ async function downloadSubtitleFromUrl(input: {
|
||||
: YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`)
|
||||
? ext
|
||||
: 'vtt';
|
||||
const targetPath = path.join(input.outputDir, `${input.prefix}.${input.track.sourceLanguage}.${safeExt}`);
|
||||
const response = await fetch(input.track.downloadUrl);
|
||||
const targetPath = path.join(
|
||||
input.outputDir,
|
||||
`${input.prefix}.${input.track.sourceLanguage}.${safeExt}`,
|
||||
);
|
||||
const response = await fetch(input.track.downloadUrl, {
|
||||
signal: createFetchTimeoutSignal(YOUTUBE_DOWNLOAD_TIMEOUT_MS),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status} while downloading ${input.track.sourceLanguage}`);
|
||||
}
|
||||
@@ -195,6 +229,8 @@ export async function downloadYoutubeSubtitleTracks(input: {
|
||||
mode: YoutubeFlowMode;
|
||||
}): Promise<Map<string, string>> {
|
||||
fs.mkdirSync(input.outputDir, { recursive: true });
|
||||
const hasDuplicateSourceLanguages =
|
||||
new Set(input.tracks.map((track) => track.sourceLanguage)).size !== input.tracks.length;
|
||||
for (const name of fs.readdirSync(input.outputDir)) {
|
||||
if (name.startsWith(`${YOUTUBE_BATCH_PREFIX}.`)) {
|
||||
try {
|
||||
@@ -204,12 +240,12 @@ export async function downloadYoutubeSubtitleTracks(input: {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (input.tracks.every(canDownloadSubtitleFromUrl)) {
|
||||
if (hasDuplicateSourceLanguages || input.tracks.every(canDownloadSubtitleFromUrl)) {
|
||||
const results = new Map<string, string>();
|
||||
for (const track of input.tracks) {
|
||||
const download = await downloadSubtitleFromUrl({
|
||||
outputDir: input.outputDir,
|
||||
prefix: YOUTUBE_BATCH_PREFIX,
|
||||
prefix: track.id.replace(/[^a-z0-9_-]+/gi, '-'),
|
||||
track,
|
||||
});
|
||||
results.set(track.id, download.path);
|
||||
|
||||
@@ -16,11 +16,15 @@ async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
|
||||
|
||||
function makeFakeYtDlpScript(dir: string, payload: unknown): void {
|
||||
const scriptPath = path.join(dir, 'yt-dlp');
|
||||
const stdoutBody = typeof payload === 'string' ? payload : JSON.stringify(payload);
|
||||
const script = `#!/usr/bin/env node
|
||||
process.stdout.write(${JSON.stringify(JSON.stringify(payload))});
|
||||
process.stdout.write(${JSON.stringify(stdoutBody)});
|
||||
`;
|
||||
fs.writeFileSync(scriptPath, script, 'utf8');
|
||||
fs.chmodSync(scriptPath, 0o755);
|
||||
if (process.platform !== 'win32') {
|
||||
fs.chmodSync(scriptPath, 0o755);
|
||||
}
|
||||
fs.writeFileSync(scriptPath + '.cmd', `@echo off\r\nnode "${scriptPath}"\r\n`, 'utf8');
|
||||
}
|
||||
|
||||
async function withFakeYtDlp<T>(payload: unknown, fn: () => Promise<T>): Promise<T> {
|
||||
@@ -78,3 +82,12 @@ test('probeYoutubeTracks keeps preferring srt for manual captions', async () =>
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
// The fake yt-dlp is given the non-JSON payload 'not-json'; the probe must reject with an
// error whose message matches the JSON-parse failure text.
test('probeYoutubeTracks reports malformed yt-dlp JSON with context', async () => {
  const expectedMessage = /Failed to parse yt-dlp output as JSON/;

  await withFakeYtDlp('not-json', async () => {
    const probe = async () => probeYoutubeTracks('https://www.youtube.com/watch?v=abc123');
    await assert.rejects(probe, expectedMessage);
  });
});
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
import { spawn } from 'node:child_process';
|
||||
import type { YoutubeTrackOption } from '../../../types';
|
||||
import {
|
||||
formatYoutubeTrackLabel,
|
||||
normalizeYoutubeLangCode,
|
||||
type YoutubeTrackKind,
|
||||
} from './labels';
|
||||
import { formatYoutubeTrackLabel, normalizeYoutubeLangCode, type YoutubeTrackKind } from './labels';
|
||||
|
||||
export type YoutubeTrackProbeResult = {
|
||||
videoId: string;
|
||||
@@ -102,7 +98,21 @@ export type { YoutubeTrackOption };
|
||||
|
||||
export async function probeYoutubeTracks(targetUrl: string): Promise<YoutubeTrackProbeResult> {
|
||||
const { stdout } = await runCapture('yt-dlp', ['--dump-single-json', '--no-warnings', targetUrl]);
|
||||
const info = JSON.parse(stdout) as YtDlpInfo;
|
||||
const trimmedStdout = stdout.trim();
|
||||
if (!trimmedStdout) {
|
||||
throw new Error('yt-dlp returned empty output while probing subtitle tracks');
|
||||
}
|
||||
let info: YtDlpInfo;
|
||||
try {
|
||||
info = JSON.parse(trimmedStdout) as YtDlpInfo;
|
||||
} catch (error) {
|
||||
const snippet = trimmedStdout.slice(0, 200);
|
||||
throw new Error(
|
||||
`Failed to parse yt-dlp output as JSON: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}${snippet ? `; stdout=${snippet}` : ''}`,
|
||||
);
|
||||
}
|
||||
const tracks = [...toTracks(info.subtitles, 'manual'), ...toTracks(info.automatic_captions, 'auto')];
|
||||
return {
|
||||
videoId: info.id || '',
|
||||
|
||||
Reference in New Issue
Block a user