mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-23 00:11:28 -07:00
fix: align youtube playback with shared overlay startup
This commit is contained in:
25
src/core/services/youtube/generate.ts
Normal file
25
src/core/services/youtube/generate.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import type { YoutubeFlowMode } from '../../../types';
|
||||
import type { YoutubeTrackOption } from './track-probe';
|
||||
import { downloadYoutubeSubtitleTrack, downloadYoutubeSubtitleTracks } from './track-download';
|
||||
|
||||
/**
 * Tells whether a YouTube flow mode is the subtitle-generation mode.
 *
 * @param mode - Flow mode to inspect.
 * @returns `true` only when `mode` is `'generate'`.
 */
export function isYoutubeGenerationMode(mode: YoutubeFlowMode): boolean {
  switch (mode) {
    case 'generate':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Obtains a single subtitle track by handing the request, unchanged, to
 * `downloadYoutubeSubtitleTrack`.
 *
 * @param input - Video URL, output directory, selected track and flow mode.
 * @returns The object produced by the downloader, carrying the subtitle `path`.
 */
export async function acquireYoutubeSubtitleTrack(input: {
  targetUrl: string;
  outputDir: string;
  track: YoutubeTrackOption;
  mode: YoutubeFlowMode;
}): Promise<{ path: string }> {
  const acquired = await downloadYoutubeSubtitleTrack(input);
  return acquired;
}
|
||||
|
||||
/**
 * Obtains several subtitle tracks by handing the request, unchanged, to
 * `downloadYoutubeSubtitleTracks`.
 *
 * @param input - Video URL, output directory, selected tracks and flow mode.
 * @returns The map produced by the batch downloader.
 */
export async function acquireYoutubeSubtitleTracks(input: {
  targetUrl: string;
  outputDir: string;
  tracks: YoutubeTrackOption[];
  mode: YoutubeFlowMode;
}): Promise<Map<string, string>> {
  const acquired = await downloadYoutubeSubtitleTracks(input);
  return acquired;
}
|
||||
40
src/core/services/youtube/labels.ts
Normal file
40
src/core/services/youtube/labels.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
 * Kind of a YouTube subtitle track; the only permitted values are `'manual'` and `'auto'`
 * (presumably uploader-provided vs. automatically generated captions — confirm against the probe code).
 */
export type YoutubeTrackKind = 'manual' | 'auto';
|
||||
|
||||
/**
 * Canonicalizes a language code: surrounding whitespace is trimmed, letters are lower-cased,
 * underscores become hyphens, and every character outside `a-z`, `0-9` and `-` is dropped.
 *
 * @param value - Raw language code.
 * @returns The normalized code (possibly an empty string).
 */
export function normalizeYoutubeLangCode(value: string): string {
  const lowered = value.trim().toLowerCase();
  const hyphenated = lowered.split('_').join('-');
  return hyphenated.replace(/[^a-z0-9-]+/g, '');
}
|
||||
|
||||
/**
 * Reports whether a language code denotes Japanese.
 *
 * After normalization the code must be one of `ja`, `jp`, `jpn`, `japanese`,
 * or start with `ja-` / `jp-`.
 *
 * @param value - Raw language code.
 */
export function isJapaneseYoutubeLang(value: string): boolean {
  const code = normalizeYoutubeLangCode(value);
  if (['ja', 'jp', 'jpn', 'japanese'].includes(code)) {
    return true;
  }
  return /^(?:ja|jp)-/.test(code);
}
|
||||
|
||||
/**
 * Reports whether a language code denotes English.
 *
 * After normalization the code must be one of `en`, `eng`, `english`, `enus`, `en-us`,
 * or start with `en-`.
 *
 * @param value - Raw language code.
 */
export function isEnglishYoutubeLang(value: string): boolean {
  const code = normalizeYoutubeLangCode(value);
  switch (code) {
    case 'en':
    case 'eng':
    case 'english':
    case 'enus':
    case 'en-us':
      return true;
    default:
      return code.startsWith('en-');
  }
}
|
||||
|
||||
/**
 * Builds a display label of the form `"<name> (<kind>)"`.
 *
 * The name is the trimmed title when it is non-empty; otherwise the trimmed language,
 * or `'unknown'` when the language is blank too.
 *
 * @param input - Track language, kind and optional title.
 * @returns The formatted label.
 */
export function formatYoutubeTrackLabel(input: {
  language: string;
  kind: YoutubeTrackKind;
  title?: string;
}): string {
  const fallbackName = input.language.trim() || 'unknown';
  const title = input.title?.trim();
  const displayName = title ? title : fallbackName;
  return `${displayName} (${input.kind})`;
}
|
||||
|
||||
33
src/core/services/youtube/retime.test.ts
Normal file
33
src/core/services/youtube/retime.test.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { retimeYoutubeSubtitle } from './retime';
|
||||
|
||||
test('retimeYoutubeSubtitle uses the downloaded subtitle path as-is', async () => {
  // The scenario is not exercised on Windows.
  if (process.platform === 'win32') {
    return;
  }

  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-youtube-retime-'));
  try {
    const primaryPath = path.join(tempRoot, 'primary.vtt');
    const referencePath = path.join(tempRoot, 'reference.vtt');
    for (const subtitleFile of [primaryPath, referencePath]) {
      fs.writeFileSync(subtitleFile, 'WEBVTT\n', 'utf8');
    }

    const outcome = await retimeYoutubeSubtitle({
      primaryPath,
      secondaryPath: referencePath,
    });

    // The primary file must be returned untouched, with the "no retime" notice.
    assert.equal(outcome.ok, true);
    assert.equal(outcome.strategy, 'none');
    assert.equal(outcome.path, primaryPath);
    assert.equal(outcome.message, 'Using downloaded subtitle as-is (no automatic retime enabled)');
    assert.equal(fs.readFileSync(outcome.path, 'utf8'), 'WEBVTT\n');
  } finally {
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
});
|
||||
11
src/core/services/youtube/retime.ts
Normal file
11
src/core/services/youtube/retime.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
/**
 * Placeholder retime step: always reports success and returns the primary subtitle unchanged.
 *
 * @param input - Path of the primary subtitle and an optional secondary (reference) subtitle path.
 * @returns `ok: true`, the primary path, strategy `'none'`, and a message that carries the
 *          "(no automatic retime enabled)" note only when a secondary path was supplied.
 */
export async function retimeYoutubeSubtitle(input: {
  primaryPath: string;
  secondaryPath: string | null;
}): Promise<{ ok: boolean; path: string; strategy: 'none'; message: string }> {
  const note = input.secondaryPath ? ' (no automatic retime enabled)' : '';
  const message = 'Using downloaded subtitle as-is' + note;
  return { ok: true, path: input.primaryPath, strategy: 'none', message };
}
|
||||
89
src/core/services/youtube/timedtext.ts
Normal file
89
src/core/services/youtube/timedtext.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
/** One caption paragraph extracted from a YouTube timed-text XML document. */
interface YoutubeTimedTextRow {
  /** Value of the paragraph's `t` attribute, treated as a start offset in milliseconds. */
  startMs: number;
  /** Value of the paragraph's `d` attribute, treated as a duration in milliseconds. */
  durationMs: number;
  /** Caption text with markup removed and entities decoded. */
  text: string;
}
|
||||
|
||||
/** Lower-case format names (without a leading dot) that are treated as YouTube timed-text payloads. */
const YOUTUBE_TIMEDTEXT_EXTENSIONS = new Set(['srv1', 'srv2', 'srv3', 'ytsrv3']);
|
||||
|
||||
/**
 * Decodes the character references found in timed-text caption bodies.
 *
 * Handles the named references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and both decimal
 * (`&#39;`) and hexadecimal (`&#x27;`) numeric references.
 *
 * `&amp;` is decoded first and the other replacements run on that result, so a doubly
 * escaped reference such as `&amp;#39;` is fully decoded by the later steps.
 *
 * @param value - Text that may contain character references.
 * @returns The text with the supported references replaced by their characters.
 */
function decodeHtmlEntities(value: string): string {
  // String.fromCodePoint throws a RangeError for values above U+10FFFF; keep such
  // references verbatim instead of aborting the whole conversion.
  const fromNumericReference = (reference: string, codePoint: number): string =>
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : reference;

  return value
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&(?:apos|#39);/g, "'")
    .replace(/&#(\d+);/g, (reference: string, digits: string) =>
      fromNumericReference(reference, Number(digits)),
    )
    .replace(/&#x([0-9a-f]+);/gi, (reference: string, hex: string) =>
      fromNumericReference(reference, Number.parseInt(hex, 16)),
    );
}
|
||||
|
||||
/**
 * Collects every `name="value"` pair (double-quoted values only) from a raw attribute string.
 *
 * @param raw - Text between the tag name and the closing `>` of an element.
 * @returns Map from attribute name to its raw value; a repeated name keeps the last value.
 */
function parseAttributeMap(raw: string): Map<string, string> {
  const attributePattern = /([a-zA-Z0-9:_-]+)="([^"]*)"/g;
  const pairs = Array.from(
    raw.matchAll(attributePattern),
    (found): [string, string] => [found[1]!, found[2]!],
  );
  return new Map(pairs);
}
|
||||
|
||||
/**
 * Pulls caption rows out of a timed-text XML document.
 *
 * Every `<p ...>...</p>` element becomes a row when its `t` and `d` attributes are finite
 * numbers and its text is non-empty after `<br>` is turned into a newline, remaining tags
 * are removed, entities are decoded and the result is trimmed.
 *
 * @param xml - Raw timed-text XML.
 * @returns Rows in document order.
 */
function extractYoutubeTimedTextRows(xml: string): YoutubeTimedTextRow[] {
  const paragraphs = Array.from(xml.matchAll(/<p\b([^>]*)>([\s\S]*?)<\/p>/g));

  return paragraphs.flatMap((paragraph): YoutubeTimedTextRow[] => {
    const attributes = parseAttributeMap(paragraph[1] ?? '');
    const startMs = Number(attributes.get('t'));
    const durationMs = Number(attributes.get('d'));
    if (!(Number.isFinite(startMs) && Number.isFinite(durationMs))) {
      return [];
    }

    const withoutMarkup = (paragraph[2] ?? '')
      .replace(/<br\s*\/?>/gi, '\n')
      .replace(/<[^>]+>/g, '');
    const text = decodeHtmlEntities(withoutMarkup).trim();

    return text ? [{ startMs, durationMs, text }] : [];
  });
}
|
||||
|
||||
/**
 * Renders a millisecond offset as a WebVTT timestamp `HH:MM:SS.mmm`.
 *
 * The input is floored to whole milliseconds and negative values are clamped to zero.
 *
 * @param ms - Offset in milliseconds.
 * @returns The formatted timestamp.
 */
function formatVttTimestamp(ms: number): string {
  const totalMs = Math.max(0, Math.floor(ms));
  const pad = (part: number, width: number): string => String(part).padStart(width, '0');

  const clock = [
    Math.floor(totalMs / 3_600_000),
    Math.floor((totalMs % 3_600_000) / 60_000),
    Math.floor((totalMs % 60_000) / 1_000),
  ]
    .map((part) => pad(part, 2))
    .join(':');

  return `${clock}.${pad(totalMs % 1_000, 3)}`;
}
|
||||
|
||||
/**
 * Reports whether a format/extension name is one of the known timed-text formats.
 * The comparison ignores surrounding whitespace and letter case.
 *
 * @param value - Extension name without a leading dot; `undefined` or empty yields `false`.
 */
export function isYoutubeTimedTextExtension(value: string | undefined): boolean {
  return value ? YOUTUBE_TIMEDTEXT_EXTENSIONS.has(value.trim().toLowerCase()) : false;
}
|
||||
|
||||
/**
 * Converts a YouTube timed-text XML document into WebVTT.
 *
 * - A cue whose end would run past the start of the next cue is shortened to end one
 *   millisecond before that start (never earlier than its own start).
 * - Rolling captions are de-duplicated: when a cue overlaps the previous one in the source
 *   timing and its text begins with the previous cue's full text, only the newly added
 *   text is emitted. Non-overlapping cues are left untouched so ordinary captions that
 *   merely share a prefix are not truncated.
 *
 * @param xml - Raw timed-text XML.
 * @returns A WebVTT document; just the `WEBVTT` header line when no cue could be extracted.
 */
export function convertYoutubeTimedTextToVtt(xml: string): string {
  const rows = extractYoutubeTimedTextRows(xml);
  if (rows.length === 0) {
    return 'WEBVTT\n';
  }

  const blocks = rows.map((row, index) => {
    const previousRow = index > 0 ? rows[index - 1] : undefined;
    const nextRow = rows[index + 1];

    const unclampedEnd = row.startMs + row.durationMs;
    const clampedEnd =
      nextRow && unclampedEnd > nextRow.startMs
        ? Math.max(row.startMs, nextRow.startMs - 1)
        : unclampedEnd;

    let text = row.text;
    const continuesPrevious =
      previousRow !== undefined &&
      previousRow.startMs + previousRow.durationMs > row.startMs &&
      row.text.startsWith(previousRow.text);
    if (continuesPrevious && previousRow !== undefined) {
      const addedText = row.text.slice(previousRow.text.length).trim();
      // An exact repeat adds nothing new; keep the full text rather than emit an empty cue.
      if (addedText) {
        text = addedText;
      }
    }

    return `${formatVttTimestamp(row.startMs)} --> ${formatVttTimestamp(clampedEnd)}\n${text}`;
  });

  return `WEBVTT\n\n${blocks.join('\n\n')}\n`;
}
|
||||
472
src/core/services/youtube/track-download.test.ts
Normal file
472
src/core/services/youtube/track-download.test.ts
Normal file
@@ -0,0 +1,472 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { downloadYoutubeSubtitleTrack, downloadYoutubeSubtitleTracks } from './track-download';
|
||||
|
||||
/**
 * Runs `fn` with a freshly created temporary directory and removes that directory
 * (recursively) afterwards, whether `fn` succeeds or fails.
 *
 * @param fn - Callback receiving the directory path.
 * @returns Whatever `fn` resolves to.
 */
async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
  const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-youtube-track-download-'));
  const removeScratchDir = (): void => {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  };
  // The async wrapper turns a synchronous throw from `fn` into a rejection so cleanup still runs.
  const invoke = async (): Promise<T> => await fn(scratchDir);
  return await invoke().finally(removeScratchDir);
}
|
||||
|
||||
/**
 * Writes an executable Node.js script named `yt-dlp` into `dir` that imitates the subtitle
 * download behavior the tests rely on.
 *
 * The generated script:
 * - reads `--write-auto-subs`, `--write-subs`, `--sub-langs <list>` and `-o <template>`;
 * - exits with 2, 3 or 4 when `YTDLP_EXPECT_AUTO_SUBS`, `YTDLP_EXPECT_MANUAL_SUBS` or
 *   `YTDLP_EXPECT_SUB_LANG` is set and the corresponding expectation is not met;
 * - strips the trailing `.%(...)s` placeholder from the output template to obtain the file
 *   prefix (exits with 1 when the prefix is empty);
 * - writes files according to `YTDLP_FAKE_MODE` (`multi`, `rolling-auto`,
 *   `multi-primary-only-fail`, `both`, `webp-only`, or a single `.vtt` by default).
 *
 * @param dir - Directory that receives the script.
 * @returns Absolute path of the written script.
 */
function makeFakeYtDlpScript(dir: string): string {
  const scriptPath = path.join(dir, 'yt-dlp');
  // NOTE: backslashes meant for the generated script must be doubled here. Inside a template
  // literal `\.` and `\(` collapse to `.` and `(`, which previously produced a regex that never
  // matched the `.%(ext)s` placeholder, so files were written as `<prefix>.%(ext)s.vtt`.
  const script = `#!/usr/bin/env node
const fs = require('node:fs');
const path = require('node:path');

const args = process.argv.slice(2);
let outputTemplate = '';
const wantsAutoSubs = args.includes('--write-auto-subs');
const wantsManualSubs = args.includes('--write-subs');
const subLangIndex = args.indexOf('--sub-langs');
const subLang = subLangIndex >= 0 ? args[subLangIndex + 1] || '' : '';
const subLangs = subLang ? subLang.split(',').filter(Boolean) : [];
for (let i = 0; i < args.length; i += 1) {
  if (args[i] === '-o' && typeof args[i + 1] === 'string') {
    outputTemplate = args[i + 1];
    i += 1;
  }
}

if (process.env.YTDLP_EXPECT_AUTO_SUBS === '1' && !wantsAutoSubs) {
  process.exit(2);
}
if (process.env.YTDLP_EXPECT_MANUAL_SUBS === '1' && !wantsManualSubs) {
  process.exit(3);
}
if (process.env.YTDLP_EXPECT_SUB_LANG && subLang !== process.env.YTDLP_EXPECT_SUB_LANG) {
  process.exit(4);
}

const prefix = outputTemplate.replace(/\\.%\\([^)]+\\)s$/, '');
if (!prefix) {
  process.exit(1);
}
fs.mkdirSync(path.dirname(prefix), { recursive: true });

if (process.env.YTDLP_FAKE_MODE === 'multi') {
  for (const lang of subLangs) {
    fs.writeFileSync(\`\${prefix}.\${lang}.vtt\`, 'WEBVTT\\n');
  }
} else if (process.env.YTDLP_FAKE_MODE === 'rolling-auto') {
  fs.writeFileSync(
    \`\${prefix}.vtt\`,
    [
      'WEBVTT',
      '',
      '00:00:01.000 --> 00:00:02.000',
      '今日は',
      '',
      '00:00:02.000 --> 00:00:03.000',
      '今日はいい天気ですね',
      '',
      '00:00:03.000 --> 00:00:04.000',
      '今日はいい天気ですね本当に',
      '',
    ].join('\\n'),
  );
} else if (process.env.YTDLP_FAKE_MODE === 'multi-primary-only-fail') {
  const primaryLang = subLangs[0];
  if (primaryLang) {
    fs.writeFileSync(\`\${prefix}.\${primaryLang}.vtt\`, 'WEBVTT\\n');
  }
  process.stderr.write("ERROR: Unable to download video subtitles for 'en': HTTP Error 429: Too Many Requests\\n");
  process.exit(1);
} else if (process.env.YTDLP_FAKE_MODE === 'both') {
  fs.writeFileSync(\`\${prefix}.vtt\`, 'WEBVTT\\n');
  fs.writeFileSync(\`\${prefix}.orig.webp\`, 'webp');
} else if (process.env.YTDLP_FAKE_MODE === 'webp-only') {
  fs.writeFileSync(\`\${prefix}.orig.webp\`, 'webp');
} else {
  fs.writeFileSync(\`\${prefix}.vtt\`, 'WEBVTT\\n');
}
process.exit(0);
`;
  fs.writeFileSync(scriptPath, script, 'utf8');
  // Mark the script executable so it can be launched through PATH like the real binary.
  fs.chmodSync(scriptPath, 0o755);
  return scriptPath;
}
|
||||
|
||||
/**
 * Runs `fn` while a fake `yt-dlp` script sits first on `PATH` and `YTDLP_FAKE_MODE` is set.
 *
 * A temporary root is created, the fake script is written to `<root>/bin`, and after `fn`
 * settles `PATH` is reset to its previous value (or `''` if it was unset) and
 * `YTDLP_FAKE_MODE` is deleted.
 *
 * @param mode - Value exported as `YTDLP_FAKE_MODE` for the fake script.
 * @param fn - Callback receiving the temporary root and the `bin` directory.
 * @returns Whatever `fn` resolves to.
 */
async function withFakeYtDlp<T>(
  mode: 'both' | 'webp-only' | 'multi' | 'multi-primary-only-fail' | 'rolling-auto',
  fn: (dir: string, binDir: string) => Promise<T>,
): Promise<T> {
  return await withTempDir(async (root) => {
    const binDir = path.join(root, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    makeFakeYtDlpScript(binDir);

    const savedPath = process.env.PATH ?? '';
    process.env.PATH = [binDir, savedPath].join(path.delimiter);
    process.env.YTDLP_FAKE_MODE = mode;
    try {
      const outcome = await fn(root, binDir);
      return outcome;
    } finally {
      process.env.PATH = savedPath;
      delete process.env.YTDLP_FAKE_MODE;
    }
  });
}
|
||||
|
||||
/**
 * Runs `fn` with the given `YTDLP_EXPECT_*` environment variables applied, then restores
 * each of the three variables to the value it had before (deleting those that were unset).
 *
 * @param expectations - Subset of the expectation variables to set for the duration of `fn`.
 * @param fn - Callback to run while the variables are in place.
 * @returns Whatever `fn` resolves to.
 */
async function withFakeYtDlpExpectations<T>(
  expectations: Partial<Record<'YTDLP_EXPECT_AUTO_SUBS' | 'YTDLP_EXPECT_MANUAL_SUBS' | 'YTDLP_EXPECT_SUB_LANG', string>>,
  fn: () => Promise<T>,
): Promise<T> {
  const trackedKeys = [
    'YTDLP_EXPECT_AUTO_SUBS',
    'YTDLP_EXPECT_MANUAL_SUBS',
    'YTDLP_EXPECT_SUB_LANG',
  ] as const;
  const snapshot = trackedKeys.map((key) => [key, process.env[key]] as const);

  Object.assign(process.env, expectations);
  try {
    return await fn();
  } finally {
    for (const [key, savedValue] of snapshot) {
      if (savedValue !== undefined) {
        process.env[key] = savedValue;
      } else {
        delete process.env[key];
      }
    }
  }
}
|
||||
|
||||
/**
 * Temporarily replaces `globalThis.fetch` with a stub that forwards only the request URL
 * to `handler`, runs `fn`, and then reinstates the original `fetch`.
 *
 * @param handler - Produces the response for a requested URL.
 * @param fn - Callback to run while the stub is installed.
 * @returns Whatever `fn` resolves to.
 */
async function withStubFetch<T>(
  handler: (url: string) => Promise<Response> | Response,
  fn: () => Promise<T>,
): Promise<T> {
  // Reduce the three accepted request shapes to a plain URL string.
  const toUrlString = (input: string | URL | Request): string => {
    if (typeof input === 'string') {
      return input;
    }
    if (input instanceof URL) {
      return input.toString();
    }
    return input.url;
  };

  const realFetch = globalThis.fetch;
  globalThis.fetch = (async (input: string | URL | Request) =>
    await handler(toUrlString(input))) as typeof fetch;
  try {
    return await fn();
  } finally {
    globalThis.fetch = realFetch;
  }
}
|
||||
|
||||
test('downloadYoutubeSubtitleTrack prefers subtitle files over later webp artifacts', async () => {
  // Skipped on Windows (presumably because the fake yt-dlp is a shebang script).
  if (process.platform === 'win32') {
    return;
  }

  await withFakeYtDlp('both', async (root) => {
    const { path: subtitlePath } = await downloadYoutubeSubtitleTrack({
      targetUrl: 'https://www.youtube.com/watch?v=abc123',
      outputDir: path.join(root, 'out'),
      track: {
        id: 'auto:ja-orig',
        language: 'ja',
        sourceLanguage: 'ja-orig',
        kind: 'auto',
        label: 'Japanese (auto)',
      },
      mode: 'download',
    });

    // The fake writes both a .vtt and a .webp; only the subtitle may be returned.
    assert.equal(path.extname(subtitlePath), '.vtt');
    assert.match(path.basename(subtitlePath), /^auto-ja-orig\./);
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTrack ignores stale subtitle files from prior runs', async () => {
  // Skipped on Windows (presumably because the fake yt-dlp is a shebang script).
  if (process.platform === 'win32') {
    return;
  }

  await withFakeYtDlp('webp-only', async (root) => {
    const outputDir = path.join(root, 'out');
    fs.mkdirSync(outputDir, { recursive: true });
    // A leftover subtitle from an earlier run must not be mistaken for a fresh download.
    fs.writeFileSync(path.join(outputDir, 'auto-ja.vtt'), 'stale subtitle');

    const attemptDownload = async (): Promise<{ path: string }> =>
      await downloadYoutubeSubtitleTrack({
        targetUrl: 'https://www.youtube.com/watch?v=abc123',
        outputDir,
        track: {
          id: 'auto:ja',
          language: 'ja',
          sourceLanguage: 'ja',
          kind: 'auto',
          label: 'Japanese (auto)',
        },
        mode: 'download',
      });

    await assert.rejects(attemptDownload, /No subtitle file was downloaded/);
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTrack uses auto subtitle flags and raw source language for auto tracks', async () => {
  // Skipped on Windows (presumably because the fake yt-dlp is a shebang script).
  if (process.platform === 'win32') {
    return;
  }

  // The fake yt-dlp exits non-zero unless it receives these flags.
  const expectations = {
    YTDLP_EXPECT_AUTO_SUBS: '1',
    YTDLP_EXPECT_SUB_LANG: 'ja-orig',
  };

  await withFakeYtDlp('both', async (root) => {
    await withFakeYtDlpExpectations(expectations, async () => {
      const downloaded = await downloadYoutubeSubtitleTrack({
        targetUrl: 'https://www.youtube.com/watch?v=abc123',
        outputDir: path.join(root, 'out'),
        track: {
          id: 'auto:ja-orig',
          language: 'ja',
          sourceLanguage: 'ja-orig',
          kind: 'auto',
          label: 'Japanese (auto)',
        },
        mode: 'download',
      });

      assert.equal(path.extname(downloaded.path), '.vtt');
    });
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTrack keeps manual subtitle flag for manual tracks', async () => {
  // Skipped on Windows (presumably because the fake yt-dlp is a shebang script).
  if (process.platform === 'win32') {
    return;
  }

  // The fake yt-dlp exits non-zero unless it receives these flags.
  const expectations = {
    YTDLP_EXPECT_MANUAL_SUBS: '1',
    YTDLP_EXPECT_SUB_LANG: 'ja',
  };

  await withFakeYtDlp('both', async (root) => {
    await withFakeYtDlpExpectations(expectations, async () => {
      const downloaded = await downloadYoutubeSubtitleTrack({
        targetUrl: 'https://www.youtube.com/watch?v=abc123',
        outputDir: path.join(root, 'out'),
        track: {
          id: 'manual:ja',
          language: 'ja',
          sourceLanguage: 'ja',
          kind: 'manual',
          label: 'Japanese (manual)',
        },
        mode: 'download',
      });

      assert.equal(path.extname(downloaded.path), '.vtt');
    });
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTrack prefers direct download URL when available', async () => {
  const directUrl = 'https://example.com/subs/ja.vtt';

  await withTempDir(async (root) => {
    await withStubFetch(
      async (url) => {
        // Only the track's direct URL may be requested.
        assert.equal(url, 'https://example.com/subs/ja.vtt');
        return new Response('WEBVTT\n', { status: 200 });
      },
      async () => {
        const downloaded = await downloadYoutubeSubtitleTrack({
          targetUrl: 'https://www.youtube.com/watch?v=abc123',
          outputDir: path.join(root, 'out'),
          track: {
            id: 'auto:ja-orig',
            language: 'ja',
            sourceLanguage: 'ja-orig',
            kind: 'auto',
            label: 'Japanese (auto)',
            downloadUrl: directUrl,
            fileExtension: 'vtt',
          },
          mode: 'download',
        });

        assert.equal(path.basename(downloaded.path), 'auto-ja-orig.ja-orig.vtt');
        assert.equal(fs.readFileSync(downloaded.path, 'utf8'), 'WEBVTT\n');
      },
    );
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTrack converts srv3 auto subtitles into regular vtt', async () => {
  // Rolling captions: every paragraph repeats the previous text and appends to it.
  const timedTextXml = [
    '<timedtext><body>',
    '<p t="1000" d="2500">今日は</p>',
    '<p t="2000" d="2500">今日はいい天気ですね</p>',
    '<p t="3500" d="2500">今日はいい天気ですね本当に</p>',
    '</body></timedtext>',
  ].join('');

  // Expected output: non-overlapping cues that carry only the newly added text.
  const expectedVtt = [
    'WEBVTT',
    '',
    '00:00:01.000 --> 00:00:01.999',
    '今日は',
    '',
    '00:00:02.000 --> 00:00:03.499',
    'いい天気ですね',
    '',
    '00:00:03.500 --> 00:00:06.000',
    '本当に',
    '',
  ].join('\n');

  await withTempDir(async (root) => {
    await withStubFetch(
      async (url) => {
        assert.equal(url, 'https://example.com/subs/ja.srv3');
        return new Response(timedTextXml, { status: 200 });
      },
      async () => {
        const downloaded = await downloadYoutubeSubtitleTrack({
          targetUrl: 'https://www.youtube.com/watch?v=abc123',
          outputDir: path.join(root, 'out'),
          track: {
            id: 'auto:ja-orig',
            language: 'ja',
            sourceLanguage: 'ja-orig',
            kind: 'auto',
            label: 'Japanese (auto)',
            downloadUrl: 'https://example.com/subs/ja.srv3',
            fileExtension: 'srv3',
          },
          mode: 'download',
        });

        assert.equal(path.basename(downloaded.path), 'auto-ja-orig.ja-orig.vtt');
        assert.equal(fs.readFileSync(downloaded.path, 'utf8'), expectedVtt);
      },
    );
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTracks downloads primary and secondary in one invocation', async () => {
  // Skipped on Windows (presumably because the fake yt-dlp is a shebang script).
  if (process.platform === 'win32') {
    return;
  }

  await withFakeYtDlp('multi', async (root) => {
    const downloads = await downloadYoutubeSubtitleTracks({
      targetUrl: 'https://www.youtube.com/watch?v=abc123',
      outputDir: path.join(root, 'out'),
      tracks: [
        {
          id: 'auto:ja-orig',
          language: 'ja',
          sourceLanguage: 'ja-orig',
          kind: 'auto',
          label: 'Japanese (auto)',
        },
        {
          id: 'auto:en',
          language: 'en',
          sourceLanguage: 'en',
          kind: 'auto',
          label: 'English (auto)',
        },
      ],
      mode: 'download',
    });

    // Each track id must map to the file written for its own source language.
    const primaryName = path.basename(downloads.get('auto:ja-orig') ?? '');
    const secondaryName = path.basename(downloads.get('auto:en') ?? '');
    assert.match(primaryName, /\.ja-orig\.vtt$/);
    assert.match(secondaryName, /\.en\.vtt$/);
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTracks preserves successfully downloaded primary file on partial failure', async () => {
  // Skipped on Windows (presumably because the fake yt-dlp is a shebang script).
  if (process.platform === 'win32') {
    return;
  }

  // In this mode the fake writes only the first language and then exits with status 1.
  await withFakeYtDlp('multi-primary-only-fail', async (root) => {
    const downloads = await downloadYoutubeSubtitleTracks({
      targetUrl: 'https://www.youtube.com/watch?v=abc123',
      outputDir: path.join(root, 'out'),
      tracks: [
        {
          id: 'auto:ja-orig',
          language: 'ja',
          sourceLanguage: 'ja-orig',
          kind: 'auto',
          label: 'Japanese (auto)',
        },
        {
          id: 'auto:en',
          language: 'en',
          sourceLanguage: 'en',
          kind: 'auto',
          label: 'English (auto)',
        },
      ],
      mode: 'download',
    });

    const primaryName = path.basename(downloads.get('auto:ja-orig') ?? '');
    assert.match(primaryName, /\.ja-orig\.vtt$/);
    assert.equal(downloads.has('auto:en'), false);
  });
});
|
||||
|
||||
test('downloadYoutubeSubtitleTracks prefers direct download URLs when available', async () => {
  await withTempDir(async (root) => {
    // Records every URL requested through the stubbed fetch, in request order.
    const requestedUrls: string[] = [];

    await withStubFetch(
      async (url) => {
        requestedUrls.push(url);
        return new Response(`WEBVTT\n${url}\n`, { status: 200 });
      },
      async () => {
        const downloads = await downloadYoutubeSubtitleTracks({
          targetUrl: 'https://www.youtube.com/watch?v=abc123',
          outputDir: path.join(root, 'out'),
          tracks: [
            {
              id: 'auto:ja-orig',
              language: 'ja',
              sourceLanguage: 'ja-orig',
              kind: 'auto',
              label: 'Japanese (auto)',
              downloadUrl: 'https://example.com/subs/ja.vtt',
              fileExtension: 'vtt',
            },
            {
              id: 'auto:en',
              language: 'en',
              sourceLanguage: 'en',
              kind: 'auto',
              label: 'English (auto)',
              downloadUrl: 'https://example.com/subs/en.vtt',
              fileExtension: 'vtt',
            },
          ],
          mode: 'download',
        });

        assert.deepEqual(requestedUrls, [
          'https://example.com/subs/ja.vtt',
          'https://example.com/subs/en.vtt',
        ]);
        const primaryName = path.basename(downloads.get('auto:ja-orig') ?? '');
        const secondaryName = path.basename(downloads.get('auto:en') ?? '');
        assert.match(primaryName, /\.ja-orig\.vtt$/);
        assert.match(secondaryName, /\.en\.vtt$/);
      },
    );
  });
});
|
||||
256
src/core/services/youtube/track-download.ts
Normal file
256
src/core/services/youtube/track-download.ts
Normal file
@@ -0,0 +1,256 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { spawn } from 'node:child_process';
|
||||
import type { YoutubeFlowMode } from '../../../types';
|
||||
import type { YoutubeTrackOption } from './track-probe';
|
||||
import { convertYoutubeTimedTextToVtt, isYoutubeTimedTextExtension } from './timedtext';
|
||||
|
||||
/** Lower-case file extensions (with leading dot) that are accepted as subtitle files. */
const YOUTUBE_SUBTITLE_EXTENSIONS = new Set(['.srt', '.vtt', '.ass']);
|
||||
/** File-name prefix shared by all files written by the multi-track (batch) download. */
const YOUTUBE_BATCH_PREFIX = 'youtube-batch';
|
||||
|
||||
/**
 * Spawns a command, collects its stdout and stderr as UTF-8 text, and settles when it closes.
 *
 * @param command - Executable to launch.
 * @param args - Arguments passed to the executable.
 * @returns Resolves with the captured output when the exit code is 0. Rejects with the spawn
 *          error, or with an `Error` whose message is the trimmed stderr (falling back to
 *          a "yt-dlp exited with status ..." message) when the exit code is non-zero.
 */
function runCapture(command: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    const captured = { stdout: '', stderr: '' };

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk) => {
      captured.stdout += String(chunk);
    });
    child.stderr.on('data', (chunk) => {
      captured.stderr += String(chunk);
    });

    child.once('error', reject);
    child.once('close', (code) => {
      if (code !== 0) {
        const reason = captured.stderr.trim() || `yt-dlp exited with status ${code ?? 'unknown'}`;
        reject(new Error(reason));
        return;
      }
      resolve({ stdout: captured.stdout, stderr: captured.stderr });
    });
  });
}
|
||||
|
||||
/**
 * Spawns a command and reports its captured UTF-8 output together with the exit code,
 * without treating a non-zero exit as a failure.
 *
 * @param command - Executable to launch.
 * @param args - Arguments passed to the executable.
 * @returns Resolves with stdout, stderr and the exit code (1 when the process closed without
 *          a numeric code). Rejects only when the process could not be spawned.
 */
function runCaptureDetailed(
  command: string,
  args: string[],
): Promise<{ stdout: string; stderr: string; code: number }> {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    const stdoutChunks: string[] = [];
    const stderrChunks: string[] = [];

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk) => {
      stdoutChunks.push(String(chunk));
    });
    child.stderr.on('data', (chunk) => {
      stderrChunks.push(String(chunk));
    });

    child.once('error', reject);
    child.once('close', (code) => {
      resolve({
        stdout: stdoutChunks.join(''),
        stderr: stderrChunks.join(''),
        code: code ?? 1,
      });
    });
  });
}
|
||||
|
||||
/**
 * Finds the most recently modified subtitle file in `dir` that belongs to `prefix`.
 *
 * A file belongs to the prefix only when its name starts with `"<prefix>."`, so prefix
 * `auto-ja` does not match files of another track such as `auto-ja-orig.vtt`. The same
 * rule is used by `pickLatestSubtitleFileForLanguage`.
 *
 * @param dir - Directory to scan (must exist).
 * @param prefix - File-name prefix without the trailing dot.
 * @returns Full path of the newest matching file, or `null` when there is none. When several
 *          files share the newest modification time, the first in directory order wins.
 */
function pickLatestSubtitleFile(dir: string, prefix: string): string | null {
  const ownedPrefix = `${prefix}.`;
  let latest: { path: string; mtimeMs: number } | null = null;

  for (const name of fs.readdirSync(dir)) {
    if (!name.startsWith(ownedPrefix)) {
      continue;
    }
    if (!YOUTUBE_SUBTITLE_EXTENSIONS.has(path.extname(name).toLowerCase())) {
      continue;
    }

    const candidate = path.join(dir, name);
    let mtimeMs: number;
    try {
      // Stat each candidate once instead of repeatedly inside a sort comparator.
      mtimeMs = fs.statSync(candidate).mtimeMs;
    } catch {
      // The entry vanished or is a dangling link; it cannot be a usable download.
      continue;
    }

    if (latest === null || mtimeMs > latest.mtimeMs) {
      latest = { path: candidate, mtimeMs };
    }
  }

  return latest?.path ?? null;
}
|
||||
|
||||
/**
 * Finds the most recently modified subtitle file in `dir` for one language of a batch.
 *
 * A file qualifies when its name starts with `"<prefix>."`, contains `".<sourceLanguage>."`,
 * and ends in a recognized subtitle extension.
 *
 * @param dir - Directory to scan (must exist).
 * @param prefix - File-name prefix without the trailing dot.
 * @param sourceLanguage - Language code expected as a dot-delimited segment of the name.
 * @returns Full path of the newest qualifying file, or `null` when there is none.
 */
function pickLatestSubtitleFileForLanguage(
  dir: string,
  prefix: string,
  sourceLanguage: string,
): string | null {
  const requiredStart = `${prefix}.`;
  const languageSegment = `.${sourceLanguage}.`;

  const newestFirst = fs
    .readdirSync(dir)
    .filter(
      (name) =>
        name.startsWith(requiredStart) &&
        name.includes(languageSegment) &&
        YOUTUBE_SUBTITLE_EXTENSIONS.has(path.extname(name).toLowerCase()),
    )
    .map((name) => path.join(dir, name))
    .sort((left, right) => fs.statSync(right).mtimeMs - fs.statSync(left).mtimeMs);

  const [newest] = newestFirst;
  return newest ?? null;
}
|
||||
|
||||
/**
 * Assembles the yt-dlp argument list for a subtitle-only download.
 *
 * The list always begins with `--skip-download --no-warnings`, then the optional
 * `--write-auto-subs` / `--write-subs` flags, then the subtitle format preference, the
 * comma-joined language list, the output template and finally the target URL.
 *
 * @param input - Target URL, output template, languages and which subtitle kinds to request.
 * @returns Arguments in the order described above.
 */
function buildDownloadArgs(input: {
  targetUrl: string;
  outputTemplate: string;
  sourceLanguages: string[];
  includeAutoSubs: boolean;
  includeManualSubs: boolean;
}): string[] {
  const subtitleKindFlags = [
    ...(input.includeAutoSubs ? ['--write-auto-subs'] : []),
    ...(input.includeManualSubs ? ['--write-subs'] : []),
  ];

  return [
    '--skip-download',
    '--no-warnings',
    ...subtitleKindFlags,
    '--sub-format',
    'srt/vtt/best',
    '--sub-langs',
    input.sourceLanguages.join(','),
    '-o',
    input.outputTemplate,
    input.targetUrl,
  ];
}
|
||||
|
||||
/**
 * Fetches a subtitle from the track's direct URL and writes it to
 * `<outputDir>/<prefix>.<sourceLanguage>.<ext>`.
 *
 * Timed-text payloads are converted to WebVTT and saved with a `.vtt` extension; a
 * recognized subtitle extension is kept as-is; anything else is saved as `.vtt` unchanged.
 *
 * @param input - Output directory, file-name prefix and the track to download.
 * @returns The path of the written file.
 * @throws Error when the track has no direct URL or the HTTP response is not OK.
 */
async function downloadSubtitleFromUrl(input: {
  outputDir: string;
  prefix: string;
  track: YoutubeTrackOption;
}): Promise<{ path: string }> {
  const { outputDir, prefix, track } = input;
  if (!track.downloadUrl) {
    throw new Error(`No direct subtitle URL available for ${track.sourceLanguage}`);
  }

  // Reduce the advertised extension to lower-case alphanumerics before classifying it.
  const ext = (track.fileExtension?.trim().toLowerCase() || 'vtt').replace(/[^a-z0-9]+/g, '');
  const isTimedText = isYoutubeTimedTextExtension(ext);
  let safeExt = 'vtt';
  if (!isTimedText && YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`)) {
    safeExt = ext;
  }
  const targetPath = path.join(outputDir, `${prefix}.${track.sourceLanguage}.${safeExt}`);

  const response = await fetch(track.downloadUrl);
  if (!response.ok) {
    throw new Error(`HTTP ${response.status} while downloading ${track.sourceLanguage}`);
  }

  const body = await response.text();
  const contents = isTimedText ? convertYoutubeTimedTextToVtt(body) : body;
  fs.writeFileSync(targetPath, contents, 'utf8');
  return { path: targetPath };
}
|
||||
|
||||
/**
 * Reports whether a track can be fetched directly: it must carry a download URL and its
 * extension (defaulting to `vtt`) must be a timed-text format or a recognized subtitle
 * extension.
 *
 * @param track - Track option to inspect.
 */
function canDownloadSubtitleFromUrl(track: YoutubeTrackOption): boolean {
  if (track.downloadUrl) {
    const ext = (track.fileExtension?.trim().toLowerCase() || 'vtt').replace(/[^a-z0-9]+/g, '');
    return isYoutubeTimedTextExtension(ext) || YOUTUBE_SUBTITLE_EXTENSIONS.has(`.${ext}`);
  }
  return false;
}
|
||||
|
||||
/**
 * Downloads one subtitle track into `outputDir`.
 *
 * Steps:
 * 1. Derive a file-name prefix from the track id (characters outside `a-z0-9_-` become `-`).
 * 2. Remove leftovers of earlier runs for this track. Only names beginning with
 *    `"<prefix>."` are removed, so files of another track whose id merely extends this one
 *    (e.g. `auto-ja-orig.*` when the prefix is `auto-ja`) are left alone.
 * 3. Use the track's direct URL when it is usable; otherwise invoke `yt-dlp` and pick the
 *    newest subtitle file written for this prefix.
 *
 * @param input - Video URL, output directory, track to download and flow mode. Auto subtitles
 *                are requested when the mode is `'generate'` or the track kind is `'auto'`;
 *                manual subtitles when the track kind is `'manual'`.
 * @returns The path of the downloaded subtitle file.
 * @throws Error when `yt-dlp` fails or no subtitle file for this track was produced.
 */
export async function downloadYoutubeSubtitleTrack(input: {
  targetUrl: string;
  outputDir: string;
  track: YoutubeTrackOption;
  mode: YoutubeFlowMode;
}): Promise<{ path: string }> {
  fs.mkdirSync(input.outputDir, { recursive: true });
  const prefix = input.track.id.replace(/[^a-z0-9_-]+/gi, '-');
  const ownedPrefix = `${prefix}.`;

  for (const name of fs.readdirSync(input.outputDir)) {
    if (!name.startsWith(ownedPrefix)) {
      continue;
    }
    try {
      fs.rmSync(path.join(input.outputDir, name), { force: true });
    } catch {
      // Best-effort cleanup: a leftover that cannot be removed must not block the download.
    }
  }

  if (canDownloadSubtitleFromUrl(input.track)) {
    return await downloadSubtitleFromUrl({
      outputDir: input.outputDir,
      prefix,
      track: input.track,
    });
  }

  const outputTemplate = path.join(input.outputDir, `${prefix}.%(ext)s`);
  const args = buildDownloadArgs({
    targetUrl: input.targetUrl,
    outputTemplate,
    sourceLanguages: [input.track.sourceLanguage],
    includeAutoSubs: input.mode === 'generate' || input.track.kind === 'auto',
    includeManualSubs: input.track.kind === 'manual',
  });

  await runCapture('yt-dlp', args);

  const subtitlePath = pickLatestSubtitleFile(input.outputDir, prefix);
  // Reject a pick that belongs to a different track sharing this prefix text.
  if (!subtitlePath || !path.basename(subtitlePath).startsWith(ownedPrefix)) {
    throw new Error(`No subtitle file was downloaded for ${input.track.sourceLanguage}`);
  }
  return { path: subtitlePath };
}
|
||||
|
||||
/**
 * Downloads several subtitle tracks into `outputDir` under the shared batch prefix.
 *
 * Leftover batch files are removed first. When every track can be fetched from its direct
 * URL the tracks are downloaded one after another; otherwise a single `yt-dlp` invocation
 * requests all languages and the newest file per language is collected.
 *
 * A failing `yt-dlp` run is tolerated as long as at least one track produced a file; the
 * tracks without a file are simply absent from the result.
 *
 * @param input - Video URL, output directory, tracks to download and flow mode.
 * @returns Map from track id to the path of its subtitle file.
 * @throws Error carrying yt-dlp's stderr (or its exit status) when nothing was downloaded and
 *         yt-dlp failed, or a "No subtitle file was downloaded" error when it succeeded
 *         without producing any file. Direct-URL failures propagate unchanged.
 */
export async function downloadYoutubeSubtitleTracks(input: {
  targetUrl: string;
  outputDir: string;
  tracks: YoutubeTrackOption[];
  mode: YoutubeFlowMode;
}): Promise<Map<string, string>> {
  const { targetUrl, outputDir, tracks, mode } = input;
  fs.mkdirSync(outputDir, { recursive: true });

  const batchMarker = `${YOUTUBE_BATCH_PREFIX}.`;
  const leftovers = fs.readdirSync(outputDir).filter((name) => name.startsWith(batchMarker));
  for (const leftover of leftovers) {
    try {
      fs.rmSync(path.join(outputDir, leftover), { force: true });
    } catch {
      // ignore stale files
    }
  }

  const downloaded = new Map<string, string>();

  if (tracks.every(canDownloadSubtitleFromUrl)) {
    for (const track of tracks) {
      const { path: savedPath } = await downloadSubtitleFromUrl({
        outputDir,
        prefix: YOUTUBE_BATCH_PREFIX,
        track,
      });
      downloaded.set(track.id, savedPath);
    }
    return downloaded;
  }

  const wantsAutoSubs = mode === 'generate' || tracks.some((track) => track.kind === 'auto');
  const wantsManualSubs = tracks.some((track) => track.kind === 'manual');
  const ytDlpArgs = buildDownloadArgs({
    targetUrl,
    outputTemplate: path.join(outputDir, `${YOUTUBE_BATCH_PREFIX}.%(ext)s`),
    sourceLanguages: tracks.map((track) => track.sourceLanguage),
    includeAutoSubs: wantsAutoSubs,
    includeManualSubs: wantsManualSubs,
  });

  // The exit code is inspected only after looking for files, so partial results survive.
  const { code, stderr } = await runCaptureDetailed('yt-dlp', ytDlpArgs);

  for (const track of tracks) {
    const found = pickLatestSubtitleFileForLanguage(
      outputDir,
      YOUTUBE_BATCH_PREFIX,
      track.sourceLanguage,
    );
    if (found) {
      downloaded.set(track.id, found);
    }
  }

  if (downloaded.size > 0) {
    return downloaded;
  }
  if (code !== 0) {
    throw new Error(stderr.trim() || `yt-dlp exited with status ${code}`);
  }
  throw new Error(
    `No subtitle file was downloaded for ${tracks.map((track) => track.sourceLanguage).join(',')}`,
  );
}
|
||||
80
src/core/services/youtube/track-probe.test.ts
Normal file
80
src/core/services/youtube/track-probe.test.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { probeYoutubeTracks } from './track-probe';
|
||||
|
||||
/**
 * Runs `fn` with a freshly created temporary directory and removes that
 * directory (recursively) afterwards, whether `fn` resolves or rejects.
 *
 * @param fn - Callback that receives the temporary directory path.
 * @returns Whatever `fn` resolves to.
 */
async function withTempDir<T>(fn: (dir: string) => Promise<T>): Promise<T> {
  const prefix = path.join(os.tmpdir(), 'subminer-youtube-track-probe-');
  const workDir = fs.mkdtempSync(prefix);

  let outcome: T;
  try {
    outcome = await fn(workDir);
  } finally {
    // Always clean up, even when the callback throws.
    fs.rmSync(workDir, { recursive: true, force: true });
  }
  return outcome;
}
|
||||
|
||||
/**
 * Writes an executable stand-in named `yt-dlp` into `dir`.
 *
 * The generated file is a Node script that writes the JSON form of `payload`
 * to stdout, regardless of the arguments it is invoked with.
 *
 * @param dir - Existing directory that receives the script.
 * @param payload - Value the fake binary should emit as JSON.
 */
function makeFakeYtDlpScript(dir: string, payload: unknown): void {
  // Stringify twice: once to JSON, once more to embed that JSON as a JS string literal.
  const embeddedJson = JSON.stringify(JSON.stringify(payload));
  const sourceLines = ['#!/usr/bin/env node', `process.stdout.write(${embeddedJson});`, ''];
  const target = path.join(dir, 'yt-dlp');

  fs.writeFileSync(target, sourceLines.join('\n'), 'utf8');
  fs.chmodSync(target, 0o755);
}
|
||||
|
||||
/**
 * Runs `fn` while a fake `yt-dlp` executable that prints `payload` as JSON is
 * first on `PATH`.
 *
 * The fake binary lives in a temporary directory that is removed afterwards.
 * This mutates the process-wide `PATH` for the duration of `fn` and restores
 * the previous value (including "unset") when `fn` settles.
 *
 * @param payload - Value the fake `yt-dlp` should emit as JSON on stdout.
 * @param fn - Test body to execute while the fake binary is active.
 * @returns Whatever `fn` resolves to.
 */
async function withFakeYtDlp<T>(payload: unknown, fn: () => Promise<T>): Promise<T> {
  return await withTempDir(async (root) => {
    const binDir = path.join(root, 'bin');
    fs.mkdirSync(binDir, { recursive: true });
    makeFakeYtDlpScript(binDir, payload);

    const originalPath = process.env.PATH;
    // Avoid a trailing delimiter when PATH is empty or unset: an empty PATH entry
    // means "current directory" on POSIX, which would widen the lookup.
    process.env.PATH = originalPath ? `${binDir}${path.delimiter}${originalPath}` : binDir;
    try {
      return await fn();
    } finally {
      // Restore exactly what was there before, distinguishing unset from empty.
      if (originalPath === undefined) {
        delete process.env.PATH;
      } else {
        process.env.PATH = originalPath;
      }
    }
  });
}
|
||||
|
||||
// Automatic captions: srv3 must win over vtt even when vtt is listed first.
test('probeYoutubeTracks prefers srv3 over vtt for automatic captions', async () => {
  const fakeInfo = {
    id: 'abc123',
    title: 'Example',
    automatic_captions: {
      'ja-orig': [
        { ext: 'vtt', url: 'https://example.com/ja.vtt', name: 'Japanese auto' },
        { ext: 'srv3', url: 'https://example.com/ja.srv3', name: 'Japanese auto' },
      ],
    },
  };

  await withFakeYtDlp(fakeInfo, async () => {
    const probe = await probeYoutubeTracks('https://www.youtube.com/watch?v=abc123');
    assert.equal(probe.videoId, 'abc123');

    const firstTrack = probe.tracks[0];
    assert.equal(firstTrack?.downloadUrl, 'https://example.com/ja.srv3');
    assert.equal(firstTrack?.fileExtension, 'srv3');
  });
});
|
||||
|
||||
// Manual captions: srt must win over srv3 even when srv3 is listed first.
test('probeYoutubeTracks keeps preferring srt for manual captions', async () => {
  const fakeInfo = {
    id: 'abc123',
    title: 'Example',
    subtitles: {
      ja: [
        { ext: 'srv3', url: 'https://example.com/ja.srv3', name: 'Japanese manual' },
        { ext: 'srt', url: 'https://example.com/ja.srt', name: 'Japanese manual' },
      ],
    },
  };

  await withFakeYtDlp(fakeInfo, async () => {
    const probe = await probeYoutubeTracks('https://www.youtube.com/watch?v=abc123');

    const firstTrack = probe.tracks[0];
    assert.equal(firstTrack?.downloadUrl, 'https://example.com/ja.srt');
    assert.equal(firstTrack?.fileExtension, 'srt');
  });
});
|
||||
112
src/core/services/youtube/track-probe.ts
Normal file
112
src/core/services/youtube/track-probe.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import { spawn } from 'node:child_process';
|
||||
import type { YoutubeTrackOption } from '../../../types';
|
||||
import {
|
||||
formatYoutubeTrackLabel,
|
||||
normalizeYoutubeLangCode,
|
||||
type YoutubeTrackKind,
|
||||
} from './labels';
|
||||
|
||||
/** Outcome of probing a YouTube URL for available subtitle tracks. */
export type YoutubeTrackProbeResult = {
  /** Video id reported by yt-dlp; empty string when it reported none. */
  videoId: string;
  /** Video title reported by yt-dlp; empty string when it reported none. */
  title: string;
  /** Discovered tracks: manual subtitles first, then automatic captions. */
  tracks: YoutubeTrackOption[];
};
|
||||
|
||||
/** One downloadable rendition of a subtitle track as listed by yt-dlp. */
type YtDlpSubtitleFormat = {
  ext?: string;
  name?: string;
  url?: string;
};

/** All renditions yt-dlp lists for a single language. */
type YtDlpSubtitleEntry = YtDlpSubtitleFormat[];

/** The subset of yt-dlp's `--dump-single-json` output that this module reads. */
type YtDlpInfo = {
  id?: string;
  title?: string;
  /** Manually provided subtitles, keyed by language code. */
  subtitles?: Record<string, YtDlpSubtitleEntry>;
  /** Automatically generated captions, keyed by language code. */
  automatic_captions?: Record<string, YtDlpSubtitleEntry>;
};
|
||||
|
||||
/**
 * Spawns `command` with `args`, collecting stdout and stderr as UTF-8 text.
 *
 * @param command - Executable to run (resolved through `PATH` by `spawn`).
 * @param args - Arguments passed to the executable.
 * @returns The captured stdout and stderr when the process exits with status 0.
 * @throws Error carrying the trimmed stderr when the process fails; when
 *   stderr is empty the message names the command and its exit status or
 *   terminating signal. Spawn failures (e.g. a missing executable) reject
 *   with the error emitted by the child process.
 */
function runCapture(command: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    const proc = spawn(command, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';

    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk) => {
      stdout += String(chunk);
    });
    proc.stderr.on('data', (chunk) => {
      stderr += String(chunk);
    });

    proc.once('error', reject);
    proc.once('close', (code, signal) => {
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      // `code` is null when the child was terminated by a signal; report that instead.
      let reason = 'status unknown';
      if (code !== null) {
        reason = `status ${code}`;
      } else if (signal) {
        reason = `signal ${signal}`;
      }
      // Name the command that actually ran rather than assuming yt-dlp.
      reject(new Error(stderr.trim() || `${command} exited with ${reason}`));
    });
  });
}
|
||||
|
||||
/**
 * Picks the rendition of a subtitle track to download.
 *
 * Only renditions with a non-empty `url` are considered. Automatic captions
 * prefer the `srv*` formats, manual subtitles prefer `srt`/`vtt`. When none of
 * the known extensions is present, the first usable rendition is returned and
 * its extension defaults to `vtt` if it is missing or blank.
 *
 * @param formats - Renditions yt-dlp listed for one language.
 * @param kind - Whether the track is a manual subtitle or an automatic caption.
 * @returns The chosen extension, URL and (trimmed, possibly undefined) title,
 *   or `null` when no rendition has a usable URL.
 */
function choosePreferredFormat(
  formats: YtDlpSubtitleEntry,
  kind: YoutubeTrackKind,
): { ext: string; url: string; title?: string } | null {
  const autoRanking = ['srv3', 'srv2', 'srv1', 'vtt', 'srt', 'ttml', 'json3'];
  const manualRanking = ['srt', 'vtt', 'srv3', 'srv2', 'srv1', 'ttml', 'json3'];
  const ranking = kind === 'auto' ? autoRanking : manualRanking;

  // Keep only renditions that can actually be downloaded, preserving their order.
  const usable: Array<{ ext?: string; name?: string; url: string }> = [];
  for (const candidate of formats) {
    if (typeof candidate.url === 'string' && candidate.url) {
      usable.push({ ext: candidate.ext, name: candidate.name, url: candidate.url });
    }
  }

  for (const wantedExt of ranking) {
    const hit = usable.find((candidate) => candidate.ext === wantedExt);
    if (hit) {
      return { ext: wantedExt, url: hit.url, title: hit.name?.trim() || undefined };
    }
  }

  const first = usable[0];
  if (!first) {
    return null;
  }

  const fallbackExt = first.ext?.trim() || 'vtt';
  return { ext: fallbackExt, url: first.url, title: first.name?.trim() || undefined };
}
|
||||
|
||||
/**
 * Converts one of yt-dlp's per-language subtitle maps into track options.
 *
 * Languages whose value is not a non-empty array, or that have no rendition
 * with a usable URL, are skipped.
 *
 * @param entries - `subtitles` or `automatic_captions` from the yt-dlp output.
 * @param kind - Track kind to stamp on every produced option.
 * @returns One track option per usable language, in the map's iteration order.
 */
function toTracks(
  entries: Record<string, YtDlpSubtitleEntry> | undefined,
  kind: YoutubeTrackKind,
): YoutubeTrackOption[] {
  const collected: YoutubeTrackOption[] = [];
  if (!entries) {
    return collected;
  }

  Object.entries(entries).forEach(([rawLanguage, candidates]) => {
    if (!Array.isArray(candidates) || candidates.length === 0) {
      return;
    }
    const chosen = choosePreferredFormat(candidates, kind);
    if (!chosen) {
      return;
    }

    // Fall back to the raw key when trimming or normalizing leaves nothing.
    const sourceLanguage = rawLanguage.trim() || rawLanguage;
    const language = normalizeYoutubeLangCode(sourceLanguage) || sourceLanguage;
    const { title, url, ext } = chosen;

    collected.push({
      id: `${kind}:${sourceLanguage}`,
      language,
      sourceLanguage,
      kind,
      title,
      label: formatYoutubeTrackLabel({ language, kind, title }),
      downloadUrl: url,
      fileExtension: ext,
    });
  });

  return collected;
}
|
||||
|
||||
export type { YoutubeTrackOption };
|
||||
|
||||
/**
 * Asks `yt-dlp` which subtitle tracks exist for a video.
 *
 * @param targetUrl - Video URL passed to `yt-dlp --dump-single-json`.
 * @returns The video id and title (empty strings when absent) plus all
 *   discovered tracks, manual subtitles first and automatic captions after.
 * @throws Error when `yt-dlp` fails, or when its output is not a JSON object.
 */
export async function probeYoutubeTracks(targetUrl: string): Promise<YoutubeTrackProbeResult> {
  const { stdout } = await runCapture('yt-dlp', ['--dump-single-json', '--no-warnings', targetUrl]);

  // Validate before casting: empty or truncated output would otherwise surface
  // as a bare SyntaxError/TypeError with no hint about where it came from.
  let parsed: unknown;
  try {
    parsed = JSON.parse(stdout);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`yt-dlp returned invalid JSON for ${targetUrl}: ${detail}`);
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(`yt-dlp returned unexpected metadata for ${targetUrl}`);
  }

  const info = parsed as YtDlpInfo;
  const tracks = [...toTracks(info.subtitles, 'manual'), ...toTracks(info.automatic_captions, 'auto')];
  return {
    videoId: info.id || '',
    title: info.title || '',
    tracks,
  };
}
|
||||
63
src/core/services/youtube/track-selection.ts
Normal file
63
src/core/services/youtube/track-selection.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import { isEnglishYoutubeLang, isJapaneseYoutubeLang } from './labels';
|
||||
import type { YoutubeTrackOption } from './track-probe';
|
||||
|
||||
/**
 * Returns the first track whose normalized language satisfies `matcher`.
 *
 * @param tracks - Candidate tracks, in priority order.
 * @param matcher - Predicate applied to each track's `language`.
 * @param excludeId - Optional track id that must not be returned.
 * @returns The first eligible track, or `null` when none qualifies.
 */
function pickTrack(
  tracks: YoutubeTrackOption[],
  matcher: (value: string) => boolean,
  excludeId?: string,
): YoutubeTrackOption | null {
  const isEligible = (candidate: YoutubeTrackOption): boolean =>
    matcher(candidate.language) && candidate.id !== excludeId;

  const [firstEligible = null] = tracks.filter(isEligible);
  return firstEligible;
}
|
||||
|
||||
/**
 * Chooses the default primary and secondary subtitle tracks.
 *
 * Primary: a manual Japanese track, else an automatic Japanese track, else the
 * first manual track, else the first track of any kind. Secondary: a manual
 * English track, else an automatic English track, never the primary itself.
 *
 * @param tracks - All tracks available for the video.
 * @returns The chosen track ids; each is `null` when nothing suitable exists.
 */
export function chooseDefaultYoutubeTrackIds(
  tracks: YoutubeTrackOption[],
): { primaryTrackId: string | null; secondaryTrackId: string | null } {
  const manualTracks = tracks.filter((track) => track.kind === 'manual');
  const autoTracks = tracks.filter((track) => track.kind === 'auto');

  const primary =
    pickTrack(manualTracks, isJapaneseYoutubeLang) ||
    pickTrack(autoTracks, isJapaneseYoutubeLang) ||
    manualTracks[0] ||
    tracks[0] ||
    null;

  // The secondary track must differ from whatever was picked as primary.
  const primaryId = primary?.id ?? undefined;
  const secondary =
    pickTrack(manualTracks, isEnglishYoutubeLang, primaryId) ||
    pickTrack(autoTracks, isEnglishYoutubeLang, primaryId) ||
    null;

  return {
    primaryTrackId: primary?.id ?? null,
    secondaryTrackId: secondary?.id ?? null,
  };
}
|
||||
|
||||
/**
 * Ensures the same track is not selected as both primary and secondary.
 *
 * @param input - The requested primary and secondary track ids.
 * @returns A new selection with the secondary cleared when both ids are the
 *   same non-empty value; otherwise the `input` object itself, untouched.
 */
export function normalizeYoutubeTrackSelection(input: {
  primaryTrackId: string | null;
  secondaryTrackId: string | null;
}): {
  primaryTrackId: string | null;
  secondaryTrackId: string | null;
} {
  const { primaryTrackId, secondaryTrackId } = input;
  const bothChosen = Boolean(primaryTrackId) && Boolean(secondaryTrackId);
  const isDuplicate = bothChosen && primaryTrackId === secondaryTrackId;

  return isDuplicate ? { primaryTrackId, secondaryTrackId: null } : input;
}
|
||||
|
||||
Reference in New Issue
Block a user