fix(launcher): remove youtube subtitle mode

This commit is contained in:
2026-03-08 16:03:24 -07:00
parent 6a44b54b51
commit a6ece5388a
19 changed files with 714 additions and 202 deletions

View File

@@ -34,12 +34,7 @@ function checkDependencies(args: Args): void {
missing.push('yt-dlp');
}
if (
args.targetKind === 'url' &&
isYoutubeTarget(args.target) &&
args.youtubeSubgenMode !== 'off' &&
!commandExists('ffmpeg')
) {
if (args.targetKind === 'url' && isYoutubeTarget(args.target) && !commandExists('ffmpeg')) {
missing.push('ffmpeg');
}
@@ -164,22 +159,28 @@ export async function runPlaybackCommand(context: LauncherCommandContext): Promi
const isYoutubeUrl = selectedTarget.kind === 'url' && isYoutubeTarget(selectedTarget.target);
let preloadedSubtitles: { primaryPath?: string; secondaryPath?: string } | undefined;
if (isYoutubeUrl && args.youtubeSubgenMode === 'preprocess') {
log('info', args.logLevel, 'YouTube subtitle mode: preprocess');
if (isYoutubeUrl) {
log('info', args.logLevel, 'YouTube subtitle generation: preload before mpv');
const generated = await generateYoutubeSubtitles(selectedTarget.target, args);
preloadedSubtitles = {
primaryPath: generated.primaryPath,
secondaryPath: generated.secondaryPath,
};
const primaryStatus = generated.primaryPath
? 'ready'
: generated.primaryNative
? 'native'
: 'missing';
const secondaryStatus = generated.secondaryPath
? 'ready'
: generated.secondaryNative
? 'native'
: 'missing';
log(
'info',
args.logLevel,
`YouTube preprocess result: primary=${generated.primaryPath ? 'ready' : 'missing'}, secondary=${generated.secondaryPath ? 'ready' : 'missing'}`,
`YouTube subtitle result: primary=${primaryStatus}, secondary=${secondaryStatus}`,
);
} else if (isYoutubeUrl && args.youtubeSubgenMode === 'automatic') {
log('info', args.logLevel, 'YouTube subtitle mode: automatic (background)');
} else if (isYoutubeUrl) {
log('info', args.logLevel, 'YouTube subtitle mode: off');
}
const shouldPauseUntilOverlayReady =
@@ -201,26 +202,6 @@ export async function runPlaybackCommand(context: LauncherCommandContext): Promi
{ startPaused: shouldPauseUntilOverlayReady },
);
if (isYoutubeUrl && args.youtubeSubgenMode === 'automatic') {
void generateYoutubeSubtitles(selectedTarget.target, args, async (lang, subtitlePath) => {
try {
await loadSubtitleIntoMpv(mpvSocketPath, subtitlePath, lang === 'primary', args.logLevel);
} catch (error) {
log(
'warn',
args.logLevel,
`Generated subtitle ready but failed to load in mpv: ${(error as Error).message}`,
);
}
}).catch((error) => {
log(
'warn',
args.logLevel,
`Background subtitle generation failed: ${(error as Error).message}`,
);
});
}
const ready = await waitForUnixSocketReady(mpvSocketPath, 10000);
const pluginAutoStartEnabled = pluginRuntimeConfig.autoStart;
const shouldStartOverlay = args.startOverlay || args.autoStartOverlay;

View File

@@ -6,10 +6,24 @@ import { parsePluginRuntimeConfigContent } from './config/plugin-runtime-config.
test('parseLauncherYoutubeSubgenConfig keeps only valid typed values', () => {
const parsed = parseLauncherYoutubeSubgenConfig({
ai: {
enabled: true,
apiKey: 'shared-key',
baseUrl: 'https://openrouter.ai/api',
model: 'openrouter/shared-model',
systemPrompt: 'Legacy shared prompt.',
requestTimeoutMs: 12000,
},
youtubeSubgen: {
mode: 'preprocess',
whisperBin: '/usr/bin/whisper',
whisperModel: '/models/base.bin',
whisperVadModel: '/models/vad.bin',
whisperThreads: 6.8,
fixWithAi: true,
ai: {
model: 'openrouter/subgen-model',
systemPrompt: 'Fix subtitles only.',
},
primarySubLanguages: ['ja', 42, 'en'],
},
secondarySub: {
@@ -24,9 +38,17 @@ test('parseLauncherYoutubeSubgenConfig keeps only valid typed values', () => {
},
});
assert.equal(parsed.mode, 'preprocess');
assert.equal('mode' in parsed, false);
assert.deepEqual(parsed.primarySubLanguages, ['ja', 'en']);
assert.deepEqual(parsed.secondarySubLanguages, ['eng', 'deu']);
assert.equal(parsed.whisperVadModel, '/models/vad.bin');
assert.equal(parsed.whisperThreads, 6);
assert.equal(parsed.fixWithAi, true);
assert.equal(parsed.ai?.enabled, true);
assert.equal(parsed.ai?.apiKey, 'shared-key');
assert.equal(parsed.ai?.model, 'openrouter/subgen-model');
assert.equal(parsed.ai?.systemPrompt, 'Fix subtitles only.');
assert.equal(parsed.ai?.requestTimeoutMs, 12000);
assert.equal(parsed.jimakuLanguagePreference, 'ja');
assert.equal(parsed.jimakuMaxEntryResults, 8);
});

View File

@@ -1,13 +1,7 @@
import fs from 'node:fs';
import path from 'node:path';
import { fail } from '../log.js';
import type {
Args,
Backend,
LauncherYoutubeSubgenConfig,
LogLevel,
YoutubeSubgenMode,
} from '../types.js';
import type { Args, Backend, LauncherYoutubeSubgenConfig, LogLevel } from '../types.js';
import {
DEFAULT_JIMAKU_API_BASE_URL,
DEFAULT_YOUTUBE_PRIMARY_SUB_LANGS,
@@ -54,14 +48,6 @@ function parseLogLevel(value: string): LogLevel {
fail(`Invalid log level: ${value} (must be debug, info, warn, or error)`);
}
/**
 * Normalizes a user-supplied yt-subgen mode string.
 *
 * Matching is case-insensitive: the lowercased value is returned when it is one of
 * `automatic`, `preprocess`, or `off`. Any other input is reported through `fail`
 * with a message that echoes the original (non-lowercased) value.
 */
function parseYoutubeMode(value: string): YoutubeSubgenMode {
  const lowered = value.toLowerCase();
  switch (lowered) {
    case 'automatic':
    case 'preprocess':
    case 'off':
      return lowered as YoutubeSubgenMode;
    default:
      break;
  }
  fail(`Invalid yt-subgen mode: ${value} (must be automatic, preprocess, or off)`);
}
function parseBackend(value: string): Backend {
if (value === 'auto' || value === 'hyprland' || value === 'x11' || value === 'macos') {
return value as Backend;
@@ -91,13 +77,6 @@ function parseDictionaryTarget(value: string): string {
}
export function createDefaultArgs(launcherConfig: LauncherYoutubeSubgenConfig): Args {
const envMode = (process.env.SUBMINER_YT_SUBGEN_MODE || '').toLowerCase();
const defaultMode: YoutubeSubgenMode =
envMode === 'preprocess' || envMode === 'off' || envMode === 'automatic'
? (envMode as YoutubeSubgenMode)
: launcherConfig.mode
? launcherConfig.mode
: 'automatic';
const configuredSecondaryLangs = uniqueNormalizedLangCodes(
launcherConfig.secondarySubLanguages ?? [],
);
@@ -120,12 +99,18 @@ export function createDefaultArgs(launcherConfig: LauncherYoutubeSubgenConfig):
recursive: false,
profile: 'subminer',
startOverlay: false,
youtubeSubgenMode: defaultMode,
whisperBin: process.env.SUBMINER_WHISPER_BIN || launcherConfig.whisperBin || '',
whisperModel: process.env.SUBMINER_WHISPER_MODEL || launcherConfig.whisperModel || '',
whisperVadModel: process.env.SUBMINER_WHISPER_VAD_MODEL || launcherConfig.whisperVadModel || '',
whisperThreads: (() => {
const envValue = Number.parseInt(process.env.SUBMINER_WHISPER_THREADS || '', 10);
if (Number.isInteger(envValue) && envValue > 0) return envValue;
return launcherConfig.whisperThreads || 4;
})(),
youtubeSubgenOutDir: process.env.SUBMINER_YT_SUBGEN_OUT_DIR || DEFAULT_YOUTUBE_SUBGEN_OUT_DIR,
youtubeSubgenAudioFormat: process.env.SUBMINER_YT_SUBGEN_AUDIO_FORMAT || 'm4a',
youtubeSubgenKeepTemp: process.env.SUBMINER_YT_SUBGEN_KEEP_TEMP === '1',
youtubeFixWithAi: launcherConfig.fixWithAi === true,
jimakuApiKey: process.env.SUBMINER_JIMAKU_API_KEY || '',
jimakuApiKeyCommand: process.env.SUBMINER_JIMAKU_API_KEY_COMMAND || '',
jimakuApiBaseUrl: process.env.SUBMINER_JIMAKU_API_BASE_URL || DEFAULT_JIMAKU_API_BASE_URL,
@@ -152,6 +137,15 @@ export function createDefaultArgs(launcherConfig: LauncherYoutubeSubgenConfig):
youtubeSecondarySubLangs: secondarySubLangs,
youtubeAudioLangs,
youtubeWhisperSourceLanguage: inferWhisperLanguage(primarySubLangs, 'ja'),
aiConfig: {
enabled: launcherConfig.ai?.enabled,
apiKey: launcherConfig.ai?.apiKey,
apiKeyCommand: launcherConfig.ai?.apiKeyCommand,
baseUrl: launcherConfig.ai?.baseUrl,
model: launcherConfig.ai?.model,
systemPrompt: launcherConfig.ai?.systemPrompt,
requestTimeoutMs: launcherConfig.ai?.requestTimeoutMs,
},
useTexthooker: true,
autoStartOverlay: false,
texthookerOnly: false,
@@ -242,8 +236,6 @@ export function applyInvocationsToArgs(parsed: Args, invocations: CliInvocations
if (invocations.ytInvocation) {
if (invocations.ytInvocation.logLevel)
parsed.logLevel = parseLogLevel(invocations.ytInvocation.logLevel);
if (invocations.ytInvocation.mode)
parsed.youtubeSubgenMode = parseYoutubeMode(invocations.ytInvocation.mode);
if (invocations.ytInvocation.outDir)
parsed.youtubeSubgenOutDir = invocations.ytInvocation.outDir;
if (invocations.ytInvocation.keepTemp) parsed.youtubeSubgenKeepTemp = true;
@@ -251,6 +243,10 @@ export function applyInvocationsToArgs(parsed: Args, invocations: CliInvocations
parsed.whisperBin = invocations.ytInvocation.whisperBin;
if (invocations.ytInvocation.whisperModel)
parsed.whisperModel = invocations.ytInvocation.whisperModel;
if (invocations.ytInvocation.whisperVadModel)
parsed.whisperVadModel = invocations.ytInvocation.whisperVadModel;
if (invocations.ytInvocation.whisperThreads)
parsed.whisperThreads = invocations.ytInvocation.whisperThreads;
if (invocations.ytInvocation.ytSubgenAudioFormat) {
parsed.youtubeSubgenAudioFormat = invocations.ytInvocation.ytSubgenAudioFormat;
}

View File

@@ -16,11 +16,12 @@ export interface JellyfinInvocation {
/**
 * Raw values captured from the `yt` / `youtube` CLI subcommand.
 * Every field is optional; values are validated or applied to the launcher args elsewhere.
 */
export interface YtInvocation {
// Positional argument: a YouTube URL or a `ytsearch:` query.
target?: string;
// Value of `-m, --mode` (subtitle generation mode).
// NOTE(review): this diff view appears to drop the --mode option; confirm whether this field is still populated.
mode?: string;
// Value of `-o, --out-dir`: subtitle output directory.
outDir?: string;
// Set when `--keep-temp` is passed: keep temp files.
keepTemp?: boolean;
// Value of `--whisper-bin`: whisper.cpp CLI path.
whisperBin?: string;
// Value of `--whisper-model`: whisper model path.
whisperModel?: string;
// Value of `--whisper-vad-model`: whisper.cpp VAD model path.
whisperVadModel?: string;
// Value of `--whisper-threads`: whisper.cpp thread count.
whisperThreads?: number;
// Value of `--yt-subgen-audio-format`: audio extraction format.
ytSubgenAudioFormat?: string;
// Value of `--log-level`, kept as the raw string supplied on the command line.
logLevel?: string;
}
@@ -201,21 +202,27 @@ export function parseCliPrograms(
.alias('youtube')
.description('YouTube workflows')
.argument('[target]', 'YouTube URL or ytsearch: query')
.option('-m, --mode <mode>', 'Subtitle generation mode')
.option('-o, --out-dir <dir>', 'Subtitle output dir')
.option('--keep-temp', 'Keep temp files')
.option('--whisper-bin <path>', 'whisper.cpp CLI path')
.option('--whisper-model <path>', 'whisper model path')
.option('--whisper-vad-model <path>', 'whisper.cpp VAD model path')
.option('--whisper-threads <n>', 'whisper.cpp thread count')
.option('--yt-subgen-audio-format <format>', 'Audio extraction format')
.option('--log-level <level>', 'Log level')
.action((target: string | undefined, options: Record<string, unknown>) => {
ytInvocation = {
target,
mode: typeof options.mode === 'string' ? options.mode : undefined,
outDir: typeof options.outDir === 'string' ? options.outDir : undefined,
keepTemp: options.keepTemp === true,
whisperBin: typeof options.whisperBin === 'string' ? options.whisperBin : undefined,
whisperModel: typeof options.whisperModel === 'string' ? options.whisperModel : undefined,
whisperVadModel:
typeof options.whisperVadModel === 'string' ? options.whisperVadModel : undefined,
// `--whisper-threads <n>` is registered without a custom argument parser, so Commander
// supplies the value as a string. Accept a numeric string as well as a number; the old
// `typeof === 'number'` guard could never match and silently dropped the flag.
// Non-numeric, non-finite, or non-positive values yield `undefined` (flag ignored),
// mirroring the positive-integer rule applied to the config-file value.
whisperThreads: (() => {
  const rawThreads = options.whisperThreads;
  const threadCount =
    typeof rawThreads === 'number'
      ? rawThreads
      : typeof rawThreads === 'string' && rawThreads.trim() !== ''
        ? Number(rawThreads)
        : Number.NaN;
  return Number.isFinite(threadCount) && threadCount >= 1 ? Math.floor(threadCount) : undefined;
})(),
ytSubgenAudioFormat:
typeof options.ytSubgenAudioFormat === 'string' ? options.ytSubgenAudioFormat : undefined,
logLevel: typeof options.logLevel === 'string' ? options.logLevel : undefined,

View File

@@ -1,4 +1,5 @@
import type { LauncherYoutubeSubgenConfig } from '../types.js';
import { mergeAiConfig } from '../../src/ai/config.js';
function asStringArray(value: unknown): string[] | undefined {
if (!Array.isArray(value)) return undefined;
@@ -21,17 +22,58 @@ export function parseLauncherYoutubeSubgenConfig(
const jimakuRaw = root.jimaku;
const jimaku =
jimakuRaw && typeof jimakuRaw === 'object' ? (jimakuRaw as Record<string, unknown>) : null;
const aiRaw = root.ai;
const ai = aiRaw && typeof aiRaw === 'object' ? (aiRaw as Record<string, unknown>) : null;
const youtubeAiRaw = youtubeSubgen?.ai;
const youtubeAi =
youtubeAiRaw && typeof youtubeAiRaw === 'object'
? (youtubeAiRaw as Record<string, unknown>)
: null;
const mode = youtubeSubgen?.mode;
const jimakuLanguagePreference = jimaku?.languagePreference;
const jimakuMaxEntryResults = jimaku?.maxEntryResults;
return {
mode: mode === 'automatic' || mode === 'preprocess' || mode === 'off' ? mode : undefined,
whisperBin:
typeof youtubeSubgen?.whisperBin === 'string' ? youtubeSubgen.whisperBin : undefined,
whisperModel:
typeof youtubeSubgen?.whisperModel === 'string' ? youtubeSubgen.whisperModel : undefined,
whisperVadModel:
typeof youtubeSubgen?.whisperVadModel === 'string'
? youtubeSubgen.whisperVadModel
: undefined,
whisperThreads:
typeof youtubeSubgen?.whisperThreads === 'number' &&
Number.isFinite(youtubeSubgen.whisperThreads) &&
youtubeSubgen.whisperThreads > 0
? Math.floor(youtubeSubgen.whisperThreads)
: undefined,
fixWithAi: typeof youtubeSubgen?.fixWithAi === 'boolean' ? youtubeSubgen.fixWithAi : undefined,
ai: mergeAiConfig(
ai
? {
enabled: typeof ai.enabled === 'boolean' ? ai.enabled : undefined,
apiKey: typeof ai.apiKey === 'string' ? ai.apiKey : undefined,
apiKeyCommand: typeof ai.apiKeyCommand === 'string' ? ai.apiKeyCommand : undefined,
baseUrl: typeof ai.baseUrl === 'string' ? ai.baseUrl : undefined,
model: typeof ai.model === 'string' ? ai.model : undefined,
systemPrompt: typeof ai.systemPrompt === 'string' ? ai.systemPrompt : undefined,
requestTimeoutMs:
typeof ai.requestTimeoutMs === 'number' &&
Number.isFinite(ai.requestTimeoutMs) &&
ai.requestTimeoutMs > 0
? Math.floor(ai.requestTimeoutMs)
: undefined,
}
: undefined,
youtubeAi
? {
model: typeof youtubeAi.model === 'string' ? youtubeAi.model : undefined,
systemPrompt:
typeof youtubeAi.systemPrompt === 'string' ? youtubeAi.systemPrompt : undefined,
}
: undefined,
),
primarySubLanguages: asStringArray(youtubeSubgen?.primarySubLanguages),
secondarySubLanguages: asStringArray(secondarySub?.secondarySubLanguages),
jimakuApiKey: typeof jimaku?.apiKey === 'string' ? jimaku.apiKey : undefined,

View File

@@ -162,6 +162,134 @@ test('doctor reports checks and exits non-zero without hard dependencies', () =>
});
});
// Passing `--mode` to the youtube subcommand must exit with status 1 and report the
// flag as an unknown option on stderr.
test('youtube command rejects removed --mode option', () => {
  withTempDir((tmpRoot) => {
    // Minimal executable standing in for the app binary referenced by SUBMINER_APPIMAGE_PATH.
    const fakeAppPath = path.join(tmpRoot, 'fake-subminer.sh');
    fs.writeFileSync(fakeAppPath, '#!/bin/sh\nexit 0\n');
    fs.chmodSync(fakeAppPath, 0o755);

    const launcherEnv = {
      ...makeTestEnv(path.join(tmpRoot, 'home'), path.join(tmpRoot, 'xdg')),
      SUBMINER_APPIMAGE_PATH: fakeAppPath,
    };
    const cliArgs = ['youtube', 'https://www.youtube.com/watch?v=test123', '--mode', 'automatic'];

    const { status, stderr } = runLauncher(cliArgs, launcherEnv);

    assert.equal(status, 1);
    assert.match(stderr, /unknown option '--mode'/i);
  });
});
// End-to-end ordering check: stub `yt-dlp`, `ffmpeg`, and `mpv` executables are placed
// first on PATH, the launcher is run against a YouTube URL, and the mpv stub records
// whether the yt-dlp stub had already been invoked by the time mpv started.
test('youtube playback generates subtitles before mpv launch', () => {
withTempDir((root) => {
const homeDir = path.join(root, 'home');
const xdgConfigHome = path.join(root, 'xdg');
const binDir = path.join(root, 'bin');
const appPath = path.join(root, 'fake-subminer.sh');
// Files the stub executables write to so the test can inspect what happened.
const ytdlpLogPath = path.join(root, 'yt-dlp.log');
const mpvCapturePath = path.join(root, 'mpv-order.txt');
const mpvArgsPath = path.join(root, 'mpv-args.txt');
const socketPath = path.join(root, 'mpv.sock');
fs.mkdirSync(binDir, { recursive: true });
fs.mkdirSync(path.join(xdgConfigHome, 'SubMiner'), { recursive: true });
fs.mkdirSync(path.join(xdgConfigHome, 'mpv', 'script-opts'), { recursive: true });
// Pre-seed a setup state marked as completed.
// NOTE(review): presumably this keeps the launcher from entering first-run setup; confirm.
fs.writeFileSync(
path.join(xdgConfigHome, 'SubMiner', 'setup-state.json'),
JSON.stringify({
version: 1,
status: 'completed',
completedAt: '2026-03-08T00:00:00.000Z',
completionSource: 'user',
lastSeenYomitanDictionaryCount: 0,
pluginInstallStatus: 'installed',
pluginInstallPathSummary: null,
}),
);
// Plugin script-opts: point the socket at the temp dir and turn every auto-start option off.
fs.writeFileSync(
path.join(xdgConfigHome, 'mpv', 'script-opts', 'subminer.conf'),
`socket_path=${socketPath}\nauto_start=no\nauto_start_visible_overlay=no\nauto_start_pause_until_ready=no\n`,
);
// Stand-in app binary that exits successfully without doing anything.
fs.writeFileSync(appPath, '#!/bin/sh\nexit 0\n');
fs.chmodSync(appPath, 0o755);
// yt-dlp stub: appends its arguments to the log file. For `--dump-single-json` it prints a
// JSON object with id "video123"; otherwise it takes the directory of the argument that
// follows `-o` and writes `video123.ja.srt` and `video123.en.srt` into it.
fs.writeFileSync(
path.join(binDir, 'yt-dlp'),
`#!/bin/sh
set -eu
printf '%s\\n' "$*" >> "$SUBMINER_TEST_YTDLP_LOG"
if printf '%s\\n' "$*" | grep -q -- '--dump-single-json'; then
printf '{"id":"video123"}\\n'
exit 0
fi
out_dir=""
prev=""
for arg in "$@"; do
if [ "$prev" = "-o" ]; then
out_dir=$(dirname "$arg")
break
fi
prev="$arg"
done
mkdir -p "$out_dir"
printf '1\\n00:00:00,000 --> 00:00:01,000\\nこんにちは\\n' > "$out_dir/video123.ja.srt"
printf '1\\n00:00:00,000 --> 00:00:01,000\\nhello\\n' > "$out_dir/video123.en.srt"
`,
'utf8',
);
fs.chmodSync(path.join(binDir, 'yt-dlp'), 0o755);
// ffmpeg stub: only has to exist on PATH and exit 0.
fs.writeFileSync(path.join(binDir, 'ffmpeg'), '#!/bin/sh\nexit 0\n', 'utf8');
fs.chmodSync(path.join(binDir, 'ffmpeg'), 0o755);
// mpv stub: writes 'generated-before-mpv' if the yt-dlp log is non-empty at launch time
// (otherwise 'mpv-before-generation'), dumps its arguments one per line, extracts the path
// from `--input-ipc-server=`, then runs a short-lived bun script that listens on that socket,
// ends each incoming connection immediately, and exits 250 ms after it starts listening.
fs.writeFileSync(
path.join(binDir, 'mpv'),
`#!/bin/sh
set -eu
if [ -s "$SUBMINER_TEST_YTDLP_LOG" ]; then
printf 'generated-before-mpv\\n' > "$SUBMINER_TEST_MPV_ORDER"
else
printf 'mpv-before-generation\\n' > "$SUBMINER_TEST_MPV_ORDER"
fi
printf '%s\\n' "$@" > "$SUBMINER_TEST_MPV_ARGS"
socket_path=""
for arg in "$@"; do
case "$arg" in
--input-ipc-server=*)
socket_path="\${arg#--input-ipc-server=}"
;;
esac
done
bun -e "const net=require('node:net'); const fs=require('node:fs'); const socket=process.argv[1]; try { fs.rmSync(socket,{force:true}); } catch {} const server=net.createServer((conn)=>conn.end()); server.listen(socket,()=>setTimeout(()=>server.close(()=>process.exit(0)),250));" "$socket_path"
`,
'utf8',
);
fs.chmodSync(path.join(binDir, 'mpv'), 0o755);
// Prepend the stub directory to PATH so the stubs shadow any real binaries, and pass the
// capture-file locations to the stubs through environment variables.
const env = {
...makeTestEnv(homeDir, xdgConfigHome),
PATH: `${binDir}${path.delimiter}${process.env.PATH || ''}`,
SUBMINER_APPIMAGE_PATH: appPath,
SUBMINER_TEST_YTDLP_LOG: ytdlpLogPath,
SUBMINER_TEST_MPV_ORDER: mpvCapturePath,
SUBMINER_TEST_MPV_ARGS: mpvArgsPath,
};
const result = runLauncher(['youtube', 'https://www.youtube.com/watch?v=test123'], env);
assert.equal(result.status, 0);
// yt-dlp must have been invoked before mpv started.
assert.equal(fs.readFileSync(mpvCapturePath, 'utf8').trim(), 'generated-before-mpv');
// mpv must have received the original YouTube URL among its arguments.
assert.match(
fs.readFileSync(mpvArgsPath, 'utf8'),
/https:\/\/www\.youtube\.com\/watch\?v=test123/,
);
// The yt-dlp stub must have been asked for metadata via --dump-single-json.
assert.match(fs.readFileSync(ytdlpLogPath, 'utf8'), /--dump-single-json/);
});
});
test('dictionary command forwards --dictionary and --dictionary-target to app command path', () => {
withTempDir((root) => {
const homeDir = path.join(root, 'home');

View File

@@ -42,26 +42,38 @@ export const DEFAULT_MPV_SUBMINER_ARGS = [
] as const;
/** Log severities accepted by the launcher. */
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
/**
 * YouTube subtitle generation modes.
 * NOTE(review): the surrounding change appears to remove mode selection; confirm this alias is still needed.
 */
export type YoutubeSubgenMode = 'automatic' | 'preprocess' | 'off';
/** Backend identifiers accepted by the launcher; `auto` is one of the accepted values. */
export type Backend = 'auto' | 'hyprland' | 'x11' | 'macos';
/** Accepted values for the Jimaku language preference setting. */
export type JimakuLanguagePreference = 'ja' | 'en' | 'none';
/**
 * AI settings carried by the launcher configuration and launcher args.
 * All fields are optional; unset fields stay `undefined`.
 */
export interface LauncherAiConfig {
// Boolean on/off flag for the AI integration.
enabled?: boolean;
// API key supplied directly as a string.
apiKey?: string;
// NOTE(review): presumably a command whose output provides the API key; confirm against the consumer.
apiKeyCommand?: string;
// Base URL of the AI service.
baseUrl?: string;
// Model identifier.
model?: string;
// System prompt text.
systemPrompt?: string;
// Request timeout in milliseconds.
requestTimeoutMs?: number;
}
export interface Args {
backend: Backend;
directory: string;
recursive: boolean;
profile: string;
startOverlay: boolean;
youtubeSubgenMode: YoutubeSubgenMode;
whisperBin: string;
whisperModel: string;
whisperVadModel: string;
whisperThreads: number;
youtubeSubgenOutDir: string;
youtubeSubgenAudioFormat: string;
youtubeSubgenKeepTemp: boolean;
youtubeFixWithAi: boolean;
youtubePrimarySubLangs: string[];
youtubeSecondarySubLangs: string[];
youtubeAudioLangs: string[];
youtubeWhisperSourceLanguage: string;
aiConfig: LauncherAiConfig;
useTexthooker: boolean;
autoStartOverlay: boolean;
texthookerOnly: boolean;
@@ -96,9 +108,12 @@ export interface Args {
}
export interface LauncherYoutubeSubgenConfig {
mode?: YoutubeSubgenMode;
whisperBin?: string;
whisperModel?: string;
whisperVadModel?: string;
whisperThreads?: number;
fixWithAi?: boolean;
ai?: LauncherAiConfig;
primarySubLanguages?: string[];
secondarySubLanguages?: string[];
jimakuApiKey?: string;
@@ -144,13 +159,15 @@ export interface SubtitleCandidate {
lang: 'primary' | 'secondary';
ext: string;
size: number;
source: 'manual' | 'auto' | 'whisper' | 'whisper-translate';
source: 'manual' | 'whisper' | 'whisper-fixed' | 'whisper-translate' | 'whisper-translate-fixed';
}
/**
 * Result of YouTube subtitle generation for one video.
 * For each track, callers report `ready` when a path is set, `native` when only the
 * corresponding native flag is set, and `missing` otherwise.
 */
export interface YoutubeSubgenOutputs {
// NOTE(review): presumably the base file name shared by the generated subtitle files; confirm.
basename: string;
// Path of the primary subtitle file, when one is available.
primaryPath?: string;
// Path of the secondary subtitle file, when one is available.
secondaryPath?: string;
// Set when the primary track is reported as `native` instead of as a file path.
primaryNative?: boolean;
// Set when the secondary track is reported as `native` instead of as a file path.
secondaryNative?: boolean;
}
export interface MpvTrack {