mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-20 12:11:28 -07:00
fix: harden AI subtitle fix response parsing
This commit is contained in:
126
launcher/youtube/subtitle-fix-ai.test.ts
Normal file
126
launcher/youtube/subtitle-fix-ai.test.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
|
||||
import { applyFixedCueBatch, parseAiSubtitleFixResponse } from './subtitle-fix-ai';
|
||||
import { parseSrt } from './srt';
|
||||
|
||||
test('applyFixedCueBatch accepts content-only fixes with identical timing', () => {
  // Both documents share the same two timing lines; only the first cue's text differs.
  const buildSrt = (firstCueText: string): string =>
    [
      '1',
      '00:00:01,000 --> 00:00:02,000',
      firstCueText,
      '',
      '2',
      '00:00:03,000 --> 00:00:04,000',
      '世界',
      '',
    ].join('\n');

  const sourceCues = parseSrt(buildSrt('こんいちは'));
  const correctedCues = parseSrt(buildSrt('こんにちは'));

  // The corrected text must be taken over because the timing is untouched.
  const mergedCues = applyFixedCueBatch(sourceCues, correctedCues);
  assert.equal(mergedCues[0]?.text, 'こんにちは');
});
|
||||
|
||||
test('applyFixedCueBatch rejects changed timestamps', () => {
  const sourceCues = parseSrt(['1', '00:00:01,000 --> 00:00:02,000', 'こんいちは', ''].join('\n'));
  // Same cue, but the start time has been shifted by 100 ms.
  const shiftedCues = parseSrt(['1', '00:00:01,100 --> 00:00:02,000', 'こんにちは', ''].join('\n'));

  const mergeShifted = (): unknown => applyFixedCueBatch(sourceCues, shiftedCues);
  assert.throws(mergeShifted, /timestamps/i);
});
|
||||
|
||||
test('parseAiSubtitleFixResponse accepts valid SRT wrapped in markdown fences', () => {
  const sourceCues = parseSrt(
    [
      '1',
      '00:00:01,000 --> 00:00:02,000',
      'こんいちは',
      '',
      '2',
      '00:00:03,000 --> 00:00:04,000',
      '世界',
      '',
    ].join('\n'),
  );

  // The reply is a complete SRT document wrapped in a ```srt code fence.
  const fencedReply =
    '```srt\n1\n00:00:01,000 --> 00:00:02,000\nこんにちは\n\n2\n00:00:03,000 --> 00:00:04,000\n世界\n```';
  const resultCues = parseAiSubtitleFixResponse(sourceCues, fencedReply);

  assert.equal(resultCues[0]?.text, 'こんにちは');
  assert.equal(resultCues[1]?.text, '世界');
});
|
||||
|
||||
test('parseAiSubtitleFixResponse accepts text-only one-block-per-cue output', () => {
  const sourceCues = parseSrt(
    [
      '1',
      '00:00:01,000 --> 00:00:02,000',
      'こんいちは',
      '',
      '2',
      '00:00:03,000 --> 00:00:04,000',
      '世界',
      '',
    ].join('\n'),
  );

  // The reply carries no indices or timing lines: just one paragraph per cue.
  const textOnlyReply = 'こんにちは\n\n世界';
  const resultCues = parseAiSubtitleFixResponse(sourceCues, textOnlyReply);

  // Timing must come from the original cues, text from the reply.
  const firstCue = resultCues[0];
  const secondCue = resultCues[1];
  assert.equal(firstCue?.start, '00:00:01,000');
  assert.equal(firstCue?.text, 'こんにちは');
  assert.equal(secondCue?.end, '00:00:04,000');
  assert.equal(secondCue?.text, '世界');
});
|
||||
|
||||
test('parseAiSubtitleFixResponse rejects unrecoverable text-only output', () => {
  const sourceCues = parseSrt(
    [
      '1',
      '00:00:01,000 --> 00:00:02,000',
      'こんいちは',
      '',
      '2',
      '00:00:03,000 --> 00:00:04,000',
      '世界',
      '',
    ].join('\n'),
  );

  // Three lines for two cues: neither paragraph nor line splitting matches the cue count.
  const surplusReply = 'こんにちは\n世界\n余分です';
  const parseSurplus = (): unknown => parseAiSubtitleFixResponse(sourceCues, surplusReply);

  assert.throws(parseSurplus, /cue block|cue count/i);
});
|
||||
|
||||
test('parseAiSubtitleFixResponse rejects language drift for primary Japanese subtitles', () => {
  const sourceCues = parseSrt(
    [
      '1',
      '00:00:01,000 --> 00:00:02,000',
      'こんにちは',
      '',
      '2',
      '00:00:03,000 --> 00:00:04,000',
      '今日はいい天気ですね',
      '',
    ].join('\n'),
  );

  // Structurally valid SRT with matching timing, but the text was translated to English.
  const translatedReply = [
    '1',
    '00:00:01,000 --> 00:00:02,000',
    'Hello',
    '',
    '2',
    '00:00:03,000 --> 00:00:04,000',
    'The weather is nice today',
    '',
  ].join('\n');

  const parseTranslated = (): unknown =>
    parseAiSubtitleFixResponse(sourceCues, translatedReply, 'ja');

  assert.throws(parseTranslated, /language/i);
});
|
||||
213
launcher/youtube/subtitle-fix-ai.ts
Normal file
213
launcher/youtube/subtitle-fix-ai.ts
Normal file
@@ -0,0 +1,213 @@
|
||||
import type { LauncherAiConfig } from '../types.js';
|
||||
import { requestAiChatCompletion, resolveAiApiKey } from '../../src/ai/client.js';
|
||||
import { parseSrt, stringifySrt, type SrtCue } from './srt.js';
|
||||
|
||||
/** System prompt sent to the model when the AI config does not provide a non-blank one. */
const DEFAULT_SUBTITLE_FIX_PROMPT =
  'Fix transcription mistakes only. Preserve cue numbering, timestamps, and valid SRT formatting exactly. Return only corrected SRT.';

/**
 * Captures everything from the first "index line + `HH:MM:SS,mmm --> HH:MM:SS,mmm` line"
 * pair through the end of the input (group 1).
 */
const SRT_BLOCK_PATTERN = /(?:^|\n)(\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}[\s\S]*)$/;

/** Matches input that is entirely one fenced code block; group 1 is the fence payload. */
const CODE_FENCE_PATTERN = /^```(?:\w+)?\s*\n([\s\S]*?)\n```$/;

/** Global matcher for single characters in the Hiragana, Katakana, or Han scripts. */
const JAPANESE_CHAR_PATTERN = /[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}]/gu;

/** Global matcher for single Latin-script characters. */
const LATIN_LETTER_PATTERN = /\p{Script=Latin}/gu;
|
||||
|
||||
/**
 * Merges corrected cue text into the original cues.
 *
 * Only `text` is taken from `fixed`; every other field of each returned cue is copied from
 * the cue at the same position in `original`.
 *
 * @param original Cues as they were before correction.
 * @param fixed Corrected cues, expected to line up one-to-one with `original`.
 * @returns New cue objects carrying the original fields and the corrected text.
 * @throws Error when the cue counts differ, a corrected cue is missing, or a corrected
 *   cue's `start`/`end` differs from the original's.
 */
export function applyFixedCueBatch(original: SrtCue[], fixed: SrtCue[]): SrtCue[] {
  const sameLength = original.length === fixed.length;
  if (!sameLength) {
    throw new Error('Fixed subtitle batch must preserve cue count.');
  }

  const mergeAt = (sourceCue: SrtCue, position: number): SrtCue => {
    const replacement = fixed[position];
    if (!replacement) {
      throw new Error('Missing fixed subtitle cue.');
    }

    const timingUnchanged =
      sourceCue.start === replacement.start && sourceCue.end === replacement.end;
    if (!timingUnchanged) {
      throw new Error('Fixed subtitle batch must preserve cue timestamps.');
    }

    return { ...sourceCue, text: replacement.text };
  };

  return original.map(mergeAt);
}
|
||||
|
||||
/**
 * Splits `cues` into consecutive batches of at most `size` cues, preserving order.
 *
 * @param cues Cues to partition; the array itself is not modified.
 * @param size Maximum number of cues per batch; must be a positive integer.
 * @returns The batches in order; the final batch may be shorter. Empty when `cues` is empty.
 * @throws RangeError when `size` is not a positive integer. A zero or negative step would
 *   never advance the loop, and NaN would silently produce a single empty batch.
 */
function chunkCues(cues: SrtCue[], size: number): SrtCue[][] {
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError(`Chunk size must be a positive integer, received ${size}.`);
  }

  const chunks: SrtCue[][] = [];
  for (let index = 0; index < cues.length; index += size) {
    chunks.push(cues.slice(index, index + size));
  }
  return chunks;
}
|
||||
|
||||
/**
 * Builds the ordered, de-duplicated list of strings worth parsing from a raw AI reply.
 *
 * Candidates, in insertion order:
 *  1. the whole reply, with line endings normalised to LF and outer whitespace trimmed;
 *  2. the payload of a code fence that wraps the entire reply;
 *  3. the payload of the first code fence found anywhere in the reply, which covers replies
 *     that add prose before or after the fenced subtitles;
 *  4. everything from the first SRT cue header onwards, cut at the first line that starts a
 *     code fence so a closing "```" (and any trailing prose) is not part of the candidate.
 *
 * @param content Raw model output.
 * @returns Candidate strings; empty when the reply is blank.
 */
function normalizeAiSubtitleFixCandidates(content: string): string[] {
  // Fold CRLF and lone CR into LF so the newline-based patterns below apply uniformly.
  const trimmed = content.replace(/\r\n?/g, '\n').trim();
  if (!trimmed) {
    return [];
  }

  const candidates = new Set<string>([trimmed]);
  const addCandidate = (value: string | undefined): void => {
    const cleaned = value?.trim();
    if (cleaned) {
      candidates.add(cleaned);
    }
  };

  // Reply that is nothing but one fenced block.
  addCandidate(CODE_FENCE_PATTERN.exec(trimmed)?.[1]);

  // Fenced block surrounded by extra prose ("Here is the corrected file: ...").
  addCandidate(/```[^\n`]*\n([\s\S]*?)\n```/.exec(trimmed)?.[1]);

  // Raw SRT tail; drop a trailing fence line and whatever follows it.
  addCandidate(SRT_BLOCK_PATTERN.exec(trimmed)?.[1]?.replace(/\n```[\s\S]*$/, ''));

  return [...candidates];
}
|
||||
|
||||
/**
 * Interprets `content` as bare cue text (no indices or timing lines) and pairs it with the
 * original cues.
 *
 * Two segmentations are attempted in order: blank-line-separated paragraphs, then individual
 * lines. Segments are trimmed and empty ones are dropped. The first segmentation whose
 * segment count equals the cue count wins.
 *
 * @param original Cues supplying every field except `text`.
 * @param content Text-only reply, expected to use LF line endings.
 * @returns Copies of the original cues with `text` replaced by the matching segment.
 * @throws Error when neither segmentation yields exactly one segment per cue.
 */
function parseTextOnlyCueBatch(original: SrtCue[], content: string): SrtCue[] {
  const expectedCount = original.length;

  // Evaluated lazily so the line split only happens when the paragraph split does not fit.
  const segmentations: Array<() => string[]> = [
    () => content.split(/\n{2,}/),
    () => content.split('\n'),
  ];

  for (const segment of segmentations) {
    const pieces = segment()
      .map((piece) => piece.trim())
      .filter((piece) => piece.length > 0);

    if (pieces.length === expectedCount) {
      return original.map((cue, position) => ({ ...cue, text: pieces[position]! }));
    }
  }

  throw new Error('Fixed subtitle batch must preserve cue count.');
}
|
||||
|
||||
/**
 * Counts the non-overlapping matches of `pattern` in `content`.
 *
 * The count is taken with a private global copy of the pattern, so a non-global RegExp is
 * accepted (a bare `matchAll` would throw a TypeError) and the caller's `lastIndex` is
 * neither read nor modified.
 *
 * @param content Text to scan.
 * @param pattern Pattern to count; its source and flags are reused, with `g` added if absent.
 * @returns Number of matches found when scanning from the start of `content`.
 */
function countPatternMatches(content: string, pattern: RegExp): number {
  const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
  const matcher = new RegExp(pattern.source, flags);

  let count = 0;
  // Iterate instead of spreading so the matches are not all held in memory just to be counted.
  for (const _match of content.matchAll(matcher)) {
    count += 1;
  }
  return count;
}
|
||||
|
||||
/**
 * Reports whether a language code designates Japanese.
 *
 * Only the primary subtag is compared, i.e. the part before the first `-` or `_`, so
 * region- or script-qualified tags such as `ja-JP` or `ja_JP` are recognised as well as
 * the bare `ja`, `jp`, and `jpn` codes. Matching ignores case and surrounding whitespace.
 *
 * @param language Language code, possibly undefined or empty.
 * @returns `true` when the primary subtag is `ja`, `jp`, or `jpn`; otherwise `false`.
 */
function isJapaneseLanguageCode(language: string | undefined): boolean {
  if (!language) return false;

  const [primarySubtag = ''] = language.trim().toLowerCase().split(/[-_]/);
  return primarySubtag === 'ja' || primarySubtag === 'jp' || primarySubtag === 'jpn';
}
|
||||
|
||||
/**
 * Guards against the corrected batch having been rewritten into another language.
 *
 * The check only applies when `expectedLanguage` is a Japanese code and the original text
 * contains at least four Japanese-script characters. In that case the corrected text is
 * rejected when it contains no Japanese-script character at all but four or more Latin
 * letters.
 *
 * @param original Cues before correction.
 * @param fixed Cues after correction.
 * @param expectedLanguage Language code of the subtitle track, if known.
 * @throws Error when the corrected text appears to have left Japanese.
 */
function validateExpectedLanguage(
  original: SrtCue[],
  fixed: SrtCue[],
  expectedLanguage: string | undefined,
): void {
  if (!isJapaneseLanguageCode(expectedLanguage)) {
    return;
  }

  const joinCueText = (cues: SrtCue[]): string => cues.map((entry) => entry.text).join('\n');

  // Too little Japanese in the source to draw any conclusion from.
  const japaneseInOriginal = countPatternMatches(joinCueText(original), JAPANESE_CHAR_PATTERN);
  if (japaneseInOriginal < 4) {
    return;
  }

  const correctedText = joinCueText(fixed);
  const japaneseInFixed = countPatternMatches(correctedText, JAPANESE_CHAR_PATTERN);
  const latinInFixed = countPatternMatches(correctedText, LATIN_LETTER_PATTERN);

  const lostAllJapanese = japaneseInFixed === 0;
  const mostlyLatin = latinInFixed >= 4;
  if (lostAllJapanese && mostlyLatin) {
    throw new Error('Fixed subtitle batch changed language away from expected Japanese.');
  }
}
|
||||
|
||||
/** Matches the start of an SRT/VTT-style timing line, e.g. `00:00:01,000 -->`. */
const SRT_TIMING_LINE_PATTERN = /\d{1,2}:\d{2}(?::\d{2})?[,.]\d{1,3}\s*-->/;

/**
 * Turns a raw AI reply into cues for one batch.
 *
 * Parsing happens in two passes over the normalised candidates:
 *  1. each candidate is parsed as SRT; the first one that parses and passes the language
 *     check is returned;
 *  2. otherwise each candidate is interpreted as text-only output (one block or line per
 *     cue) that reuses the original timing.
 *
 * The second pass skips any candidate that still contains a timing line. Such content is
 * SRT-shaped, so treating it as plain text would fold index and timestamp lines into the
 * cue text and silently replace the reply's timing with the original's. Skipping it also
 * keeps the more specific first-pass error instead of overwriting it.
 *
 * @param original Cues that were sent to the model.
 * @param content Raw model output.
 * @param expectedLanguage Optional language code used for the language-drift check.
 * @returns The parsed cues.
 * @throws Error The last parsing or validation error when no candidate is usable, or
 *   "AI subtitle fix returned empty content." when there was nothing to parse.
 */
export function parseAiSubtitleFixResponse(
  original: SrtCue[],
  content: string,
  expectedLanguage?: string,
): SrtCue[] {
  const candidates = normalizeAiSubtitleFixCandidates(content);
  let lastError: Error | null = null;

  // Pass 1: well-formed SRT.
  for (const candidate of candidates) {
    try {
      const parsed = parseSrt(candidate);
      validateExpectedLanguage(original, parsed, expectedLanguage);
      return parsed;
    } catch (error: unknown) {
      lastError = error instanceof Error ? error : new Error(String(error));
    }
  }

  // Pass 2: text-only fallback, restricted to candidates that do not look like SRT.
  for (const candidate of candidates) {
    if (SRT_TIMING_LINE_PATTERN.test(candidate)) {
      continue;
    }
    try {
      const parsed = parseTextOnlyCueBatch(original, candidate);
      validateExpectedLanguage(original, parsed, expectedLanguage);
      return parsed;
    } catch (error: unknown) {
      lastError = error instanceof Error ? error : new Error(String(error));
    }
  }

  throw lastError ?? new Error('AI subtitle fix returned empty content.');
}
|
||||
|
||||
/**
 * Sends an SRT document to the configured AI endpoint in batches of 25 cues and returns the
 * corrected document.
 *
 * The operation is all-or-nothing: `null` is returned when AI fixing is not enabled, no API
 * key resolves, the input has no cues, any request yields no content, or any reply fails
 * parsing or validation. The last two kinds of reply failure are reported via `logWarning`.
 *
 * @param subtitleContent SRT text to correct.
 * @param aiConfig AI settings (enabled flag, endpoint, model, timeout, optional system prompt).
 * @param logWarning Sink for warning messages; also handed to the chat-completion client.
 * @param expectedLanguage Optional language code used to detect language drift in replies.
 * @returns The corrected SRT text, or `null` when no fix could be produced.
 */
export async function fixSubtitleWithAi(
  subtitleContent: string,
  aiConfig: LauncherAiConfig,
  logWarning: (message: string) => void,
  expectedLanguage?: string,
): Promise<string | null> {
  const aiEnabled = aiConfig.enabled === true;
  if (!aiEnabled) return null;

  const apiKey = await resolveAiApiKey(aiConfig);
  if (!apiKey) return null;

  const allCues = parseSrt(subtitleContent);
  if (allCues.length === 0) return null;

  const correctedCues: SrtCue[] = [];

  for (const batch of chunkCues(allCues, 25)) {
    // Requests are issued one batch at a time, in document order.
    const reply = await requestAiChatCompletion(
      {
        apiKey,
        baseUrl: aiConfig.baseUrl,
        model: aiConfig.model,
        timeoutMs: aiConfig.requestTimeoutMs,
        messages: [
          {
            role: 'system',
            content: aiConfig.systemPrompt?.trim() || DEFAULT_SUBTITLE_FIX_PROMPT,
          },
          {
            role: 'user',
            content: stringifySrt(batch),
          },
        ],
      },
      { logWarning },
    );
    if (!reply) return null;

    // Step 1: turn the reply into cues.
    let replyCues: SrtCue[];
    try {
      replyCues = parseAiSubtitleFixResponse(batch, reply, expectedLanguage);
    } catch (error) {
      logWarning(`AI subtitle fix returned invalid SRT: ${(error as Error).message}`);
      return null;
    }

    // Step 2: accept the new text only if cue count and timing are preserved.
    let mergedCues: SrtCue[];
    try {
      mergedCues = applyFixedCueBatch(batch, replyCues);
    } catch (error) {
      logWarning(`AI subtitle fix validation failed: ${(error as Error).message}`);
      return null;
    }
    correctedCues.push(...mergedCues);
  }

  return stringifySrt(correctedCues);
}
|
||||
Reference in New Issue
Block a user