fix(subtitle-ws): send tokenized payloads to texthooker

This commit is contained in:
2026-02-19 17:21:26 -08:00
parent d5d71816ac
commit 7795cc3d69
5 changed files with 376 additions and 179 deletions

View File

@@ -7,7 +7,7 @@ function flushMicrotasks(): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, 0));
}
test('subtitle processing emits plain subtitle immediately before tokenized payload', async () => {
test('subtitle processing emits tokenized payload when tokenization succeeds', async () => {
const emitted: SubtitleData[] = [];
const controller = createSubtitleProcessingController({
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
@@ -15,13 +15,11 @@ test('subtitle processing emits plain subtitle immediately before tokenized payl
});
controller.onSubtitleChange('字幕');
assert.deepEqual(emitted[0], { text: '字幕', tokens: null });
await flushMicrotasks();
assert.deepEqual(emitted[1], { text: '字幕', tokens: [] });
assert.deepEqual(emitted, [{ text: '字幕', tokens: [] }]);
});
test('subtitle processing drops stale tokenization and delivers latest subtitle only', async () => {
test('subtitle processing drops stale tokenization and delivers latest subtitle only once', async () => {
const emitted: SubtitleData[] = [];
let firstResolve: ((value: SubtitleData | null) => void) | undefined;
const controller = createSubtitleProcessingController({
@@ -43,14 +41,10 @@ test('subtitle processing drops stale tokenization and delivers latest subtitle
await flushMicrotasks();
await flushMicrotasks();
assert.deepEqual(emitted, [
{ text: 'first', tokens: null },
{ text: 'second', tokens: null },
{ text: 'second', tokens: [] },
]);
assert.deepEqual(emitted, [{ text: 'second', tokens: [] }]);
});
test('subtitle processing skips duplicate plain subtitle emission', async () => {
test('subtitle processing skips duplicate subtitle emission', async () => {
const emitted: SubtitleData[] = [];
let tokenizeCalls = 0;
const controller = createSubtitleProcessingController({
@@ -66,7 +60,19 @@ test('subtitle processing skips duplicate plain subtitle emission', async () =>
controller.onSubtitleChange('same');
await flushMicrotasks();
const plainEmits = emitted.filter((entry) => entry.tokens === null);
assert.equal(plainEmits.length, 1);
assert.equal(emitted.length, 1);
assert.equal(tokenizeCalls, 1);
});
// A tokenizer that resolves to null must still result in exactly one emission:
// the untokenized subtitle.
test('subtitle processing falls back to plain subtitle when tokenization returns null', async () => {
  const received: SubtitleData[] = [];
  const subject = createSubtitleProcessingController({
    tokenizeSubtitle: () => Promise.resolve(null),
    emitSubtitle: (data) => received.push(data),
  });

  subject.onSubtitleChange('fallback');
  await flushMicrotasks();

  const expected: SubtitleData[] = [{ text: 'fallback', tokens: null }];
  assert.deepEqual(received, expected);
});

View File

@@ -15,19 +15,11 @@ export function createSubtitleProcessingController(
deps: SubtitleProcessingControllerDeps,
): SubtitleProcessingController {
let latestText = '';
let lastPlainText = '';
let lastEmittedText = '';
let processing = false;
let staleDropCount = 0;
const now = deps.now ?? (() => Date.now());
const emitPlainSubtitle = (text: string): void => {
if (text === lastPlainText) {
return;
}
lastPlainText = text;
deps.emitSubtitle({ text, tokens: null });
};
const processLatest = (): void => {
if (processing) {
return;
@@ -38,14 +30,20 @@ export function createSubtitleProcessingController(
void (async () => {
while (true) {
const text = latestText;
const startedAtMs = now();
if (!text.trim()) {
deps.emitSubtitle({ text, tokens: null });
lastEmittedText = text;
break;
}
const startedAtMs = now();
let tokenized: SubtitleData | null = null;
let output: SubtitleData = { text, tokens: null };
try {
tokenized = await deps.tokenizeSubtitle(text);
const tokenized = await deps.tokenizeSubtitle(text);
if (tokenized) {
output = tokenized;
}
} catch (error) {
deps.logDebug?.(`Subtitle tokenization failed: ${(error as Error).message}`);
}
@@ -58,12 +56,11 @@ export function createSubtitleProcessingController(
continue;
}
if (tokenized) {
deps.emitSubtitle(tokenized);
deps.logDebug?.(
`Subtitle tokenization delivered; elapsed=${now() - startedAtMs}ms, staleDrops=${staleDropCount}`,
);
}
deps.emitSubtitle(output);
lastEmittedText = text;
deps.logDebug?.(
`Subtitle tokenization delivered; elapsed=${now() - startedAtMs}ms, staleDrops=${staleDropCount}`,
);
break;
}
})()
@@ -72,7 +69,7 @@ export function createSubtitleProcessingController(
})
.finally(() => {
processing = false;
if (latestText !== lastPlainText) {
if (latestText !== lastEmittedText) {
processLatest();
}
});
@@ -83,13 +80,7 @@ export function createSubtitleProcessingController(
if (text === latestText) {
return;
}
const plainStartedAtMs = now();
latestText = text;
emitPlainSubtitle(text);
deps.logDebug?.(`Subtitle plain emit completed in ${now() - plainStartedAtMs}ms`);
if (!text.trim()) {
return;
}
processLatest();
},
};

View File

@@ -0,0 +1,89 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { serializeSubtitleMarkup, serializeSubtitleWebsocketMessage } from './subtitle-ws';
import { PartOfSpeech, type SubtitleData } from '../../types';
// Serializer options shared by every test below: frequency highlighting on,
// banded mode, threshold at rank 1000.
const frequencyOptions = { enabled: true, topX: 1000, mode: 'banded' } as const;
// Untokenized payloads are HTML-escaped and their newlines become <br>.
test('serializeSubtitleMarkup escapes plain text and preserves line breaks', () => {
  const untokenized: SubtitleData = { text: 'a < b\nx & y', tokens: null };

  const rendered = serializeSubtitleMarkup(untokenized, frequencyOptions);

  assert.equal(rendered, 'a &lt; b<br>x &amp; y');
});
// One token per annotation kind; each must surface its own CSS class next to
// the base `word` class.
test('serializeSubtitleMarkup includes known, n+1, jlpt, and frequency classes', () => {
  // Fields every fixture token shares; entries below override only what differs.
  const baseToken = {
    reading: '',
    headword: '',
    partOfSpeech: PartOfSpeech.other,
    isMerged: false,
    isKnown: false,
    isNPlusOneTarget: false,
  };

  const payload: SubtitleData = {
    text: 'ignored',
    tokens: [
      { ...baseToken, surface: '既知', startPos: 0, endPos: 2, isKnown: true },
      { ...baseToken, surface: '新語', startPos: 2, endPos: 4, isNPlusOneTarget: true },
      { ...baseToken, surface: '級', startPos: 4, endPos: 5, jlptLevel: 'N3' },
      { ...baseToken, surface: '頻度', startPos: 5, endPos: 7, frequencyRank: 10 },
    ],
  };

  const markup = serializeSubtitleMarkup(payload, frequencyOptions);

  const expectedClassPatterns = [
    /word word-known/,
    /word word-n-plus-one/,
    /word word-jlpt-n3/,
    /word word-frequency-band-1/,
  ];
  for (const pattern of expectedClassPatterns) {
    assert.match(markup, pattern);
  }
});
// The wire format is a JSON object with a single `sentence` field.
test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
  const message = serializeSubtitleWebsocketMessage(
    { text: '字幕', tokens: null },
    frequencyOptions,
  );

  const decoded: unknown = JSON.parse(message);
  assert.deepEqual(decoded, { sentence: '字幕' });
});

View File

@@ -3,6 +3,7 @@ import * as os from 'os';
import * as path from 'path';
import WebSocket from 'ws';
import { createLogger } from '../../logger';
import type { MergedToken, SubtitleData } from '../../types';
const logger = createLogger('main:subtitle-ws');
@@ -11,18 +12,117 @@ export function hasMpvWebsocketPlugin(): boolean {
return fs.existsSync(mpvWebsocketPath);
}
/**
 * Controls whether and how frequency-rank CSS classes are attached to tokens
 * when a subtitle is serialized to markup.
 */
export type SubtitleWebsocketFrequencyOptions = {
/** When false, no frequency class is produced for any token. */
enabled: boolean;
/** Rank cutoff (floored, minimum 1): tokens ranked above it get no frequency class. */
topX: number;
/** 'banded' yields `word-frequency-band-1` to `-5` by rank relative to `topX`; 'single' yields `word-frequency-single`. */
mode: 'single' | 'banded';
};
/** Entity replacement for each character that is unsafe in HTML text or quoted attributes. */
const HTML_ENTITY_BY_CHAR: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Escapes `&`, `<`, `>`, `"` and `'` so the text can be embedded in HTML.
 *
 * Works in a single pass, so an ampersand that is already part of an entity in
 * the input is escaped again (`&amp;` becomes `&amp;amp;`).
 *
 * @param text Raw text to escape.
 * @returns The text with the five special characters replaced by entities.
 */
function escapeHtml(text: string): string {
  return text.replace(/[&<>"']/g, (ch) => HTML_ENTITY_BY_CHAR[ch] ?? ch);
}
/**
 * Chooses the frequency-highlight CSS class for a token, if it qualifies.
 *
 * The token's rank is floored and clamped to at least 1, as is `options.topX`.
 * Tokens ranked above the threshold get no class. In `'banded'` mode the range
 * `1..topX` is split into five equal bands (1 = most frequent); any other mode
 * yields a single shared class.
 *
 * @param token Token whose `frequencyRank` is inspected.
 * @param options Frequency highlighting settings.
 * @returns `word-frequency-band-N` (N in 1..5), `word-frequency-single`, or
 *   `null` when highlighting is disabled, the token has no finite rank, the
 *   rank exceeds the threshold, or the threshold is NaN.
 */
function computeFrequencyClass(
  token: MergedToken,
  options: SubtitleWebsocketFrequencyOptions,
): string | null {
  if (!options.enabled) return null;

  const { frequencyRank } = token;
  if (typeof frequencyRank !== 'number' || !Number.isFinite(frequencyRank)) return null;

  const rank = Math.max(1, Math.floor(frequencyRank));
  const topX = Math.max(1, Math.floor(options.topX));

  // Math.max propagates NaN, and `rank > NaN` is always false, so without this
  // guard a NaN threshold would emit `word-frequency-band-NaN` (or highlight
  // every ranked token in single mode).
  if (Number.isNaN(topX)) return null;

  if (rank > topX) return null;

  if (options.mode === 'banded') {
    const band = Math.min(5, Math.max(1, Math.ceil((rank / topX) * 5)));
    return `word-frequency-band-${band}`;
  }
  return 'word-frequency-single';
}
/**
 * Builds the space-separated `class` attribute value for one token.
 *
 * Always starts with `word`, then adds in order: a status class (n+1 target
 * takes precedence over known), a JLPT class when the token has a level, and a
 * frequency class, which is considered only for tokens that are neither known
 * nor n+1 targets.
 *
 * @param token Token to classify.
 * @param options Frequency highlighting settings passed to the frequency lookup.
 * @returns The class list, e.g. `word word-known word-jlpt-n3`.
 */
function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequencyOptions): string {
  let statusClass: string | null = null;
  if (token.isNPlusOneTarget) {
    statusClass = 'word-n-plus-one';
  } else if (token.isKnown) {
    statusClass = 'word-known';
  }

  const jlptClass = token.jlptLevel ? `word-jlpt-${token.jlptLevel.toLowerCase()}` : null;

  // Frequency highlighting applies only to words without a status class.
  const frequencyClass = statusClass === null ? computeFrequencyClass(token, options) : null;

  return ['word', statusClass, jlptClass, frequencyClass]
    .filter((name): name is string => Boolean(name))
    .join(' ');
}
/**
 * Renders a subtitle payload as an HTML fragment.
 *
 * Without tokens (null or empty), the raw text is escaped and each newline
 * becomes `<br>`. With tokens, only the token surfaces are rendered and
 * `payload.text` is not consulted: each non-empty line of a surface is wrapped
 * in a `<span>` carrying the token's classes, and newlines inside a surface
 * become `<br>`.
 *
 * @param payload Subtitle text plus optional tokenization.
 * @param options Frequency highlighting settings used when computing classes.
 * @returns The HTML fragment.
 */
export function serializeSubtitleMarkup(
  payload: SubtitleData,
  options: SubtitleWebsocketFrequencyOptions,
): string {
  const { text, tokens } = payload;
  if (!tokens || tokens.length === 0) {
    return escapeHtml(text).split('\n').join('<br>');
  }

  return tokens
    .map((token) => {
      const wordClass = computeWordClass(token, options);
      return token.surface
        .split('\n')
        .map((segment) =>
          // Empty segments (leading, trailing or doubled newlines) produce no span.
          segment ? `<span class="${wordClass}">${escapeHtml(segment)}</span>` : '',
        )
        .join('<br>');
    })
    .join('');
}
/**
 * Produces the JSON text frame sent to websocket clients: an object whose only
 * field, `sentence`, holds the subtitle rendered as markup.
 *
 * @param payload Subtitle text plus optional tokenization.
 * @param options Frequency highlighting settings used when rendering.
 * @returns The JSON-encoded message.
 */
export function serializeSubtitleWebsocketMessage(
  payload: SubtitleData,
  options: SubtitleWebsocketFrequencyOptions,
): string {
  const sentence = serializeSubtitleMarkup(payload, options);
  return JSON.stringify({ sentence });
}
export class SubtitleWebSocket {
private server: WebSocket.Server | null = null;
private latestMessage = '';
public isRunning(): boolean {
return this.server !== null;
}
public hasClients(): boolean {
return (this.server?.clients.size ?? 0) > 0;
}
public start(port: number, getCurrentSubtitleText: () => string): void {
this.server = new WebSocket.Server({ port, host: '127.0.0.1' });
this.server.on('connection', (ws: WebSocket) => {
logger.info('WebSocket client connected');
if (this.latestMessage) {
ws.send(this.latestMessage);
return;
}
const currentText = getCurrentSubtitleText();
if (currentText) {
ws.send(JSON.stringify({ sentence: currentText }));
@@ -36,9 +136,10 @@ export class SubtitleWebSocket {
logger.info(`Subtitle WebSocket server running on ws://127.0.0.1:${port}`);
}
public broadcast(text: string): void {
public broadcast(payload: SubtitleData, options: SubtitleWebsocketFrequencyOptions): void {
if (!this.server) return;
const message = JSON.stringify({ sentence: text });
const message = serializeSubtitleWebsocketMessage(payload, options);
this.latestMessage = message;
for (const client of this.server.clients) {
if (client.readyState === WebSocket.OPEN) {
client.send(message);
@@ -51,5 +152,6 @@ export class SubtitleWebSocket {
this.server.close();
this.server = null;
}
this.latestMessage = '';
}
}