mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-28 06:22:45 -08:00
fix(subtitle-ws): send tokenized payloads to texthooker
This commit is contained in:
@@ -7,7 +7,7 @@ function flushMicrotasks(): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, 0));
|
||||
}
|
||||
|
||||
test('subtitle processing emits plain subtitle immediately before tokenized payload', async () => {
|
||||
test('subtitle processing emits tokenized payload when tokenization succeeds', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
|
||||
@@ -15,13 +15,11 @@ test('subtitle processing emits plain subtitle immediately before tokenized payl
|
||||
});
|
||||
|
||||
controller.onSubtitleChange('字幕');
|
||||
assert.deepEqual(emitted[0], { text: '字幕', tokens: null });
|
||||
|
||||
await flushMicrotasks();
|
||||
assert.deepEqual(emitted[1], { text: '字幕', tokens: [] });
|
||||
assert.deepEqual(emitted, [{ text: '字幕', tokens: [] }]);
|
||||
});
|
||||
|
||||
test('subtitle processing drops stale tokenization and delivers latest subtitle only', async () => {
|
||||
test('subtitle processing drops stale tokenization and delivers latest subtitle only once', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
let firstResolve: ((value: SubtitleData | null) => void) | undefined;
|
||||
const controller = createSubtitleProcessingController({
|
||||
@@ -43,14 +41,10 @@ test('subtitle processing drops stale tokenization and delivers latest subtitle
|
||||
await flushMicrotasks();
|
||||
await flushMicrotasks();
|
||||
|
||||
assert.deepEqual(emitted, [
|
||||
{ text: 'first', tokens: null },
|
||||
{ text: 'second', tokens: null },
|
||||
{ text: 'second', tokens: [] },
|
||||
]);
|
||||
assert.deepEqual(emitted, [{ text: 'second', tokens: [] }]);
|
||||
});
|
||||
|
||||
test('subtitle processing skips duplicate plain subtitle emission', async () => {
|
||||
test('subtitle processing skips duplicate subtitle emission', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
let tokenizeCalls = 0;
|
||||
const controller = createSubtitleProcessingController({
|
||||
@@ -66,7 +60,19 @@ test('subtitle processing skips duplicate plain subtitle emission', async () =>
|
||||
controller.onSubtitleChange('same');
|
||||
await flushMicrotasks();
|
||||
|
||||
const plainEmits = emitted.filter((entry) => entry.tokens === null);
|
||||
assert.equal(plainEmits.length, 1);
|
||||
assert.equal(emitted.length, 1);
|
||||
assert.equal(tokenizeCalls, 1);
|
||||
});
|
||||
|
||||
test('subtitle processing falls back to plain subtitle when tokenization returns null', async () => {
|
||||
const emitted: SubtitleData[] = [];
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async () => null,
|
||||
emitSubtitle: (payload) => emitted.push(payload),
|
||||
});
|
||||
|
||||
controller.onSubtitleChange('fallback');
|
||||
await flushMicrotasks();
|
||||
|
||||
assert.deepEqual(emitted, [{ text: 'fallback', tokens: null }]);
|
||||
});
|
||||
|
||||
@@ -15,19 +15,11 @@ export function createSubtitleProcessingController(
|
||||
deps: SubtitleProcessingControllerDeps,
|
||||
): SubtitleProcessingController {
|
||||
let latestText = '';
|
||||
let lastPlainText = '';
|
||||
let lastEmittedText = '';
|
||||
let processing = false;
|
||||
let staleDropCount = 0;
|
||||
const now = deps.now ?? (() => Date.now());
|
||||
|
||||
const emitPlainSubtitle = (text: string): void => {
|
||||
if (text === lastPlainText) {
|
||||
return;
|
||||
}
|
||||
lastPlainText = text;
|
||||
deps.emitSubtitle({ text, tokens: null });
|
||||
};
|
||||
|
||||
const processLatest = (): void => {
|
||||
if (processing) {
|
||||
return;
|
||||
@@ -38,14 +30,20 @@ export function createSubtitleProcessingController(
|
||||
void (async () => {
|
||||
while (true) {
|
||||
const text = latestText;
|
||||
const startedAtMs = now();
|
||||
|
||||
if (!text.trim()) {
|
||||
deps.emitSubtitle({ text, tokens: null });
|
||||
lastEmittedText = text;
|
||||
break;
|
||||
}
|
||||
|
||||
const startedAtMs = now();
|
||||
let tokenized: SubtitleData | null = null;
|
||||
let output: SubtitleData = { text, tokens: null };
|
||||
try {
|
||||
tokenized = await deps.tokenizeSubtitle(text);
|
||||
const tokenized = await deps.tokenizeSubtitle(text);
|
||||
if (tokenized) {
|
||||
output = tokenized;
|
||||
}
|
||||
} catch (error) {
|
||||
deps.logDebug?.(`Subtitle tokenization failed: ${(error as Error).message}`);
|
||||
}
|
||||
@@ -58,12 +56,11 @@ export function createSubtitleProcessingController(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tokenized) {
|
||||
deps.emitSubtitle(tokenized);
|
||||
deps.logDebug?.(
|
||||
`Subtitle tokenization delivered; elapsed=${now() - startedAtMs}ms, staleDrops=${staleDropCount}`,
|
||||
);
|
||||
}
|
||||
deps.emitSubtitle(output);
|
||||
lastEmittedText = text;
|
||||
deps.logDebug?.(
|
||||
`Subtitle tokenization delivered; elapsed=${now() - startedAtMs}ms, staleDrops=${staleDropCount}`,
|
||||
);
|
||||
break;
|
||||
}
|
||||
})()
|
||||
@@ -72,7 +69,7 @@ export function createSubtitleProcessingController(
|
||||
})
|
||||
.finally(() => {
|
||||
processing = false;
|
||||
if (latestText !== lastPlainText) {
|
||||
if (latestText !== lastEmittedText) {
|
||||
processLatest();
|
||||
}
|
||||
});
|
||||
@@ -83,13 +80,7 @@ export function createSubtitleProcessingController(
|
||||
if (text === latestText) {
|
||||
return;
|
||||
}
|
||||
const plainStartedAtMs = now();
|
||||
latestText = text;
|
||||
emitPlainSubtitle(text);
|
||||
deps.logDebug?.(`Subtitle plain emit completed in ${now() - plainStartedAtMs}ms`);
|
||||
if (!text.trim()) {
|
||||
return;
|
||||
}
|
||||
processLatest();
|
||||
},
|
||||
};
|
||||
|
||||
89
src/core/services/subtitle-ws.test.ts
Normal file
89
src/core/services/subtitle-ws.test.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { serializeSubtitleMarkup, serializeSubtitleWebsocketMessage } from './subtitle-ws';
|
||||
import { PartOfSpeech, type SubtitleData } from '../../types';
|
||||
|
||||
// Options shared by every case in this file: frequency highlighting on,
// a rank cutoff of 1000, and banded (rather than single) class names.
const frequencyOptions = { enabled: true, topX: 1000, mode: 'banded' as const };
|
||||
|
||||
// A payload without tokens takes the plain-text path: the text must come back
// HTML-escaped, with each newline rendered as a <br> element.
test('serializeSubtitleMarkup escapes plain text and preserves line breaks', () => {
  const payload: SubtitleData = {
    text: 'a < b\nx & y',
    tokens: null,
  };

  // The expectation spells out the entity references explicitly; comparing
  // against the raw characters would not prove that any escaping happened.
  assert.equal(serializeSubtitleMarkup(payload, frequencyOptions), 'a &lt; b<br>x &amp; y');
});
|
||||
|
||||
// Feeds one token per annotation kind through the serializer and checks that
// each kind shows up as the expected class list in the generated markup.
test('serializeSubtitleMarkup includes known, n+1, jlpt, and frequency classes', () => {
  type Token = NonNullable<SubtitleData['tokens']>[number];

  // Fills in neutral defaults so each fixture only states what it exercises.
  const makeToken = (
    surface: string,
    startPos: number,
    endPos: number,
    overrides: Partial<Token> = {},
  ): Token => ({
    surface,
    reading: '',
    headword: '',
    startPos,
    endPos,
    partOfSpeech: PartOfSpeech.other,
    isMerged: false,
    isKnown: false,
    isNPlusOneTarget: false,
    ...overrides,
  });

  const payload: SubtitleData = {
    text: 'ignored',
    tokens: [
      makeToken('既知', 0, 2, { isKnown: true }),
      makeToken('新語', 2, 4, { isNPlusOneTarget: true }),
      makeToken('級', 4, 5, { jlptLevel: 'N3' }),
      makeToken('頻度', 5, 7, { frequencyRank: 10 }),
    ],
  };

  const markup = serializeSubtitleMarkup(payload, frequencyOptions);
  const expectedClassLists = [
    /word word-known/,
    /word word-n-plus-one/,
    /word word-jlpt-n3/,
    /word word-frequency-band-1/,
  ];
  for (const pattern of expectedClassLists) {
    assert.match(markup, pattern);
  }
});
|
||||
|
||||
// The websocket frame is a JSON object whose only key is `sentence`.
test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
  const plainSubtitle: SubtitleData = { text: '字幕', tokens: null };

  const decoded: unknown = JSON.parse(
    serializeSubtitleWebsocketMessage(plainSubtitle, frequencyOptions),
  );

  assert.deepEqual(decoded, { sentence: '字幕' });
});
|
||||
@@ -3,6 +3,7 @@ import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import WebSocket from 'ws';
|
||||
import { createLogger } from '../../logger';
|
||||
import type { MergedToken, SubtitleData } from '../../types';
|
||||
|
||||
const logger = createLogger('main:subtitle-ws');
|
||||
|
||||
@@ -11,18 +12,117 @@ export function hasMpvWebsocketPlugin(): boolean {
|
||||
return fs.existsSync(mpvWebsocketPath);
|
||||
}
|
||||
|
||||
/**
 * Frequency-highlighting settings consulted when subtitle tokens are
 * serialized into markup for websocket clients.
 */
export type SubtitleWebsocketFrequencyOptions = {
  /** When false, no frequency class is emitted for any token. */
  enabled: boolean;
  /** Rank cutoff: tokens ranked beyond this value get no frequency class. */
  topX: number;
  /** Selects between one shared frequency class and banded frequency classes. */
  mode: 'single' | 'banded';
};
|
||||
|
||||
/**
 * Escapes the five HTML-significant characters so arbitrary subtitle text can
 * be placed inside markup without being parsed as tags, attributes or entities.
 *
 * @param text - Raw text to escape.
 * @returns `text` with `&`, `<`, `>`, `"` and `'` replaced by entity references.
 */
function escapeHtml(text: string): string {
  // The ampersand is handled first; otherwise the `&` introduced by the later
  // replacements would itself be escaped a second time.
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
||||
|
||||
/**
 * Picks the frequency-highlight CSS class for a token, if it qualifies.
 *
 * @param token - Token whose `frequencyRank` is inspected.
 * @param options - Highlighting switch, rank cutoff (`topX`) and class mode.
 * @returns `word-frequency-band-1` … `word-frequency-band-5` in banded mode,
 *   `word-frequency-single` otherwise, or `null` when highlighting is
 *   disabled, the token has no finite numeric rank, the cutoff is not a
 *   number, or the rank lies beyond the cutoff.
 */
function computeFrequencyClass(
  token: MergedToken,
  options: SubtitleWebsocketFrequencyOptions,
): string | null {
  if (!options.enabled) return null;

  const { frequencyRank } = token;
  if (typeof frequencyRank !== 'number' || !Number.isFinite(frequencyRank)) return null;

  // Both values are floored and clamped to at least 1 before comparing.
  const rank = Math.max(1, Math.floor(frequencyRank));
  const topX = Math.max(1, Math.floor(options.topX));

  // Math.max propagates NaN, and every comparison against NaN is false, so a
  // NaN cutoff would slip past the range check below and end up in the class
  // name as `word-frequency-band-NaN`. Treat it as "nothing qualifies".
  if (Number.isNaN(topX)) return null;
  if (rank > topX) return null;

  if (options.mode === 'banded') {
    // Split 1..topX into five equal bands; band 1 holds the most frequent words.
    const band = Math.min(5, Math.max(1, Math.ceil((rank / topX) * 5)));
    return `word-frequency-band-${band}`;
  }

  return 'word-frequency-single';
}
|
||||
|
||||
/**
 * Builds the space-separated class list for one token's `<span>`.
 *
 * The list always starts with `word`. An n+1 target takes precedence over a
 * known word, a JLPT class is added whenever the token carries a level, and a
 * frequency class is only considered for tokens that are neither known nor an
 * n+1 target.
 *
 * @param token - Token being rendered.
 * @param options - Frequency settings forwarded to `computeFrequencyClass`.
 * @returns The class attribute value, e.g. `word word-known`.
 */
function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequencyOptions): string {
  const { isKnown, isNPlusOneTarget, jlptLevel } = token;

  let statusClass: string | null = null;
  if (isNPlusOneTarget) {
    statusClass = 'word-n-plus-one';
  } else if (isKnown) {
    statusClass = 'word-known';
  }

  const jlptClass = jlptLevel ? `word-jlpt-${jlptLevel.toLowerCase()}` : null;

  // No status class means the token is neither known nor an n+1 target, which
  // is exactly the case in which frequency highlighting applies.
  const frequencyClass = statusClass === null ? computeFrequencyClass(token, options) : null;

  return ['word', statusClass, jlptClass, frequencyClass]
    .filter((name): name is string => Boolean(name))
    .join(' ');
}
|
||||
|
||||
/**
 * Renders a subtitle payload as an HTML fragment.
 *
 * Without tokens (null or empty) the plain text is escaped and its newlines
 * become `<br>`. With tokens, each token surface is wrapped in a `<span>`
 * carrying its computed classes; newlines inside a surface become `<br>`
 * between spans, and empty segments produce no span.
 *
 * @param payload - Subtitle text plus optional token list.
 * @param options - Frequency settings used when computing token classes.
 * @returns The serialized markup.
 */
export function serializeSubtitleMarkup(
  payload: SubtitleData,
  options: SubtitleWebsocketFrequencyOptions,
): string {
  const { text, tokens } = payload;

  if (!tokens || tokens.length === 0) {
    return escapeHtml(text).replaceAll('\n', '<br>');
  }

  return tokens
    .map((token) => {
      const klass = computeWordClass(token, options);
      // Joining the per-line pieces with <br> puts a break after every
      // segment except the last one of this token.
      return token.surface
        .split('\n')
        .map((segment) =>
          segment ? `<span class="${klass}">${escapeHtml(segment)}</span>` : '',
        )
        .join('<br>');
    })
    .join('');
}
|
||||
|
||||
/**
 * Produces the JSON text frame sent to websocket clients: an object whose
 * single `sentence` field holds the serialized subtitle markup.
 *
 * @param payload - Subtitle text plus optional token list.
 * @param options - Frequency settings forwarded to `serializeSubtitleMarkup`.
 * @returns The JSON-encoded message.
 */
export function serializeSubtitleWebsocketMessage(
  payload: SubtitleData,
  options: SubtitleWebsocketFrequencyOptions,
): string {
  const sentence = serializeSubtitleMarkup(payload, options);
  return JSON.stringify({ sentence });
}
|
||||
|
||||
export class SubtitleWebSocket {
|
||||
private server: WebSocket.Server | null = null;
|
||||
private latestMessage = '';
|
||||
|
||||
public isRunning(): boolean {
|
||||
return this.server !== null;
|
||||
}
|
||||
|
||||
public hasClients(): boolean {
|
||||
return (this.server?.clients.size ?? 0) > 0;
|
||||
}
|
||||
|
||||
public start(port: number, getCurrentSubtitleText: () => string): void {
|
||||
this.server = new WebSocket.Server({ port, host: '127.0.0.1' });
|
||||
|
||||
this.server.on('connection', (ws: WebSocket) => {
|
||||
logger.info('WebSocket client connected');
|
||||
if (this.latestMessage) {
|
||||
ws.send(this.latestMessage);
|
||||
return;
|
||||
}
|
||||
|
||||
const currentText = getCurrentSubtitleText();
|
||||
if (currentText) {
|
||||
ws.send(JSON.stringify({ sentence: currentText }));
|
||||
@@ -36,9 +136,10 @@ export class SubtitleWebSocket {
|
||||
logger.info(`Subtitle WebSocket server running on ws://127.0.0.1:${port}`);
|
||||
}
|
||||
|
||||
public broadcast(text: string): void {
|
||||
public broadcast(payload: SubtitleData, options: SubtitleWebsocketFrequencyOptions): void {
|
||||
if (!this.server) return;
|
||||
const message = JSON.stringify({ sentence: text });
|
||||
const message = serializeSubtitleWebsocketMessage(payload, options);
|
||||
this.latestMessage = message;
|
||||
for (const client of this.server.clients) {
|
||||
if (client.readyState === WebSocket.OPEN) {
|
||||
client.send(message);
|
||||
@@ -51,5 +152,6 @@ export class SubtitleWebSocket {
|
||||
this.server.close();
|
||||
this.server = null;
|
||||
}
|
||||
this.latestMessage = '';
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user