fix(subtitle-ws): send tokenized payloads to texthooker

2026-05-01 16:19:24 -07:00 · 2026-02-19 17:21:26 -08:00
parent d5d71816ac
commit 7795cc3d69
5 changed files with 376 additions and 179 deletions
--- a/src/core/services/subtitle-processing-controller.test.ts
+++ b/src/core/services/subtitle-processing-controller.test.ts
@@ -7,7 +7,7 @@ function flushMicrotasks(): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, 0));
 }

-test('subtitle processing emits plain subtitle immediately before tokenized payload', async () => {
+test('subtitle processing emits tokenized payload when tokenization succeeds', async () => {
  const emitted: SubtitleData[] = [];
  const controller = createSubtitleProcessingController({
    tokenizeSubtitle: async (text) => ({ text, tokens: [] }),
@@ -15,13 +15,11 @@ test('subtitle processing emits plain subtitle immediately before tokenized payl
  });

  controller.onSubtitleChange('字幕');
-  assert.deepEqual(emitted[0], { text: '字幕', tokens: null });
-
  await flushMicrotasks();
-  assert.deepEqual(emitted[1], { text: '字幕', tokens: [] });
+  assert.deepEqual(emitted, [{ text: '字幕', tokens: [] }]);
 });

-test('subtitle processing drops stale tokenization and delivers latest subtitle only', async () => {
+test('subtitle processing drops stale tokenization and delivers latest subtitle only once', async () => {
  const emitted: SubtitleData[] = [];
  let firstResolve: ((value: SubtitleData | null) => void) | undefined;
  const controller = createSubtitleProcessingController({
@@ -43,14 +41,10 @@ test('subtitle processing drops stale tokenization and delivers latest subtitle
  await flushMicrotasks();
  await flushMicrotasks();

-  assert.deepEqual(emitted, [
-    { text: 'first', tokens: null },
-    { text: 'second', tokens: null },
-    { text: 'second', tokens: [] },
-  ]);
+  assert.deepEqual(emitted, [{ text: 'second', tokens: [] }]);
 });

-test('subtitle processing skips duplicate plain subtitle emission', async () => {
+test('subtitle processing skips duplicate subtitle emission', async () => {
  const emitted: SubtitleData[] = [];
  let tokenizeCalls = 0;
  const controller = createSubtitleProcessingController({
@@ -66,7 +60,19 @@ test('subtitle processing skips duplicate plain subtitle emission', async () =>
  controller.onSubtitleChange('same');
  await flushMicrotasks();

-  const plainEmits = emitted.filter((entry) => entry.tokens === null);
-  assert.equal(plainEmits.length, 1);
+  assert.equal(emitted.length, 1);
  assert.equal(tokenizeCalls, 1);
 });
+
+test('subtitle processing falls back to plain subtitle when tokenization returns null', async () => {
+  const emitted: SubtitleData[] = [];
+  const controller = createSubtitleProcessingController({
+    tokenizeSubtitle: async () => null,
+    emitSubtitle: (payload) => emitted.push(payload),
+  });
+
+  controller.onSubtitleChange('fallback');
+  await flushMicrotasks();
+
+  assert.deepEqual(emitted, [{ text: 'fallback', tokens: null }]);
+});
--- a/src/core/services/subtitle-processing-controller.ts
+++ b/src/core/services/subtitle-processing-controller.ts
@@ -15,19 +15,11 @@ export function createSubtitleProcessingController(
  deps: SubtitleProcessingControllerDeps,
 ): SubtitleProcessingController {
  let latestText = '';
-  let lastPlainText = '';
+  let lastEmittedText = '';
  let processing = false;
  let staleDropCount = 0;
  const now = deps.now ?? (() => Date.now());

-  const emitPlainSubtitle = (text: string): void => {
-    if (text === lastPlainText) {
-      return;
-    }
-    lastPlainText = text;
-    deps.emitSubtitle({ text, tokens: null });
-  };
-
  const processLatest = (): void => {
    if (processing) {
      return;
@@ -38,14 +30,20 @@ export function createSubtitleProcessingController(
    void (async () => {
      while (true) {
        const text = latestText;
+        const startedAtMs = now();
+
        if (!text.trim()) {
+          deps.emitSubtitle({ text, tokens: null });
+          lastEmittedText = text;
          break;
        }

-        const startedAtMs = now();
-        let tokenized: SubtitleData | null = null;
+        let output: SubtitleData = { text, tokens: null };
        try {
-          tokenized = await deps.tokenizeSubtitle(text);
+          const tokenized = await deps.tokenizeSubtitle(text);
+          if (tokenized) {
+            output = tokenized;
+          }
        } catch (error) {
          deps.logDebug?.(`Subtitle tokenization failed: ${(error as Error).message}`);
        }
@@ -58,12 +56,11 @@ export function createSubtitleProcessingController(
          continue;
        }

-        if (tokenized) {
-          deps.emitSubtitle(tokenized);
-          deps.logDebug?.(
-            `Subtitle tokenization delivered; elapsed=${now() - startedAtMs}ms, staleDrops=${staleDropCount}`,
-          );
-        }
+        deps.emitSubtitle(output);
+        lastEmittedText = text;
+        deps.logDebug?.(
+          `Subtitle tokenization delivered; elapsed=${now() - startedAtMs}ms, staleDrops=${staleDropCount}`,
+        );
        break;
      }
    })()
@@ -72,7 +69,7 @@ export function createSubtitleProcessingController(
      })
      .finally(() => {
        processing = false;
-        if (latestText !== lastPlainText) {
+        if (latestText !== lastEmittedText) {
          processLatest();
        }
      });
@@ -83,13 +80,7 @@ export function createSubtitleProcessingController(
      if (text === latestText) {
        return;
      }
-      const plainStartedAtMs = now();
      latestText = text;
-      emitPlainSubtitle(text);
-      deps.logDebug?.(`Subtitle plain emit completed in ${now() - plainStartedAtMs}ms`);
-      if (!text.trim()) {
-        return;
-      }
      processLatest();
    },
  };
--- a/src/core/services/subtitle-ws.test.ts
+++ b/src/core/services/subtitle-ws.test.ts
@@ -0,0 +1,89 @@
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import { serializeSubtitleMarkup, serializeSubtitleWebsocketMessage } from './subtitle-ws';
+import { PartOfSpeech, type SubtitleData } from '../../types';
+
+const frequencyOptions = {
+  enabled: true,
+  topX: 1000,
+  mode: 'banded' as const,
+};
+
+test('serializeSubtitleMarkup escapes plain text and preserves line breaks', () => {
+  const payload: SubtitleData = {
+    text: 'a < b\nx & y',
+    tokens: null,
+  };
+
+  assert.equal(serializeSubtitleMarkup(payload, frequencyOptions), 'a &lt; b<br>x &amp; y');
+});
+
+test('serializeSubtitleMarkup includes known, n+1, jlpt, and frequency classes', () => {
+  const payload: SubtitleData = {
+    text: 'ignored',
+    tokens: [
+      {
+        surface: '既知',
+        reading: '',
+        headword: '',
+        startPos: 0,
+        endPos: 2,
+        partOfSpeech: PartOfSpeech.other,
+        isMerged: false,
+        isKnown: true,
+        isNPlusOneTarget: false,
+      },
+      {
+        surface: '新語',
+        reading: '',
+        headword: '',
+        startPos: 2,
+        endPos: 4,
+        partOfSpeech: PartOfSpeech.other,
+        isMerged: false,
+        isKnown: false,
+        isNPlusOneTarget: true,
+      },
+      {
+        surface: '級',
+        reading: '',
+        headword: '',
+        startPos: 4,
+        endPos: 5,
+        partOfSpeech: PartOfSpeech.other,
+        isMerged: false,
+        isKnown: false,
+        isNPlusOneTarget: false,
+        jlptLevel: 'N3',
+      },
+      {
+        surface: '頻度',
+        reading: '',
+        headword: '',
+        startPos: 5,
+        endPos: 7,
+        partOfSpeech: PartOfSpeech.other,
+        isMerged: false,
+        isKnown: false,
+        isNPlusOneTarget: false,
+        frequencyRank: 10,
+      },
+    ],
+  };
+
+  const markup = serializeSubtitleMarkup(payload, frequencyOptions);
+  assert.match(markup, /word word-known/);
+  assert.match(markup, /word word-n-plus-one/);
+  assert.match(markup, /word word-jlpt-n3/);
+  assert.match(markup, /word word-frequency-band-1/);
+});
+
+test('serializeSubtitleWebsocketMessage emits sentence payload', () => {
+  const payload: SubtitleData = {
+    text: '字幕',
+    tokens: null,
+  };
+
+  const raw = serializeSubtitleWebsocketMessage(payload, frequencyOptions);
+  assert.deepEqual(JSON.parse(raw), { sentence: '字幕' });
+});
--- a/src/core/services/subtitle-ws.ts
+++ b/src/core/services/subtitle-ws.ts
@@ -3,6 +3,7 @@ import * as os from 'os';
 import * as path from 'path';
 import WebSocket from 'ws';
 import { createLogger } from '../../logger';
+import type { MergedToken, SubtitleData } from '../../types';

 const logger = createLogger('main:subtitle-ws');

@@ -11,18 +12,117 @@ export function hasMpvWebsocketPlugin(): boolean {
  return fs.existsSync(mpvWebsocketPath);
 }

+export type SubtitleWebsocketFrequencyOptions = {
+  enabled: boolean;
+  topX: number;
+  mode: 'single' | 'banded';
+};
+
+function escapeHtml(text: string): string {
+  return text
+    .replaceAll('&', '&amp;')
+    .replaceAll('<', '&lt;')
+    .replaceAll('>', '&gt;')
+    .replaceAll('"', '&quot;')
+    .replaceAll("'", '&#39;');
+}
+
+function computeFrequencyClass(
+  token: MergedToken,
+  options: SubtitleWebsocketFrequencyOptions,
+): string | null {
+  if (!options.enabled) return null;
+  if (typeof token.frequencyRank !== 'number' || !Number.isFinite(token.frequencyRank)) return null;
+
+  const rank = Math.max(1, Math.floor(token.frequencyRank));
+  const topX = Math.max(1, Math.floor(options.topX));
+  if (rank > topX) return null;
+
+  if (options.mode === 'banded') {
+    const band = Math.min(5, Math.max(1, Math.ceil((rank / topX) * 5)));
+    return `word-frequency-band-${band}`;
+  }
+
+  return 'word-frequency-single';
+}
+
+function computeWordClass(token: MergedToken, options: SubtitleWebsocketFrequencyOptions): string {
+  const classes = ['word'];
+
+  if (token.isNPlusOneTarget) {
+    classes.push('word-n-plus-one');
+  } else if (token.isKnown) {
+    classes.push('word-known');
+  }
+
+  if (token.jlptLevel) {
+    classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
+  }
+
+  if (!token.isKnown && !token.isNPlusOneTarget) {
+    const frequencyClass = computeFrequencyClass(token, options);
+    if (frequencyClass) {
+      classes.push(frequencyClass);
+    }
+  }
+
+  return classes.join(' ');
+}
+
+export function serializeSubtitleMarkup(
+  payload: SubtitleData,
+  options: SubtitleWebsocketFrequencyOptions,
+): string {
+  if (!payload.tokens || payload.tokens.length === 0) {
+    return escapeHtml(payload.text).replaceAll('\n', '<br>');
+  }
+
+  const chunks: string[] = [];
+  for (const token of payload.tokens) {
+    const klass = computeWordClass(token, options);
+    const parts = token.surface.split('\n');
+    for (let index = 0; index < parts.length; index += 1) {
+      if (parts[index]) {
+        chunks.push(`<span class="${klass}">${escapeHtml(parts[index])}</span>`);
+      }
+      if (index < parts.length - 1) {
+        chunks.push('<br>');
+      }
+    }
+  }
+
+  return chunks.join('');
+}
+
+export function serializeSubtitleWebsocketMessage(
+  payload: SubtitleData,
+  options: SubtitleWebsocketFrequencyOptions,
+): string {
+  return JSON.stringify({ sentence: serializeSubtitleMarkup(payload, options) });
+}
+
 export class SubtitleWebSocket {
  private server: WebSocket.Server | null = null;
+  private latestMessage = '';

  public isRunning(): boolean {
    return this.server !== null;
  }

+  public hasClients(): boolean {
+    return (this.server?.clients.size ?? 0) > 0;
+  }
+
  public start(port: number, getCurrentSubtitleText: () => string): void {
    this.server = new WebSocket.Server({ port, host: '127.0.0.1' });

    this.server.on('connection', (ws: WebSocket) => {
      logger.info('WebSocket client connected');
+      if (this.latestMessage) {
+        ws.send(this.latestMessage);
+        return;
+      }
+
      const currentText = getCurrentSubtitleText();
      if (currentText) {
        ws.send(JSON.stringify({ sentence: currentText }));
@@ -36,9 +136,10 @@ export class SubtitleWebSocket {
    logger.info(`Subtitle WebSocket server running on ws://127.0.0.1:${port}`);
  }

-  public broadcast(text: string): void {
+  public broadcast(payload: SubtitleData, options: SubtitleWebsocketFrequencyOptions): void {
    if (!this.server) return;
-    const message = JSON.stringify({ sentence: text });
+    const message = serializeSubtitleWebsocketMessage(payload, options);
+    this.latestMessage = message;
    for (const client of this.server.clients) {
      if (client.readyState === WebSocket.OPEN) {
        client.send(message);
@@ -51,5 +152,6 @@ export class SubtitleWebSocket {
      this.server.close();
      this.server = null;
    }
+    this.latestMessage = '';
  }
 }