refactor: split startup lifecycle and Anki service architecture

2026-05-01 04:19:26 -07:00 · 2026-02-14 22:31:21 -08:00
parent 41f7d754cd
commit 162223943d
30 changed files with 1603 additions and 312 deletions
--- a/src/core/services/anki-jimaku-service.test.ts
+++ b/src/core/services/anki-jimaku-service.test.ts
@@ -45,6 +45,8 @@ function createHarness(): RuntimeHarness {
    setAnkiIntegration: (integration) => {
      state.ankiIntegration = integration;
    },
+    getKnownWordCacheStatePath: () =>
+      "/tmp/subminer-known-words-cache.json",
    showDesktopNotification: () => {},
    createFieldGroupingCallback: () => async () => ({
      keepNoteId: 1,
--- a/src/core/services/anki-jimaku-service.ts
+++ b/src/core/services/anki-jimaku-service.ts
@@ -33,6 +33,7 @@ export interface AnkiJimakuIpcRuntimeOptions {
  getMpvClient: () => MpvClientLike | null;
  getAnkiIntegration: () => AnkiIntegration | null;
  setAnkiIntegration: (integration: AnkiIntegration | null) => void;
+  getKnownWordCacheStatePath: () => string;
  showDesktopNotification: (title: string, options: { body?: string; icon?: string }) => void;
  createFieldGroupingCallback: () => (
    data: KikuFieldGroupingRequestData,
@@ -87,6 +88,7 @@ export function registerAnkiJimakuIpcRuntimeService(
          },
          options.showDesktopNotification,
          options.createFieldGroupingCallback(),
+          options.getKnownWordCacheStatePath(),
        );
        integration.start();
        options.setAnkiIntegration(integration);
--- a/src/core/services/overlay-runtime-init-service.ts
+++ b/src/core/services/overlay-runtime-init-service.ts
@@ -35,6 +35,7 @@ export function initializeOverlayRuntimeService(options: {
  createFieldGroupingCallback: () => (
    data: KikuFieldGroupingRequestData,
  ) => Promise<KikuFieldGroupingChoice>;
+  getKnownWordCacheStatePath: () => string;
 }): {
  invisibleOverlayVisible: boolean;
 } {
@@ -98,6 +99,7 @@ export function initializeOverlayRuntimeService(options: {
      },
      options.showDesktopNotification,
      options.createFieldGroupingCallback(),
+      options.getKnownWordCacheStatePath(),
    );
    integration.start();
    options.setAnkiIntegration(integration);
--- a/src/core/services/tokenizer-service.test.ts
+++ b/src/core/services/tokenizer-service.test.ts
@@ -14,6 +14,8 @@ function makeDeps(
    setYomitanParserReadyPromise: () => {},
    getYomitanParserInitPromise: () => null,
    setYomitanParserInitPromise: () => {},
+    isKnownWord: () => false,
+    getKnownWordMatchMode: () => "headword",
    tokenizeWithMecab: async () => null,
    ...overrides,
  };
@@ -32,7 +34,7 @@ test("tokenizeSubtitleService normalizes newlines before mecab fallback", async
      tokenizeWithMecab: async (text) => {
        tokenizeInput = text;
        return [
-          {
+        {
            surface: "猫ですね",
            reading: "ネコデスネ",
            headword: "猫ですね",
@@ -40,6 +42,7 @@ test("tokenizeSubtitleService normalizes newlines before mecab fallback", async
            endPos: 4,
            partOfSpeech: PartOfSpeech.other,
            isMerged: true,
+            isKnown: false,
          },
        ];
      },
@@ -64,6 +67,7 @@ test("tokenizeSubtitleService falls back to mecab tokens when available", async
          endPos: 1,
          partOfSpeech: PartOfSpeech.noun,
          isMerged: false,
+          isKnown: false,
        },
      ],
    }),
@@ -126,4 +130,78 @@ test("tokenizeSubtitleService uses Yomitan parser result when available", async
  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.surface, "猫です");
  assert.equal(result.tokens?.[0]?.reading, "ねこです");
+  assert.equal(result.tokens?.[0]?.isKnown, false);
+});
+
+test("tokenizeSubtitleService marks tokens as known using callback", async () => {
+  const result = await tokenizeSubtitleService(
+    "猫です",
+    makeDeps({
+      isKnownWord: (text) => text === "猫",
+      tokenizeWithMecab: async () => [
+        {
+          surface: "猫",
+          reading: "ネコ",
+          headword: "猫",
+          startPos: 0,
+          endPos: 1,
+          partOfSpeech: PartOfSpeech.noun,
+          isMerged: false,
+          isKnown: false,
+        },
+      ],
+    }),
+  );
+
+  assert.equal(result.text, "猫です");
+  assert.equal(result.tokens?.[0]?.isKnown, true);
+});
+
+test("tokenizeSubtitleService checks known words by headword, not surface", async () => {
+  const result = await tokenizeSubtitleService(
+    "猫です",
+    makeDeps({
+      isKnownWord: (text) => text === "猫です",
+      tokenizeWithMecab: async () => [
+        {
+          surface: "猫",
+          reading: "ネコ",
+          headword: "猫です",
+          startPos: 0,
+          endPos: 1,
+          partOfSpeech: PartOfSpeech.noun,
+          isMerged: false,
+          isKnown: false,
+        },
+      ],
+    }),
+  );
+
+  assert.equal(result.text, "猫です");
+  assert.equal(result.tokens?.[0]?.isKnown, true);
+});
+
+test("tokenizeSubtitleService checks known words by surface when configured", async () => {
+  const result = await tokenizeSubtitleService(
+    "猫です",
+    makeDeps({
+      getKnownWordMatchMode: () => "surface",
+      isKnownWord: (text) => text === "猫",
+      tokenizeWithMecab: async () => [
+        {
+          surface: "猫",
+          reading: "ネコ",
+          headword: "猫です",
+          startPos: 0,
+          endPos: 1,
+          partOfSpeech: PartOfSpeech.noun,
+          isMerged: false,
+          isKnown: false,
+        },
+      ],
+    }),
+  );
+
+  assert.equal(result.text, "猫です");
+  assert.equal(result.tokens?.[0]?.isKnown, true);
 });
--- a/src/core/services/tokenizer-service.ts
+++ b/src/core/services/tokenizer-service.ts
@@ -1,6 +1,12 @@
 import { BrowserWindow, Extension, session } from "electron";
 import { mergeTokens } from "../../token-merger";
-import { MergedToken, PartOfSpeech, SubtitleData, Token } from "../../types";
+import {
+  MergedToken,
+  NPlusOneMatchMode,
+  PartOfSpeech,
+  SubtitleData,
+  Token,
+} from "../../types";

 interface YomitanParseHeadword {
  term?: unknown;
@@ -26,6 +32,8 @@ export interface TokenizerServiceDeps {
  setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;
  getYomitanParserInitPromise: () => Promise<boolean> | null;
  setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
+  isKnownWord: (text: string) => boolean;
+  getKnownWordMatchMode: () => NPlusOneMatchMode;
  tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
 }

@@ -41,6 +49,8 @@ export interface TokenizerDepsRuntimeOptions {
  setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;
  getYomitanParserInitPromise: () => Promise<boolean> | null;
  setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
+  isKnownWord: (text: string) => boolean;
+  getKnownWordMatchMode: () => NPlusOneMatchMode;
  getMecabTokenizer: () => MecabTokenizerLike | null;
 }

@@ -55,6 +65,8 @@ export function createTokenizerDepsRuntimeService(
    setYomitanParserReadyPromise: options.setYomitanParserReadyPromise,
    getYomitanParserInitPromise: options.getYomitanParserInitPromise,
    setYomitanParserInitPromise: options.setYomitanParserInitPromise,
+    isKnownWord: options.isKnownWord,
+    getKnownWordMatchMode: options.getKnownWordMatchMode,
    tokenizeWithMecab: async (text) => {
      const mecabTokenizer = options.getMecabTokenizer();
      if (!mecabTokenizer) {
@@ -64,11 +76,23 @@ export function createTokenizerDepsRuntimeService(
      if (!rawTokens || rawTokens.length === 0) {
        return null;
      }
-      return mergeTokens(rawTokens);
+      return mergeTokens(
+        rawTokens,
+        options.isKnownWord,
+        options.getKnownWordMatchMode(),
+      );
    },
  };
 }

+// Picks the token text handed to the known-word lookup: surface or headword.
+function resolveKnownWordText(
+  surface: string, headword: string, matchMode: NPlusOneMatchMode,
+): string {
+  if (matchMode === "surface") return surface;
+  return headword;
+}
+
 function extractYomitanHeadword(segment: YomitanParseSegment): string {
  const headwords = segment.headwords;
  if (!Array.isArray(headwords) || headwords.length === 0) {
@@ -86,6 +110,8 @@ function extractYomitanHeadword(segment: YomitanParseSegment): string {

 function mapYomitanParseResultsToMergedTokens(
  parseResults: unknown,
+  isKnownWord: (text: string) => boolean,
+  knownWordMatchMode: NPlusOneMatchMode,
 ): MergedToken[] | null {
  if (!Array.isArray(parseResults) || parseResults.length === 0) {
    return null;
@@ -161,6 +187,14 @@ function mapYomitanParseResultsToMergedTokens(
      endPos: end,
      partOfSpeech: PartOfSpeech.other,
      isMerged: true,
+      isKnown: (() => {
+        const matchText = resolveKnownWordText(
+          surface,
+          headword,
+          knownWordMatchMode,
+        );
+        return matchText ? isKnownWord(matchText) : false;
+      })(),
    });
  }

@@ -302,7 +336,11 @@ async function parseWithYomitanInternalParser(
      script,
      true,
    );
-    return mapYomitanParseResultsToMergedTokens(parseResults);
+    return mapYomitanParseResultsToMergedTokens(
+      parseResults,
+      deps.isKnownWord,
+      deps.getKnownWordMatchMode(),
+    );
  } catch (err) {
    console.error("Yomitan parser request failed:", (err as Error).message);
    return null;