From 8aa2a45c7c6a0b94d90b4004e829f03210497945 Mon Sep 17 00:00:00 2001 From: sudacode Date: Fri, 27 Feb 2026 21:25:26 -0800 Subject: [PATCH] feat(anki): add proxy transport and tokenizer annotation controls --- ...d-bind-visible-overlay-to-mpv-subtitles.md | 44 +++ ...ransport-for-push-based-auto-enrichment.md | 44 +++ ...w-full-warning-details-in-native-dialog.md | 35 ++ config.example.jsonc | 13 +- docs/README.md | 2 +- docs/anki-integration.md | 42 ++- docs/configuration.md | 29 ++ docs/mining-workflow.md | 10 +- docs/public/config.example.jsonc | 13 +- docs/troubleshooting.md | 14 +- .../anki-connect-proxy.test.ts | 133 ++++++++ src/anki-integration/anki-connect-proxy.ts | 314 ++++++++++++++++++ .../definitions/defaults-integrations.ts | 6 + .../definitions/options-integrations.ts | 24 ++ src/config/resolve/anki-connect.test.ts | 41 +++ src/config/resolve/anki-connect.ts | 66 ++++ .../services/overlay-runtime-init.test.ts | 16 +- src/core/services/tokenizer.test.ts | 166 +++++++++ src/core/services/tokenizer.ts | 129 +++++-- .../services/tokenizer/annotation-stage.ts | 22 +- .../parser-enrichment-worker-runtime.ts | 147 ++++++++ .../parser-enrichment-worker-thread.ts | 25 ++ .../tokenizer/yomitan-parser-runtime.test.ts | 83 +++++ .../tokenizer/yomitan-parser-runtime.ts | 88 +++++ .../subtitle-tokenization-main-deps.test.ts | 2 + .../subtitle-tokenization-main-deps.ts | 5 + 26 files changed, 1453 insertions(+), 60 deletions(-) create mode 100644 backlog/tasks/task-70 - Overlay-runtime-refactor-remove-invisible-mode-and-bind-visible-overlay-to-mpv-subtitles.md create mode 100644 backlog/tasks/task-71 - Anki-integration-add-local-AnkiConnect-proxy-transport-for-push-based-auto-enrichment.md create mode 100644 backlog/tasks/task-72 - macOS-config-validation-UX-show-full-warning-details-in-native-dialog.md create mode 100644 src/anki-integration/anki-connect-proxy.test.ts create mode 100644 src/anki-integration/anki-connect-proxy.ts create mode 100644 
src/core/services/tokenizer/parser-enrichment-worker-runtime.ts create mode 100644 src/core/services/tokenizer/parser-enrichment-worker-thread.ts create mode 100644 src/core/services/tokenizer/yomitan-parser-runtime.test.ts diff --git a/backlog/tasks/task-70 - Overlay-runtime-refactor-remove-invisible-mode-and-bind-visible-overlay-to-mpv-subtitles.md b/backlog/tasks/task-70 - Overlay-runtime-refactor-remove-invisible-mode-and-bind-visible-overlay-to-mpv-subtitles.md new file mode 100644 index 0000000..0013712 --- /dev/null +++ b/backlog/tasks/task-70 - Overlay-runtime-refactor-remove-invisible-mode-and-bind-visible-overlay-to-mpv-subtitles.md @@ -0,0 +1,44 @@ +--- +id: TASK-70 +title: >- + Overlay runtime refactor: remove invisible mode and bind visible overlay to + mpv subtitles +status: Done +assignee: [] +created_date: '2026-02-28 02:38' +labels: [] +dependencies: [] +references: + - 'commit:a14c9da' + - 'commit:74554a3' + - 'commit:75442a4' + - 'commit:dde51f8' + - 'commit:9e4e588' + - src/main/overlay-runtime.ts + - src/main/runtime/overlay-mpv-sub-visibility.ts + - src/renderer/renderer.ts + - docs/plans/2026-02-26-secondary-subtitles-main-overlay.md +priority: medium +--- + +## Description + + +Scope: Branch-only commits main..HEAD on refactor-overlay (a14c9da through 9e4e588) rebuilt overlay behavior around visible overlay mode and removed legacy invisible overlay paths. + +Delivered behavior: +- Removed renderer invisible overlay layout/offset helpers and main hover-highlight runtime code paths. +- Added explicit overlay-to-mpv subtitle visibility synchronization so visible overlay state controls primary subtitle visibility consistently. +- Hardened overlay runtime/bootstrap lifecycle around modal fallback open state and bridge send path edge cases. +- Updated plugin/config/docs defaults to reflect visible-overlay-first behavior and subtitle binding controls. 
+ +Risk/impact context: +- Large cross-layer refactor touching runtime wiring, renderer event handling, and plugin behavior. +- Regression coverage added/updated for overlay runtime, mpv protocol handling, renderer cleanup, and subtitle rendering paths. + + +## Final Summary + + +Completed and validated in branch commit set before merge. Refactor reduces dead overlay modes, centralizes subtitle visibility behavior, and documents new defaults/constraints. + diff --git a/backlog/tasks/task-71 - Anki-integration-add-local-AnkiConnect-proxy-transport-for-push-based-auto-enrichment.md b/backlog/tasks/task-71 - Anki-integration-add-local-AnkiConnect-proxy-transport-for-push-based-auto-enrichment.md new file mode 100644 index 0000000..92f580a --- /dev/null +++ b/backlog/tasks/task-71 - Anki-integration-add-local-AnkiConnect-proxy-transport-for-push-based-auto-enrichment.md @@ -0,0 +1,44 @@ +--- +id: TASK-71 +title: >- + Anki integration: add local AnkiConnect proxy transport for push-based + auto-enrichment +status: Done +assignee: [] +created_date: '2026-02-28 02:38' +labels: [] +dependencies: [] +references: + - src/anki-integration/anki-connect-proxy.ts + - src/anki-integration/anki-connect-proxy.test.ts + - src/anki-integration.ts + - src/config/resolve/anki-connect.ts + - src/core/services/tokenizer/yomitan-parser-runtime.ts + - src/core/services/tokenizer/yomitan-parser-runtime.test.ts + - docs/anki-integration.md + - config.example.jsonc +priority: medium +--- + +## Description + + +Scope: Current unmerged working-tree changes implement an optional local AnkiConnect-compatible proxy and transport switching for card enrichment. + +Delivered behavior: +- Added proxy server that forwards AnkiConnect requests and enqueues addNote/addNotes note IDs for post-create enrichment, with de-duplication and loop-configuration protection. 
+- Added config schema/defaults/resolution for ankiConnect.proxy (enabled, host, port, upstreamUrl) with validation warnings and fallback behavior. +- Runtime now supports transport switching (polling vs proxy) and restarts transport when runtime config patches change transport keys. +- Added Yomitan default-profile server sync helper to keep bundled parser profile aligned with configured Anki endpoint. +- Updated user docs/config examples for proxy mode setup, troubleshooting, and mining workflow behavior. + +Risk/impact context: +- New network surface on local host/port; correctness depends on safe proxy upstream configuration and robust response handling. +- Tests added for proxy queue behavior, config resolution, and parser sync routines. + + +## Final Summary + + +Completed implementation in branch working tree; ready to merge once local changes are committed and test gate passes. + diff --git a/backlog/tasks/task-72 - macOS-config-validation-UX-show-full-warning-details-in-native-dialog.md b/backlog/tasks/task-72 - macOS-config-validation-UX-show-full-warning-details-in-native-dialog.md new file mode 100644 index 0000000..4c819a7 --- /dev/null +++ b/backlog/tasks/task-72 - macOS-config-validation-UX-show-full-warning-details-in-native-dialog.md @@ -0,0 +1,35 @@ +--- +id: TASK-72 +title: 'macOS config validation UX: show full warning details in native dialog' +status: Done +assignee: [] +created_date: '2026-02-28 02:38' +labels: [] +dependencies: [] +references: + - 'commit:cc2f9ef' + - src/main/config-validation.ts + - src/main/runtime/startup-config.ts + - docs/configuration.md +priority: low +--- + +## Description + + +Scope: Commit cc2f9ef improves startup config-warning visibility on macOS by ensuring full details are surfaced in the native UI path and reflected in docs. + +Delivered behavior: +- Config validation/runtime wiring updated so macOS users can access complete warning details instead of truncated notification-only text. 
+- Added/updated tests around config validation and startup config warning flows. +- Updated configuration docs to clarify platform-specific warning presentation behavior. + +Risk/impact context: +- Low runtime risk; primarily user-facing diagnostics clarity improvement. + + +## Final Summary + + +Completed small follow-up fix to reduce config-debug friction on macOS. + diff --git a/config.example.jsonc b/config.example.jsonc index 2c61a92..31a3590 100644 --- a/config.example.jsonc +++ b/config.example.jsonc @@ -12,13 +12,6 @@ // ========================================== "auto_start_overlay": false, // When overlay connects to mpv, automatically show overlay and hide mpv subtitles. Values: true | false - // ========================================== - // Visible Overlay Subtitle Binding - // Control whether visible overlay toggles also toggle MPV subtitle visibility. - // When enabled, visible overlay hides MPV subtitles; when disabled, MPV subtitles are left unchanged. - // ========================================== - "bind_visible_overlay_to_mpv_sub_visibility": true, // Link visible overlay toggles to MPV primary subtitle visibility. Values: true | false - // ========================================== // Texthooker Server // Control whether browser opens automatically for texthooker. @@ -179,6 +172,12 @@ "enabled": false, // Enable AnkiConnect integration. Values: true | false "url": "http://127.0.0.1:8765", // Url setting. "pollingRate": 3000, // Polling interval in milliseconds. + "proxy": { + "enabled": false, // Enable local AnkiConnect-compatible proxy for push-based auto-enrichment. Values: true | false + "host": "127.0.0.1", // Bind host for local AnkiConnect proxy. + "port": 8766, // Bind port for local AnkiConnect proxy. + "upstreamUrl": "http://127.0.0.1:8765" // Upstream AnkiConnect URL proxied by local AnkiConnect proxy. + }, // Proxy setting. "tags": [ "SubMiner" ], // Tags to add to cards mined or updated by SubMiner. 
Provide an empty array to disable automatic tagging. diff --git a/docs/README.md b/docs/README.md index 1cd8d65..ee495b5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -22,7 +22,7 @@ make docs-preview # Preview built site at http://localhost:4173 - [Configuration](/configuration) — Full config file reference and option details - [Keyboard Shortcuts](/shortcuts) — All global, overlay, mining, and plugin chord shortcuts in one place -- [Anki Integration](/anki-integration) — AnkiConnect setup, field mapping, media generation, field grouping +- [Anki Integration](/anki-integration) — AnkiConnect setup, proxy/polling transport, field mapping, media generation, field grouping - [Jellyfin Integration](/jellyfin-integration) — Optional Jellyfin auth, cast discovery, remote control, and playback launch - [Immersion Tracking](/immersion-tracking) — SQLite schema, retention/rollup policies, query templates, and extension points - [Performance & Tuning](/troubleshooting#performance-and-resource-impact) — Resource usage and practical low-impact profile diff --git a/docs/anki-integration.md b/docs/anki-integration.md index 51a0e8f..2ea3354 100644 --- a/docs/anki-integration.md +++ b/docs/anki-integration.md @@ -10,9 +10,14 @@ SubMiner uses the [AnkiConnect](https://ankiweb.net/shared/info/2055492159) add- AnkiConnect listens on `http://127.0.0.1:8765` by default. If you changed the port in AnkiConnect's settings, update `ankiConnect.url` in your SubMiner config. -## How Polling Works +## Auto-Enrichment Transport -SubMiner polls AnkiConnect at a regular interval (default: 3 seconds, configurable via `ankiConnect.pollingRate`) to detect new cards. When it finds a card that was added since the last poll: +SubMiner supports two auto-enrichment transport modes: + +1. `polling` (default): polls AnkiConnect at `ankiConnect.pollingRate` (default: 3s). +2. 
`proxy` (optional): runs a local AnkiConnect-compatible proxy and enriches cards immediately after successful `addNote` / `addNotes` responses. + +In both modes, the enrichment workflow is the same: 1. Checks if a duplicate expression already exists (for field grouping). 2. Updates the sentence field with the current subtitle. @@ -20,7 +25,32 @@ SubMiner polls AnkiConnect at a regular interval (default: 3 seconds, configurab 4. Fills the translation field from the secondary subtitle or AI. 5. Writes metadata to the miscInfo field. -Polling uses the query `"deck:<deck name>" added:1` to find recently added cards. If no deck is configured, it searches all decks. +Polling mode uses the query `"deck:<deck name>" added:1` to find recently added cards. If no deck is configured, it searches all decks. + +### Proxy Mode Setup (Yomitan / Texthooker) + +```jsonc +"ankiConnect": { + "url": "http://127.0.0.1:8765", // real AnkiConnect + "proxy": { + "enabled": true, + "host": "127.0.0.1", + "port": 8766, + "upstreamUrl": "http://127.0.0.1:8765" + } +} +``` + +Then point Yomitan/clients to `http://127.0.0.1:8766` instead of `8765`. + +When SubMiner loads the bundled Yomitan extension, it also attempts to update the **default Yomitan profile** (`profiles[0].options.anki.server`) to the active SubMiner endpoint: + +- proxy URL when `ankiConnect.proxy.enabled` is `true` +- direct `ankiConnect.url` when proxy mode is disabled + +To avoid clobbering custom setups, this auto-update only changes the default profile when its current server is blank or the stock Yomitan default (`http://127.0.0.1:8765`). + +For browser-based Yomitan or other external clients (for example texthooker in a normal browser profile), set their Anki server to the same proxy URL separately. 
## Field Mapping @@ -214,6 +244,12 @@ When you mine the same word multiple times, SubMiner can merge the cards instead "enabled": true, "url": "http://127.0.0.1:8765", "pollingRate": 3000, + "proxy": { + "enabled": false, + "host": "127.0.0.1", + "port": 8766, + "upstreamUrl": "http://127.0.0.1:8765" + }, "fields": { "audio": "ExpressionAudio", "image": "Picture", diff --git a/docs/configuration.md b/docs/configuration.md index 2ebb69f..af6503b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -47,6 +47,8 @@ Malformed config syntax (invalid JSON/JSONC) is startup-blocking: SubMiner shows For valid JSON/JSONC with invalid option values, SubMiner uses warn-and-fallback behavior: it logs the bad key/value and continues with the default for that option. +On macOS, these validation warnings also open a native dialog with full details (desktop notification banners can truncate long messages). + ### Hot-Reload Behavior SubMiner watches the active config file (`config.jsonc` or `config.json`) while running and applies supported updates automatically. @@ -87,6 +89,7 @@ The configuration file includes several main sections: - [**Subtitle Style**](#subtitle-style) - Appearance customization - [**Texthooker**](#texthooker) - Control browser opening behavior - [**WebSocket Server**](#websocket-server) - Built-in subtitle broadcasting server +- [**Startup Warmups**](#startup-warmups) - Control what preloads on startup vs first-use defer - [**Immersion Tracking**](#immersion-tracking) - Track subtitle sessions and mining activity in SQLite - [**YouTube Subtitle Generation**](#youtube-subtitle-generation) - Launcher defaults for yt-dlp + local whisper fallback @@ -826,6 +829,32 @@ See `config.example.jsonc` for detailed configuration options. 
| `enabled` | `true`, `false`, `"auto"` | `"auto"` (default) disables if mpv_websocket is detected | | `port` | number | WebSocket server port (default: 6677) | +### Startup Warmups + +Control which startup warmups run in the background versus deferring to first real usage: + +```json +{ + "startupWarmups": { + "lowPowerMode": false, + "mecab": true, + "yomitanExtension": true, + "subtitleDictionaries": true, + "jellyfinRemoteSession": true + } +} +``` + +| Option | Values | Description | +| ------------------------ | --------------- | ------------------------------------------------------------------------------------------------ | +| `lowPowerMode` | `true`, `false` | Defer all warmups except Yomitan extension | +| `mecab` | `true`, `false` | Warm up MeCab tokenizer at startup | +| `yomitanExtension` | `true`, `false` | Warm up Yomitan extension at startup | +| `subtitleDictionaries` | `true`, `false` | Warm up JLPT + frequency dictionaries at startup | +| `jellyfinRemoteSession` | `true`, `false` | Warm up Jellyfin remote session at startup (still requires Jellyfin remote auto-connect settings) | + +Defaults warm everything (`true` for all toggles, `lowPowerMode: false`). Setting a warmup toggle to `false` defers that work until first usage. + ### Immersion Tracking Enable or disable local immersion analytics stored in SQLite for mined subtitles and media sessions: diff --git a/docs/mining-workflow.md b/docs/mining-workflow.md index 25b3b95..58be627 100644 --- a/docs/mining-workflow.md +++ b/docs/mining-workflow.md @@ -20,7 +20,7 @@ SubMiner prioritizes subtitle responsiveness over heavy initialization: 1. The first subtitle render is **plain text first** (no tokenization wait). 2. Tokenized enrichment (word spans, known-word flags, JLPT/frequency metadata) is applied right after parsing completes. 3. Under rapid subtitle churn, SubMiner uses a **latest-only tokenization queue** so stale lines are dropped instead of building lag. -4. 
MeCab, Yomitan extension load, and dictionary prewarm run as background warmups after overlay initialization. +4. MeCab, Yomitan extension load, and dictionary prewarm run as background warmups after overlay initialization (configurable via `startupWarmups`, including low-power mode). This keeps early playback snappy and avoids mpv-side sluggishness while startup work completes. @@ -72,11 +72,13 @@ There are three ways to create cards, depending on your workflow. ### 1. Auto-Update from Yomitan -This is the most common flow. Yomitan creates a card in Anki, and SubMiner detects it via polling and enriches it automatically. +This is the most common flow. Yomitan creates a card in Anki, and SubMiner enriches it automatically. 1. Click a word → Yomitan popup appears. 2. Click the Anki icon in Yomitan to add the word. -3. SubMiner detects the new card (polls AnkiConnect every 3 seconds by default). +3. SubMiner receives or detects the new card: + - **Proxy mode** (`ankiConnect.proxy.enabled: true`): immediate enrich after successful `addNote` / `addNotes`. + - **Polling mode** (default): detects via AnkiConnect polling (`ankiConnect.pollingRate`, default 3 seconds). 4. SubMiner updates the card with: - **Sentence**: The current subtitle line. - **Audio**: Extracted from the video using the subtitle's start/end timing (plus configurable padding). @@ -95,7 +97,7 @@ If you prefer a hands-on approach (animecards-style), you can copy the current s - For multiple lines: press `Ctrl/Cmd+Shift+C`, then a digit `1`–`9` to select how many recent subtitle lines to combine. The combined text is copied to the clipboard. 3. Press `Ctrl/Cmd+V` to update the last-added card with the clipboard contents plus audio, image, and translation — the same fields auto-update would fill. -This is useful when auto-update polling is disabled or when you want explicit control over which subtitle line gets attached to the card. 
+This is useful when auto-update is disabled or when you want explicit control over which subtitle line gets attached to the card. | Shortcut | Action | Config key | | --------------------------- | ----------------------------------------- | ------------------------------------- | diff --git a/docs/public/config.example.jsonc b/docs/public/config.example.jsonc index 2c61a92..31a3590 100644 --- a/docs/public/config.example.jsonc +++ b/docs/public/config.example.jsonc @@ -12,13 +12,6 @@ // ========================================== "auto_start_overlay": false, // When overlay connects to mpv, automatically show overlay and hide mpv subtitles. Values: true | false - // ========================================== - // Visible Overlay Subtitle Binding - // Control whether visible overlay toggles also toggle MPV subtitle visibility. - // When enabled, visible overlay hides MPV subtitles; when disabled, MPV subtitles are left unchanged. - // ========================================== - "bind_visible_overlay_to_mpv_sub_visibility": true, // Link visible overlay toggles to MPV primary subtitle visibility. Values: true | false - // ========================================== // Texthooker Server // Control whether browser opens automatically for texthooker. @@ -179,6 +172,12 @@ "enabled": false, // Enable AnkiConnect integration. Values: true | false "url": "http://127.0.0.1:8765", // Url setting. "pollingRate": 3000, // Polling interval in milliseconds. + "proxy": { + "enabled": false, // Enable local AnkiConnect-compatible proxy for push-based auto-enrichment. Values: true | false + "host": "127.0.0.1", // Bind host for local AnkiConnect proxy. + "port": 8766, // Bind port for local AnkiConnect proxy. + "upstreamUrl": "http://127.0.0.1:8765" // Upstream AnkiConnect URL proxied by local AnkiConnect proxy. + }, // Proxy setting. "tags": [ "SubMiner" ], // Tags to add to cards mined or updated by SubMiner. Provide an empty array to disable automatic tagging. 
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index ebf70f3..bf424dd 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -30,7 +30,7 @@ SubMiner retries the connection automatically with increasing delays (200 ms, 50 - first subtitle parse/tokenization bursts - media generation (`ffmpeg` audio/image and AVIF paths) - media sync and subtitle tooling (`alass`, `ffsubsync`, `whisper` fallback path) - - `ankiConnect` enrichment and frequent polling + - `ankiConnect` enrichment (plus polling overhead when proxy mode is disabled) ### If playback feels sluggish @@ -104,11 +104,17 @@ Logged when a malformed JSON line arrives from the mpv socket. Usually harmless **"AnkiConnect: unable to connect"** -SubMiner polls AnkiConnect at `http://127.0.0.1:8765` (configurable via `ankiConnect.url`). This error means Anki is not running or the AnkiConnect add-on is not installed. +SubMiner connects to the active Anki endpoint: + +- `ankiConnect.url` (direct mode, default `http://127.0.0.1:8765`) +- `http://<ankiConnect.proxy.host>:<ankiConnect.proxy.port>` (proxy mode) + +This error means the active endpoint is unavailable, or (in proxy mode) the proxy cannot reach `ankiConnect.proxy.upstreamUrl`. - Install the [AnkiConnect](https://ankiweb.net/shared/info/2055492159) add-on in Anki. - Make sure Anki is running before you start mining. -- If you changed the AnkiConnect port, update `ankiConnect.url` in your config. +- If you changed the AnkiConnect port, update `ankiConnect.url` (or `ankiConnect.proxy.upstreamUrl` if using proxy mode). +- If using external Yomitan/browser clients, confirm they point to your SubMiner proxy URL. SubMiner retries with exponential backoff (up to 5 s) and suppresses repeated error logs after 5 consecutive failures. When Anki comes back, you will see "AnkiConnect connection restored". @@ -122,7 +128,7 @@ See [Anki Integration](/anki-integration) for the full field mapping reference. 
Shown when SubMiner tries to update a card that no longer exists, or when AnkiConnect rejects the update. Common causes: -- The card was deleted in Anki between polling and update. +- The card was deleted in Anki between creation and enrichment update. - The note type changed and a mapped field no longer exists. ## Overlay diff --git a/src/anki-integration/anki-connect-proxy.test.ts b/src/anki-integration/anki-connect-proxy.test.ts new file mode 100644 index 0000000..a1dd8a2 --- /dev/null +++ b/src/anki-integration/anki-connect-proxy.test.ts @@ -0,0 +1,133 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; +import { AnkiConnectProxyServer } from './anki-connect-proxy'; + +async function waitForCondition( + condition: () => boolean, + timeoutMs = 2000, + intervalMs = 10, +): Promise<void> { + const startedAt = Date.now(); + while (Date.now() - startedAt < timeoutMs) { + if (condition()) return; + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } + throw new Error('Timed out waiting for condition'); +} + +test('proxy enqueues addNote result for enrichment', async () => { + const processed: number[] = []; + const proxy = new AnkiConnectProxyServer({ + shouldAutoUpdateNewCards: () => true, + processNewCard: async (noteId) => { + processed.push(noteId); + }, + logInfo: () => undefined, + logWarn: () => undefined, + logError: () => undefined, + }); + + (proxy as unknown as { + maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void; + }).maybeEnqueueFromRequest( + { action: 'addNote' }, + Buffer.from(JSON.stringify({ result: 42, error: null }), 'utf8'), + ); + + await waitForCondition(() => processed.length === 1); + assert.deepEqual(processed, [42]); +}); + +test('proxy de-duplicates addNotes IDs within the same response', async () => { + const processed: number[] = []; + const proxy = new AnkiConnectProxyServer({ + shouldAutoUpdateNewCards: () => true, + processNewCard: async (noteId) => { + processed.push(noteId); 
+ await new Promise((resolve) => setTimeout(resolve, 5)); + }, + logInfo: () => undefined, + logWarn: () => undefined, + logError: () => undefined, + }); + + (proxy as unknown as { + maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void; + }).maybeEnqueueFromRequest( + { action: 'addNotes' }, + Buffer.from(JSON.stringify({ result: [101, 102, 101, null], error: null }), 'utf8'), + ); + + await waitForCondition(() => processed.length === 2); + assert.deepEqual(processed, [101, 102]); +}); + +test('proxy skips auto-enrichment when auto-update is disabled', async () => { + const processed: number[] = []; + const proxy = new AnkiConnectProxyServer({ + shouldAutoUpdateNewCards: () => false, + processNewCard: async (noteId) => { + processed.push(noteId); + }, + logInfo: () => undefined, + logWarn: () => undefined, + logError: () => undefined, + }); + + (proxy as unknown as { + maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void; + }).maybeEnqueueFromRequest( + { action: 'addNote' }, + Buffer.from(JSON.stringify({ result: 303, error: null }), 'utf8'), + ); + + await new Promise((resolve) => setTimeout(resolve, 30)); + assert.deepEqual(processed, []); +}); + +test('proxy ignores addNote when upstream response reports error', async () => { + const processed: number[] = []; + const proxy = new AnkiConnectProxyServer({ + shouldAutoUpdateNewCards: () => true, + processNewCard: async (noteId) => { + processed.push(noteId); + }, + logInfo: () => undefined, + logWarn: () => undefined, + logError: () => undefined, + }); + + (proxy as unknown as { + maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void; + }).maybeEnqueueFromRequest( + { action: 'addNote' }, + Buffer.from(JSON.stringify({ result: 123, error: 'duplicate' }), 'utf8'), + ); + + await new Promise((resolve) => setTimeout(resolve, 30)); + assert.deepEqual(processed, []); +}); + +test('proxy detects self-referential loop configuration', () => { + const proxy = new 
AnkiConnectProxyServer({ + shouldAutoUpdateNewCards: () => true, + processNewCard: async () => undefined, + logInfo: () => undefined, + logWarn: () => undefined, + logError: () => undefined, + }); + + const result = (proxy as unknown as { + isSelfReferentialProxy: (options: { + host: string; + port: number; + upstreamUrl: string; + }) => boolean; + }).isSelfReferentialProxy({ + host: '127.0.0.1', + port: 8766, + upstreamUrl: 'http://localhost:8766', + }); + + assert.equal(result, true); +}); diff --git a/src/anki-integration/anki-connect-proxy.ts b/src/anki-integration/anki-connect-proxy.ts new file mode 100644 index 0000000..97175ef --- /dev/null +++ b/src/anki-integration/anki-connect-proxy.ts @@ -0,0 +1,314 @@ +import http, { IncomingMessage, ServerResponse } from 'node:http'; +import axios, { AxiosInstance } from 'axios'; + +interface StartProxyOptions { + host: string; + port: number; + upstreamUrl: string; +} + +interface AnkiConnectEnvelope { + result: unknown; + error: unknown; +} + +export interface AnkiConnectProxyServerDeps { + shouldAutoUpdateNewCards: () => boolean; + processNewCard: (noteId: number) => Promise<void>; + logInfo: (message: string, ...args: unknown[]) => void; + logWarn: (message: string, ...args: unknown[]) => void; + logError: (message: string, ...args: unknown[]) => void; +} + +export class AnkiConnectProxyServer { + private server: http.Server | null = null; + private client: AxiosInstance; + private pendingNoteIds: number[] = []; + private pendingNoteIdSet = new Set<number>(); + private inFlightNoteIds = new Set<number>(); + private processingQueue = false; + + constructor(private readonly deps: AnkiConnectProxyServerDeps) { + this.client = axios.create({ + timeout: 15000, + validateStatus: () => true, + responseType: 'arraybuffer', + }); + } + + get isRunning(): boolean { + return this.server !== null; + } + + start(options: StartProxyOptions): void { + this.stop(); + + if (this.isSelfReferentialProxy(options)) { + this.deps.logError( + '[anki-proxy] 
Proxy upstream points to proxy host/port; refusing to start to avoid loop.', + ); + return; + } + + this.server = http.createServer((req, res) => { + void this.handleRequest(req, res, options.upstreamUrl); + }); + + this.server.on('error', (error) => { + this.deps.logError('[anki-proxy] Server error:', (error as Error).message); + }); + + this.server.listen(options.port, options.host, () => { + this.deps.logInfo( + `[anki-proxy] Listening on http://${options.host}:${options.port} -> ${options.upstreamUrl}`, + ); + }); + } + + stop(): void { + if (this.server) { + this.server.close(); + this.server = null; + this.deps.logInfo('[anki-proxy] Stopped'); + } + this.pendingNoteIds = []; + this.pendingNoteIdSet.clear(); + this.inFlightNoteIds.clear(); + this.processingQueue = false; + } + + private isSelfReferentialProxy(options: StartProxyOptions): boolean { + try { + const upstream = new URL(options.upstreamUrl); + const normalizedUpstreamHost = upstream.hostname.toLowerCase(); + const normalizedBindHost = options.host.toLowerCase(); + const upstreamPort = + upstream.port.length > 0 + ? Number(upstream.port) + : upstream.protocol === 'https:' + ? 
443 + : 80; + const hostMatches = + normalizedUpstreamHost === normalizedBindHost || + (normalizedUpstreamHost === 'localhost' && normalizedBindHost === '127.0.0.1') || + (normalizedUpstreamHost === '127.0.0.1' && normalizedBindHost === 'localhost'); + return hostMatches && upstreamPort === options.port; + } catch { + return false; + } + } + + private async handleRequest( + req: IncomingMessage, + res: ServerResponse, + upstreamUrl: string, + ): Promise<void> { + this.setCorsHeaders(res); + + if (req.method === 'OPTIONS') { + res.statusCode = 204; + res.end(); + return; + } + + if (!req.method || (req.method !== 'GET' && req.method !== 'POST')) { + res.statusCode = 405; + res.end('Method Not Allowed'); + return; + } + + let rawBody: Buffer = Buffer.alloc(0); + if (req.method === 'POST') { + rawBody = await this.readRequestBody(req); + } + + let requestJson: Record<string, unknown> | null = null; + if (req.method === 'POST' && rawBody.length > 0) { + requestJson = this.tryParseJson(rawBody); + } + + try { + const targetUrl = new URL(req.url || '/', upstreamUrl).toString(); + const contentType = + typeof req.headers['content-type'] === 'string' + ? req.headers['content-type'] + : 'application/json'; + const upstreamResponse = await this.client.request({ + url: targetUrl, + method: req.method, + data: req.method === 'POST' ? rawBody : undefined, + headers: { + 'content-type': contentType, + }, + }); + + const responseBody: Buffer = Buffer.isBuffer(upstreamResponse.data) + ? 
upstreamResponse.data + : Buffer.from(new Uint8Array(upstreamResponse.data)); + this.copyUpstreamHeaders(res, upstreamResponse.headers as Record<string, unknown>); + res.statusCode = upstreamResponse.status; + res.end(responseBody); + + if (req.method === 'POST') { + this.maybeEnqueueFromRequest(requestJson, responseBody); + } + } catch (error) { + this.deps.logWarn('[anki-proxy] Failed to forward request:', (error as Error).message); + res.statusCode = 502; + res.end('Bad Gateway'); + } + } + + private maybeEnqueueFromRequest( + requestJson: Record<string, unknown> | null, + responseBody: Buffer, + ): void { + if (!requestJson || !this.deps.shouldAutoUpdateNewCards()) { + return; + } + + const action = + typeof requestJson.action === 'string' ? requestJson.action : String(requestJson.action ?? ''); + if (action !== 'addNote' && action !== 'addNotes') { + return; + } + + const responseJson = this.tryParseJson(responseBody) as AnkiConnectEnvelope | null; + if (!responseJson || responseJson.error !== null) { + return; + } + + const noteIds = + action === 'addNote' + ? 
this.collectSingleResultId(responseJson.result) + : this.collectBatchResultIds(responseJson.result); + if (noteIds.length === 0) { + return; + } + + this.enqueueNotes(noteIds); + } + + private collectSingleResultId(value: unknown): number[] { + if (typeof value === 'number' && Number.isInteger(value) && value > 0) { + return [value]; + } + return []; + } + + private collectBatchResultIds(value: unknown): number[] { + if (!Array.isArray(value)) { + return []; + } + return value.filter((entry): entry is number => { + return typeof entry === 'number' && Number.isInteger(entry) && entry > 0; + }); + } + + private enqueueNotes(noteIds: number[]): void { + let enqueuedCount = 0; + for (const noteId of noteIds) { + if (this.pendingNoteIdSet.has(noteId) || this.inFlightNoteIds.has(noteId)) { + continue; + } + this.pendingNoteIds.push(noteId); + this.pendingNoteIdSet.add(noteId); + enqueuedCount += 1; + } + + if (enqueuedCount === 0) { + return; + } + + this.deps.logInfo(`[anki-proxy] Enqueued ${enqueuedCount} note(s) for enrichment`); + this.processQueue(); + } + + private processQueue(): void { + if (this.processingQueue) { + return; + } + this.processingQueue = true; + + void (async () => { + try { + while (this.pendingNoteIds.length > 0) { + const noteId = this.pendingNoteIds.shift(); + if (noteId === undefined) { + continue; + } + this.pendingNoteIdSet.delete(noteId); + + if (!this.deps.shouldAutoUpdateNewCards()) { + continue; + } + + this.inFlightNoteIds.add(noteId); + try { + await this.deps.processNewCard(noteId); + } catch (error) { + this.deps.logWarn( + `[anki-proxy] Failed to auto-enrich note ${noteId}:`, + (error as Error).message, + ); + } finally { + this.inFlightNoteIds.delete(noteId); + } + } + } finally { + this.processingQueue = false; + if (this.pendingNoteIds.length > 0) { + this.processQueue(); + } + } + })(); + } + + private async readRequestBody(req: IncomingMessage): Promise { + const chunks: Buffer[] = []; + for await (const chunk of req) { + 
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + } + return Buffer.concat(chunks); + } + + private tryParseJson(rawBody: Buffer): Record | null { + if (rawBody.length === 0) { + return null; + } + try { + const parsed = JSON.parse(rawBody.toString('utf8')); + return parsed && typeof parsed === 'object' ? (parsed as Record) : null; + } catch { + return null; + } + } + + private setCorsHeaders(res: ServerResponse): void { + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Access-Control-Allow-Headers', 'Content-Type'); + res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'); + } + + private copyUpstreamHeaders( + res: ServerResponse, + headers: Record, + ): void { + for (const [key, value] of Object.entries(headers)) { + if (value === undefined) { + continue; + } + if (key.toLowerCase() === 'content-length') { + continue; + } + if (Array.isArray(value)) { + res.setHeader( + key, + value.map((entry) => String(entry)), + ); + } else { + res.setHeader(key, String(value)); + } + } + } +} diff --git a/src/config/definitions/defaults-integrations.ts b/src/config/definitions/defaults-integrations.ts index 662265a..c49bec7 100644 --- a/src/config/definitions/defaults-integrations.ts +++ b/src/config/definitions/defaults-integrations.ts @@ -8,6 +8,12 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick< enabled: false, url: 'http://127.0.0.1:8765', pollingRate: 3000, + proxy: { + enabled: false, + host: '127.0.0.1', + port: 8766, + upstreamUrl: 'http://127.0.0.1:8765', + }, tags: ['SubMiner'], fields: { audio: 'ExpressionAudio', diff --git a/src/config/definitions/options-integrations.ts b/src/config/definitions/options-integrations.ts index f102207..7e48ce9 100644 --- a/src/config/definitions/options-integrations.ts +++ b/src/config/definitions/options-integrations.ts @@ -18,6 +18,30 @@ export function buildIntegrationConfigOptionRegistry( defaultValue: defaultConfig.ankiConnect.pollingRate, description: 'Polling interval in 
milliseconds.', }, + { + path: 'ankiConnect.proxy.enabled', + kind: 'boolean', + defaultValue: defaultConfig.ankiConnect.proxy.enabled, + description: 'Enable local AnkiConnect-compatible proxy for push-based auto-enrichment.', + }, + { + path: 'ankiConnect.proxy.host', + kind: 'string', + defaultValue: defaultConfig.ankiConnect.proxy.host, + description: 'Bind host for local AnkiConnect proxy.', + }, + { + path: 'ankiConnect.proxy.port', + kind: 'number', + defaultValue: defaultConfig.ankiConnect.proxy.port, + description: 'Bind port for local AnkiConnect proxy.', + }, + { + path: 'ankiConnect.proxy.upstreamUrl', + kind: 'string', + defaultValue: defaultConfig.ankiConnect.proxy.upstreamUrl, + description: 'Upstream AnkiConnect URL proxied by local AnkiConnect proxy.', + }, { path: 'ankiConnect.tags', kind: 'array', diff --git a/src/config/resolve/anki-connect.test.ts b/src/config/resolve/anki-connect.test.ts index 0b7a1cd..247ec15 100644 --- a/src/config/resolve/anki-connect.test.ts +++ b/src/config/resolve/anki-connect.test.ts @@ -66,3 +66,44 @@ test('warns and falls back for invalid nPlusOne.decks entries', () => { ); assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.decks')); }); + +test('accepts valid proxy settings', () => { + const { context, warnings } = makeContext({ + proxy: { + enabled: true, + host: '127.0.0.1', + port: 9999, + upstreamUrl: 'http://127.0.0.1:8765', + }, + }); + + applyAnkiConnectResolution(context); + + assert.equal(context.resolved.ankiConnect.proxy.enabled, true); + assert.equal(context.resolved.ankiConnect.proxy.host, '127.0.0.1'); + assert.equal(context.resolved.ankiConnect.proxy.port, 9999); + assert.equal(context.resolved.ankiConnect.proxy.upstreamUrl, 'http://127.0.0.1:8765'); + assert.equal( + warnings.some((warning) => warning.path.startsWith('ankiConnect.proxy')), + false, + ); +}); + +test('warns and falls back for invalid proxy settings', () => { + const { context, warnings } = makeContext({ + 
proxy: { + enabled: 'yes', + host: '', + port: -1, + upstreamUrl: '', + }, + }); + + applyAnkiConnectResolution(context); + + assert.deepEqual(context.resolved.ankiConnect.proxy, DEFAULT_CONFIG.ankiConnect.proxy); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.enabled')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.host')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.port')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.upstreamUrl')); +}); diff --git a/src/config/resolve/anki-connect.ts b/src/config/resolve/anki-connect.ts index f88d7e6..181fd37 100644 --- a/src/config/resolve/anki-connect.ts +++ b/src/config/resolve/anki-connect.ts @@ -12,6 +12,7 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { const fields = isObject(ac.fields) ? (ac.fields as Record) : {}; const media = isObject(ac.media) ? (ac.media as Record) : {}; const metadata = isObject(ac.metadata) ? (ac.metadata as Record) : {}; + const proxy = isObject(ac.proxy) ? (ac.proxy as Record) : {}; const aiSource = isObject(ac.ai) ? ac.ai : isObject(ac.openRouter) ? ac.openRouter : {}; const legacyKeys = new Set([ 'audioField', @@ -85,6 +86,9 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { ? 
(ac.behavior as (typeof context.resolved)['ankiConnect']['behavior']) : {}), }, + proxy: { + ...context.resolved.ankiConnect.proxy, + }, metadata: { ...context.resolved.ankiConnect.metadata, ...(isObject(ac.metadata) @@ -153,6 +157,68 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { ); } + if (isObject(ac.proxy)) { + const proxyEnabled = asBoolean(proxy.enabled); + if (proxyEnabled !== undefined) { + context.resolved.ankiConnect.proxy.enabled = proxyEnabled; + } else if (proxy.enabled !== undefined) { + context.warn( + 'ankiConnect.proxy.enabled', + proxy.enabled, + context.resolved.ankiConnect.proxy.enabled, + 'Expected boolean.', + ); + } + + const proxyHost = asString(proxy.host); + if (proxyHost !== undefined && proxyHost.trim().length > 0) { + context.resolved.ankiConnect.proxy.host = proxyHost.trim(); + } else if (proxy.host !== undefined) { + context.warn( + 'ankiConnect.proxy.host', + proxy.host, + context.resolved.ankiConnect.proxy.host, + 'Expected non-empty string.', + ); + } + + const proxyUpstreamUrl = asString(proxy.upstreamUrl); + if (proxyUpstreamUrl !== undefined && proxyUpstreamUrl.trim().length > 0) { + context.resolved.ankiConnect.proxy.upstreamUrl = proxyUpstreamUrl.trim(); + } else if (proxy.upstreamUrl !== undefined) { + context.warn( + 'ankiConnect.proxy.upstreamUrl', + proxy.upstreamUrl, + context.resolved.ankiConnect.proxy.upstreamUrl, + 'Expected non-empty string.', + ); + } + + const proxyPort = asNumber(proxy.port); + if ( + proxyPort !== undefined && + Number.isInteger(proxyPort) && + proxyPort >= 1 && + proxyPort <= 65535 + ) { + context.resolved.ankiConnect.proxy.port = proxyPort; + } else if (proxy.port !== undefined) { + context.warn( + 'ankiConnect.proxy.port', + proxy.port, + context.resolved.ankiConnect.proxy.port, + 'Expected integer between 1 and 65535.', + ); + } + } else if (ac.proxy !== undefined) { + context.warn( + 'ankiConnect.proxy', + ac.proxy, + context.resolved.ankiConnect.proxy, + 
'Expected object.', + ); + } + if (Array.isArray(ac.tags)) { const normalizedTags = ac.tags .filter((entry): entry is string => typeof entry === 'string') diff --git a/src/core/services/overlay-runtime-init.test.ts b/src/core/services/overlay-runtime-init.test.ts index 946e278..489f7ba 100644 --- a/src/core/services/overlay-runtime-init.test.ts +++ b/src/core/services/overlay-runtime-init.test.ts @@ -41,7 +41,13 @@ test('initializeOverlayRuntime skips Anki integration when ankiConnect.enabled i setIntegrationCalls += 1; }, showDesktopNotification: () => {}, - createFieldGroupingCallback: () => async () => 'auto', + createFieldGroupingCallback: () => + async () => ({ + keepNoteId: 1, + deleteNoteId: 2, + deleteDuplicate: false, + cancelled: false, + }), getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json', }); @@ -90,7 +96,13 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled setIntegrationCalls += 1; }, showDesktopNotification: () => {}, - createFieldGroupingCallback: () => async () => 'manual', + createFieldGroupingCallback: () => + async () => ({ + keepNoteId: 3, + deleteNoteId: 4, + deleteDuplicate: false, + cancelled: false, + }), getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json', }); diff --git a/src/core/services/tokenizer.test.ts b/src/core/services/tokenizer.test.ts index eccdc3f..ae6b8c7 100644 --- a/src/core/services/tokenizer.test.ts +++ b/src/core/services/tokenizer.test.ts @@ -1696,3 +1696,169 @@ test('createTokenizerDepsRuntime checks MeCab availability before first tokenize assert.equal(first?.[0]?.surface, '仮面'); assert.equal(second?.[0]?.surface, '仮面'); }); + +test('tokenizeSubtitle uses async MeCab enrichment override when provided', async () => { + const result = await tokenizeSubtitle( + '猫', + makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], { + tokenizeWithMecab: async () => [ + { + headword: '猫', + surface: '猫', + reading: 'ネコ', + startPos: 0, + endPos: 1, + 
partOfSpeech: PartOfSpeech.noun, + pos1: '名詞', + isMerged: true, + isKnown: false, + isNPlusOneTarget: false, + }, + ], + enrichTokensWithMecab: async (tokens) => + tokens.map((token) => ({ + ...token, + pos1: 'override-pos', + })), + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.pos1, 'override-pos'); +}); + +test('createTokenizerDepsRuntime exposes async MeCab enrichment helper', async () => { + const deps = createTokenizerDepsRuntime({ + getYomitanExt: () => null, + getYomitanParserWindow: () => null, + setYomitanParserWindow: () => {}, + getYomitanParserReadyPromise: () => null, + setYomitanParserReadyPromise: () => {}, + getYomitanParserInitPromise: () => null, + setYomitanParserInitPromise: () => {}, + isKnownWord: () => false, + getKnownWordMatchMode: () => 'headword', + getJlptLevel: () => null, + getMecabTokenizer: () => null, + }); + + const enriched = await deps.enrichTokensWithMecab?.( + [ + { + headword: 'は', + surface: 'は', + reading: 'は', + startPos: 0, + endPos: 1, + partOfSpeech: PartOfSpeech.other, + isMerged: true, + isKnown: false, + isNPlusOneTarget: false, + }, + ], + [ + { + headword: 'は', + surface: 'は', + reading: 'ハ', + startPos: 0, + endPos: 1, + partOfSpeech: PartOfSpeech.particle, + pos1: '助詞', + isMerged: false, + isKnown: false, + isNPlusOneTarget: false, + }, + ], + ); + + assert.equal(enriched?.[0]?.pos1, '助詞'); +}); + +test('tokenizeSubtitle skips all enrichment stages when disabled', async () => { + let knownCalls = 0; + let mecabCalls = 0; + let jlptCalls = 0; + let frequencyCalls = 0; + + const result = await tokenizeSubtitle( + '猫', + makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], { + isKnownWord: () => { + knownCalls += 1; + return true; + }, + getNPlusOneEnabled: () => false, + getJlptEnabled: () => false, + getFrequencyDictionaryEnabled: () => false, + getJlptLevel: () => { + jlptCalls += 1; + return 'N5'; + }, + getFrequencyRank: () => { + frequencyCalls 
+= 1; + return 10; + }, + tokenizeWithMecab: async () => { + mecabCalls += 1; + return null; + }, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.isKnown, false); + assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false); + assert.equal(result.tokens?.[0]?.jlptLevel, undefined); + assert.equal(result.tokens?.[0]?.frequencyRank, undefined); + assert.equal(knownCalls, 0); + assert.equal(mecabCalls, 0); + assert.equal(jlptCalls, 0); + assert.equal(frequencyCalls, 0); +}); + +test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async () => { + let knownCalls = 0; + let mecabCalls = 0; + let frequencyCalls = 0; + + const result = await tokenizeSubtitle( + '猫', + makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], { + isKnownWord: () => { + knownCalls += 1; + return true; + }, + getNPlusOneEnabled: () => false, + getJlptEnabled: () => false, + getFrequencyDictionaryEnabled: () => true, + getFrequencyRank: () => { + frequencyCalls += 1; + return 7; + }, + tokenizeWithMecab: async () => { + mecabCalls += 1; + return [ + { + headword: '猫', + surface: '猫', + reading: 'ネコ', + startPos: 0, + endPos: 1, + partOfSpeech: PartOfSpeech.noun, + pos1: '名詞', + isMerged: false, + isKnown: false, + isNPlusOneTarget: false, + }, + ]; + }, + }), + ); + + assert.equal(result.tokens?.[0]?.frequencyRank, 7); + assert.equal(result.tokens?.[0]?.isKnown, false); + assert.equal(knownCalls, 0); + assert.equal(mecabCalls, 1); + assert.equal(frequencyCalls, 1); +}); diff --git a/src/core/services/tokenizer.ts b/src/core/services/tokenizer.ts index 0634cf0..baeb4aa 100644 --- a/src/core/services/tokenizer.ts +++ b/src/core/services/tokenizer.ts @@ -9,13 +9,16 @@ import { FrequencyDictionaryLookup, JlptLevel, } from '../../types'; -import { annotateTokens } from './tokenizer/annotation-stage'; -import { enrichTokensWithMecabPos1 } from './tokenizer/parser-enrichment-stage'; import { selectYomitanParseTokens } 
from './tokenizer/parser-selection-stage'; import { requestYomitanParseResults } from './tokenizer/yomitan-parser-runtime'; const logger = createLogger('main:tokenizer'); +type MecabTokenEnrichmentFn = ( + tokens: MergedToken[], + mecabTokens: MergedToken[] | null, +) => Promise; + export interface TokenizerServiceDeps { getYomitanExt: () => Extension | null; getYomitanParserWindow: () => BrowserWindow | null; @@ -27,12 +30,14 @@ export interface TokenizerServiceDeps { isKnownWord: (text: string) => boolean; getKnownWordMatchMode: () => NPlusOneMatchMode; getJlptLevel: (text: string) => JlptLevel | null; + getNPlusOneEnabled?: () => boolean; getJlptEnabled?: () => boolean; getFrequencyDictionaryEnabled?: () => boolean; getFrequencyRank?: FrequencyDictionaryLookup; getMinSentenceWordsForNPlusOne?: () => number; getYomitanGroupDebugEnabled?: () => boolean; tokenizeWithMecab: (text: string) => Promise; + enrichTokensWithMecab?: MecabTokenEnrichmentFn; } interface MecabTokenizerLike { @@ -52,6 +57,7 @@ export interface TokenizerDepsRuntimeOptions { isKnownWord: (text: string) => boolean; getKnownWordMatchMode: () => NPlusOneMatchMode; getJlptLevel: (text: string) => JlptLevel | null; + getNPlusOneEnabled?: () => boolean; getJlptEnabled?: () => boolean; getFrequencyDictionaryEnabled?: () => boolean; getFrequencyRank?: FrequencyDictionaryLookup; @@ -60,6 +66,82 @@ export interface TokenizerDepsRuntimeOptions { getMecabTokenizer: () => MecabTokenizerLike | null; } +interface TokenizerAnnotationOptions { + nPlusOneEnabled: boolean; + jlptEnabled: boolean; + frequencyEnabled: boolean; + minSentenceWordsForNPlusOne: number | undefined; +} + +let parserEnrichmentWorkerRuntimeModulePromise: + | Promise + | null = null; +let annotationStageModulePromise: Promise | null = null; +let parserEnrichmentFallbackModulePromise: + | Promise + | null = null; + +function getKnownWordLookup(deps: TokenizerServiceDeps, options: TokenizerAnnotationOptions): (text: string) => boolean { + if 
(!options.nPlusOneEnabled) { + return () => false; + } + return deps.isKnownWord; +} + +function needsMecabPosEnrichment(options: TokenizerAnnotationOptions): boolean { + return options.jlptEnabled || options.frequencyEnabled; +} + +function hasAnyAnnotationEnabled(options: TokenizerAnnotationOptions): boolean { + return options.nPlusOneEnabled || options.jlptEnabled || options.frequencyEnabled; +} + +async function enrichTokensWithMecabAsync( + tokens: MergedToken[], + mecabTokens: MergedToken[] | null, +): Promise { + if (!parserEnrichmentWorkerRuntimeModulePromise) { + parserEnrichmentWorkerRuntimeModulePromise = import('./tokenizer/parser-enrichment-worker-runtime'); + } + + try { + const runtime = await parserEnrichmentWorkerRuntimeModulePromise; + return await runtime.enrichTokensWithMecabPos1Async(tokens, mecabTokens); + } catch { + if (!parserEnrichmentFallbackModulePromise) { + parserEnrichmentFallbackModulePromise = import('./tokenizer/parser-enrichment-stage'); + } + const fallback = await parserEnrichmentFallbackModulePromise; + return fallback.enrichTokensWithMecabPos1(tokens, mecabTokens); + } +} + +async function applyAnnotationStage( + tokens: MergedToken[], + deps: TokenizerServiceDeps, + options: TokenizerAnnotationOptions, +): Promise { + if (!hasAnyAnnotationEnabled(options)) { + return tokens; + } + + if (!annotationStageModulePromise) { + annotationStageModulePromise = import('./tokenizer/annotation-stage'); + } + + const annotationStage = await annotationStageModulePromise; + return annotationStage.annotateTokens( + tokens, + { + isKnownWord: getKnownWordLookup(deps, options), + knownWordMatchMode: deps.getKnownWordMatchMode(), + getJlptLevel: deps.getJlptLevel, + getFrequencyRank: deps.getFrequencyRank, + }, + options, + ); +} + export function createTokenizerDepsRuntime( options: TokenizerDepsRuntimeOptions, ): TokenizerServiceDeps { @@ -76,6 +158,7 @@ export function createTokenizerDepsRuntime( isKnownWord: options.isKnownWord, 
getKnownWordMatchMode: options.getKnownWordMatchMode, getJlptLevel: options.getJlptLevel, + getNPlusOneEnabled: options.getNPlusOneEnabled, getJlptEnabled: options.getJlptEnabled, getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled, getFrequencyRank: options.getFrequencyRank, @@ -104,8 +187,11 @@ export function createTokenizerDepsRuntime( return null; } - return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode()); + const isKnownWordLookup = options.getNPlusOneEnabled?.() === false ? () => false : options.isKnownWord; + return mergeTokens(rawTokens, isKnownWordLookup, options.getKnownWordMatchMode()); }, + enrichTokensWithMecab: async (tokens, mecabTokens) => + enrichTokensWithMecabAsync(tokens, mecabTokens), }; } @@ -128,36 +214,19 @@ function logSelectedYomitanGroups(text: string, tokens: MergedToken[]): void { }); } -function getAnnotationOptions(deps: TokenizerServiceDeps): { - jlptEnabled: boolean; - frequencyEnabled: boolean; - minSentenceWordsForNPlusOne: number | undefined; -} { +function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOptions { return { + nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false, jlptEnabled: deps.getJlptEnabled?.() !== false, frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false, minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(), }; } -function applyAnnotationStage(tokens: MergedToken[], deps: TokenizerServiceDeps): MergedToken[] { - const options = getAnnotationOptions(deps); - - return annotateTokens( - tokens, - { - isKnownWord: deps.isKnownWord, - knownWordMatchMode: deps.getKnownWordMatchMode(), - getJlptLevel: deps.getJlptLevel, - getFrequencyRank: deps.getFrequencyRank, - }, - options, - ); -} - async function parseWithYomitanInternalParser( text: string, deps: TokenizerServiceDeps, + options: TokenizerAnnotationOptions, ): Promise { const parseResults = await requestYomitanParseResults(text, deps, logger); if 
(!parseResults) { @@ -166,7 +235,7 @@ async function parseWithYomitanInternalParser( const selectedTokens = selectYomitanParseTokens( parseResults, - deps.isKnownWord, + getKnownWordLookup(deps, options), deps.getKnownWordMatchMode(), ); if (!selectedTokens || selectedTokens.length === 0) { @@ -177,9 +246,14 @@ async function parseWithYomitanInternalParser( logSelectedYomitanGroups(text, selectedTokens); } + if (!needsMecabPosEnrichment(options)) { + return selectedTokens; + } + try { const mecabTokens = await deps.tokenizeWithMecab(text); - return enrichTokensWithMecabPos1(selectedTokens, mecabTokens); + const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync; + return await enrichTokensWithMecab(selectedTokens, mecabTokens); } catch (err) { const error = err as Error; logger.warn( @@ -207,12 +281,13 @@ export async function tokenizeSubtitle( } const tokenizeText = displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim(); + const annotationOptions = getAnnotationOptions(deps); - const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps); + const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions); if (yomitanTokens && yomitanTokens.length > 0) { return { text: displayText, - tokens: applyAnnotationStage(yomitanTokens, deps), + tokens: await applyAnnotationStage(yomitanTokens, deps, annotationOptions), }; } diff --git a/src/core/services/tokenizer/annotation-stage.ts b/src/core/services/tokenizer/annotation-stage.ts index d40bf08..7d099dd 100644 --- a/src/core/services/tokenizer/annotation-stage.ts +++ b/src/core/services/tokenizer/annotation-stage.ts @@ -31,6 +31,7 @@ export interface AnnotationStageDeps { } export interface AnnotationStageOptions { + nPlusOneEnabled?: boolean; jlptEnabled?: boolean; frequencyEnabled?: boolean; minSentenceWordsForNPlusOne?: number; @@ -340,11 +341,14 @@ export function annotateTokens( deps: AnnotationStageDeps, options: 
AnnotationStageOptions = {}, ): MergedToken[] { - const knownMarkedTokens = applyKnownWordMarking( - tokens, - deps.isKnownWord, - deps.knownWordMatchMode, - ); + const nPlusOneEnabled = options.nPlusOneEnabled !== false; + const knownMarkedTokens = nPlusOneEnabled + ? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode) + : tokens.map((token) => ({ + ...token, + isKnown: false, + isNPlusOneTarget: false, + })); const frequencyEnabled = options.frequencyEnabled !== false; const frequencyMarkedTokens = @@ -363,6 +367,14 @@ export function annotateTokens( jlptLevel: undefined, })); + if (!nPlusOneEnabled) { + return jlptMarkedTokens.map((token) => ({ + ...token, + isKnown: false, + isNPlusOneTarget: false, + })); + } + const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne; const sanitizedMinSentenceWordsForNPlusOne = minSentenceWordsForNPlusOne !== undefined && diff --git a/src/core/services/tokenizer/parser-enrichment-worker-runtime.ts b/src/core/services/tokenizer/parser-enrichment-worker-runtime.ts new file mode 100644 index 0000000..1450d19 --- /dev/null +++ b/src/core/services/tokenizer/parser-enrichment-worker-runtime.ts @@ -0,0 +1,147 @@ +import type { MergedToken } from '../../../types'; +import { createLogger } from '../../../logger'; +import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage'; + +const logger = createLogger('main:tokenizer'); +const DISABLE_WORKER_ENV = 'SUBMINER_DISABLE_MECAB_ENRICHMENT_WORKER'; + +interface WorkerRequest { + id: number; + tokens: MergedToken[]; + mecabTokens: MergedToken[] | null; +} + +interface WorkerResponse { + id?: unknown; + result?: unknown; + error?: unknown; +} + +type PendingRequest = { + resolve: (value: MergedToken[]) => void; + reject: (reason?: unknown) => void; +}; + +class ParserEnrichmentWorkerRuntime { + private worker: import('node:worker_threads').Worker | null = null; + private nextRequestId = 1; + private pending = new Map(); + private initAttempted = 
false; + + async enrichTokens( + tokens: MergedToken[], + mecabTokens: MergedToken[] | null, + ): Promise { + const worker = await this.getWorker(); + if (!worker) { + return enrichTokensWithMecabPos1(tokens, mecabTokens); + } + + return new Promise((resolve, reject) => { + const id = this.nextRequestId++; + this.pending.set(id, { resolve, reject }); + const request: WorkerRequest = { id, tokens, mecabTokens }; + worker.postMessage(request); + }); + } + + private async getWorker(): Promise { + if (process.env[DISABLE_WORKER_ENV] === '1') { + return null; + } + if (this.worker) { + return this.worker; + } + if (this.initAttempted) { + return null; + } + + this.initAttempted = true; + + let workerThreads: typeof import('node:worker_threads'); + try { + workerThreads = await import('node:worker_threads'); + } catch { + return null; + } + + let workerPath = ''; + try { + workerPath = require.resolve('./parser-enrichment-worker-thread.js'); + } catch { + return null; + } + + try { + const worker = new workerThreads.Worker(workerPath); + worker.on('message', (message: WorkerResponse) => this.handleWorkerMessage(message)); + worker.on('error', (error: Error) => this.handleWorkerFailure(error)); + worker.on('exit', (code: number) => { + if (code !== 0) { + this.handleWorkerFailure(new Error(`parser enrichment worker exited with code ${code}`)); + } else { + this.worker = null; + } + }); + this.worker = worker; + return worker; + } catch (error) { + logger.debug(`Failed to start parser enrichment worker: ${(error as Error).message}`); + return null; + } + } + + private handleWorkerMessage(message: WorkerResponse): void { + if (typeof message.id !== 'number') { + return; + } + + const request = this.pending.get(message.id); + if (!request) { + return; + } + this.pending.delete(message.id); + + if (typeof message.error === 'string' && message.error.length > 0) { + request.reject(new Error(message.error)); + return; + } + + if (!Array.isArray(message.result)) { + 
request.reject(new Error('Parser enrichment worker returned invalid payload')); + return; + } + + request.resolve(message.result as MergedToken[]); + } + + private handleWorkerFailure(error: Error): void { + logger.debug(`Parser enrichment worker unavailable, falling back to main thread: ${error.message}`); + for (const pending of this.pending.values()) { + pending.reject(error); + } + this.pending.clear(); + + if (this.worker) { + this.worker.removeAllListeners(); + this.worker = null; + } + } +} + +let runtime: ParserEnrichmentWorkerRuntime | null = null; + +export async function enrichTokensWithMecabPos1Async( + tokens: MergedToken[], + mecabTokens: MergedToken[] | null, +): Promise { + if (!runtime) { + runtime = new ParserEnrichmentWorkerRuntime(); + } + + try { + return await runtime.enrichTokens(tokens, mecabTokens); + } catch { + return enrichTokensWithMecabPos1(tokens, mecabTokens); + } +} diff --git a/src/core/services/tokenizer/parser-enrichment-worker-thread.ts b/src/core/services/tokenizer/parser-enrichment-worker-thread.ts new file mode 100644 index 0000000..54d9b8c --- /dev/null +++ b/src/core/services/tokenizer/parser-enrichment-worker-thread.ts @@ -0,0 +1,25 @@ +import { parentPort } from 'node:worker_threads'; +import type { MergedToken } from '../../../types'; +import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage'; + +interface WorkerRequest { + id: number; + tokens: MergedToken[]; + mecabTokens: MergedToken[] | null; +} + +if (!parentPort) { + throw new Error('parser-enrichment worker missing parent port'); +} + +const port = parentPort; + +port.on('message', (message: WorkerRequest) => { + try { + const result = enrichTokensWithMecabPos1(message.tokens, message.mecabTokens); + port.postMessage({ id: message.id, result }); + } catch (error) { + const messageText = error instanceof Error ? 
error.message : String(error);
+    port.postMessage({ id: message.id, error: messageText });
+  }
+});
diff --git a/src/core/services/tokenizer/yomitan-parser-runtime.test.ts b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts
new file mode 100644
index 0000000..9e30a57
--- /dev/null
+++ b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts
@@ -0,0 +1,87 @@
+import assert from 'node:assert/strict';
+import test from 'node:test';
+import { syncYomitanDefaultAnkiServer } from './yomitan-parser-runtime';
+
+/**
+ * Builds minimal parser-runtime deps whose parser window forwards
+ * `executeJavaScript` calls to the supplied stub.
+ */
+function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
+  const parserWindow = {
+    isDestroyed: () => false,
+    webContents: {
+      executeJavaScript: async (script: string) => await executeJavaScript(script),
+    },
+  };
+
+  return {
+    getYomitanExt: () => ({ id: 'ext-id' }) as never,
+    getYomitanParserWindow: () => parserWindow as never,
+    setYomitanParserWindow: () => undefined,
+    getYomitanParserReadyPromise: () => null,
+    setYomitanParserReadyPromise: () => undefined,
+    getYomitanParserInitPromise: () => null,
+    setYomitanParserInitPromise: () => undefined,
+  };
+}
+
+test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
+  let scriptValue = '';
+  const deps = createDeps(async (script) => {
+    scriptValue = script;
+    return { updated: true };
+  });
+
+  const infoLogs: string[] = [];
+  const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
+    error: () => undefined,
+    info: (message) => infoLogs.push(message),
+  });
+
+  assert.equal(updated, true);
+  assert.match(scriptValue, /optionsGetFull/);
+  assert.match(scriptValue, /setAllSettings/);
+  assert.equal(infoLogs.length, 1);
+});
+
+test('syncYomitanDefaultAnkiServer returns false when script reports no change', async () => {
+  const deps = createDeps(async () => ({ updated: false }));
+
+  const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
+    error: () => undefined,
+    info: () => undefined,
+  });
+
+  assert.equal(updated, false);
+});
+
+test('syncYomitanDefaultAnkiServer logs and returns false on script failure', async () => {
+  const deps = createDeps(async () => {
+    throw new Error('execute failed');
+  });
+
+  const errorLogs: string[] = [];
+  const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
+    error: (message) => errorLogs.push(message),
+    info: () => undefined,
+  });
+
+  assert.equal(updated, false);
+  assert.equal(errorLogs.length, 1);
+});
+
+test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
+  let executeCount = 0;
+  const deps = createDeps(async () => {
+    executeCount += 1;
+    return { updated: true };
+  });
+
+  const updated = await syncYomitanDefaultAnkiServer(' ', deps, {
+    error: () => undefined,
+    info: () => undefined,
+  });
+
+  assert.equal(updated, false);
+  assert.equal(executeCount, 0);
+});
diff --git a/src/core/services/tokenizer/yomitan-parser-runtime.ts b/src/core/services/tokenizer/yomitan-parser-runtime.ts
index 5955cfa..f947189 100644
--- a/src/core/services/tokenizer/yomitan-parser-runtime.ts
+++ b/src/core/services/tokenizer/yomitan-parser-runtime.ts
@@ -2,6 +2,7 @@ import type { BrowserWindow, Extension } from 'electron';
 
 interface LoggerLike {
   error: (message: string, ...args: unknown[]) => void;
+  info?: (message: string, ...args: unknown[]) => void;
 }
 
 interface YomitanParserRuntimeDeps {
@@ -152,3 +153,109 @@ export async function requestYomitanParseResults(
     return null;
   }
 }
+
+/**
+ * Points the first Yomitan profile's Anki server at `serverUrl`.
+ *
+ * The update runs as a script inside the Yomitan parser window and is applied
+ * only when the profile's current server is empty or exactly
+ * `http://127.0.0.1:8765`; any other value is left untouched.
+ *
+ * @param serverUrl - Target server URL; it is trimmed and a blank value is a no-op.
+ * @param deps - Accessors for the Yomitan parser window used to run the script.
+ * @param logger - Receives an error when the script fails and, if `info` is
+ *   provided, a message after a successful update.
+ * @returns `true` only when the injected script reports `updated: true`;
+ *   `false` for blank input, an unavailable parser window, no change, or a
+ *   failed script.
+ */
+export async function syncYomitanDefaultAnkiServer(
+  serverUrl: string,
+  deps: YomitanParserRuntimeDeps,
+  logger: LoggerLike,
+): Promise<boolean> {
+  const normalizedTargetServer = serverUrl.trim();
+  if (!normalizedTargetServer) {
+    return false;
+  }
+
+  const isReady = await ensureYomitanParserWindow(deps, logger);
+  const parserWindow = deps.getYomitanParserWindow();
+  if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
+    return false;
+  }
+
+  // Executed inside the parser window. The stored server is overwritten only
+  // when it is empty or still "http://127.0.0.1:8765"; other values are kept.
+  const script = `
+    (async () => {
+      const invoke = (action, params) =>
+        new Promise((resolve, reject) => {
+          chrome.runtime.sendMessage({ action, params }, (response) => {
+            if (chrome.runtime.lastError) {
+              reject(new Error(chrome.runtime.lastError.message));
+              return;
+            }
+            if (!response || typeof response !== "object") {
+              reject(new Error("Invalid response from Yomitan backend"));
+              return;
+            }
+            if (response.error) {
+              reject(new Error(response.error.message || "Yomitan backend error"));
+              return;
+            }
+            resolve(response.result);
+          });
+        });
+
+      const targetServer = ${JSON.stringify(normalizedTargetServer)};
+      const optionsFull = await invoke("optionsGetFull", undefined);
+      const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
+      if (profiles.length === 0) {
+        return { updated: false, reason: "no-profiles" };
+      }
+
+      const defaultProfile = profiles[0];
+      if (!defaultProfile || typeof defaultProfile !== "object") {
+        return { updated: false, reason: "invalid-default-profile" };
+      }
+
+      defaultProfile.options = defaultProfile.options && typeof defaultProfile.options === "object"
+        ? defaultProfile.options
+        : {};
+      defaultProfile.options.anki = defaultProfile.options.anki && typeof defaultProfile.options.anki === "object"
+        ? defaultProfile.options.anki
+        : {};
+
+      const currentServerRaw = defaultProfile.options.anki.server;
+      const currentServer = typeof currentServerRaw === "string" ? currentServerRaw.trim() : "";
+      const canReplaceDefault =
+        currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
+      if (!canReplaceDefault || currentServer === targetServer) {
+        return { updated: false, reason: "no-change", currentServer, targetServer };
+      }
+
+      defaultProfile.options.anki.server = targetServer;
+      await invoke("setAllSettings", { value: optionsFull, source: "subminer" });
+      return { updated: true, currentServer, targetServer };
+    })();
+  `;
+
+  try {
+    const result = await parserWindow.webContents.executeJavaScript(script, true);
+    const updated =
+      typeof result === 'object' &&
+      result !== null &&
+      (result as { updated?: unknown }).updated === true;
+    if (updated) {
+      logger.info?.(`Updated Yomitan default profile Anki server to ${normalizedTargetServer}`);
+      return true;
+    }
+    return false;
+  } catch (err) {
+    // The rejection value is not guaranteed to be an Error; narrow before reading `.message`.
+    const messageText = err instanceof Error ? err.message : String(err);
+    logger.error('Failed to sync Yomitan default profile Anki server:', messageText);
+    return false;
+  }
+}
diff --git a/src/main/runtime/subtitle-tokenization-main-deps.test.ts b/src/main/runtime/subtitle-tokenization-main-deps.test.ts
index fc82152..53e2472 100644
--- a/src/main/runtime/subtitle-tokenization-main-deps.test.ts
+++ b/src/main/runtime/subtitle-tokenization-main-deps.test.ts
@@ -19,6 +19,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () =>
     isKnownWord: (text) => text === 'known',
     recordLookup: (hit) => calls.push(`lookup:${hit}`),
     getKnownWordMatchMode: () => 'surface',
+    getNPlusOneEnabled: () => true,
     getMinSentenceWordsForNPlusOne: () => 3,
     getJlptLevel: () => 'N2',
     getJlptEnabled: () => true,
@@ -33,6 +34,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () =>
   deps.setYomitanParserWindow(null);
   deps.setYomitanParserReadyPromise(null);
   deps.setYomitanParserInitPromise(null);
+  assert.equal(deps.getNPlusOneEnabled?.(), true);
   assert.equal(deps.getMinSentenceWordsForNPlusOne?.(), 3);
   assert.deepEqual(calls, ['lookup:true', 'lookup:false', 'set-window', 'set-ready', 'set-init']);
 });
diff --git a/src/main/runtime/subtitle-tokenization-main-deps.ts b/src/main/runtime/subtitle-tokenization-main-deps.ts
index 7370298..b9381bc 100644
--- a/src/main/runtime/subtitle-tokenization-main-deps.ts
+++ b/src/main/runtime/subtitle-tokenization-main-deps.ts
@@ -32,6 +32,11 @@ export function createBuildTokenizerDepsMainHandler(deps: TokenizerMainDeps) {
       return hit;
     },
     getKnownWordMatchMode: () => deps.getKnownWordMatchMode(),
+    ...(deps.getNPlusOneEnabled
+      ? {
+          getNPlusOneEnabled: () => deps.getNPlusOneEnabled!(),
+        }
+      : {}),
     getMinSentenceWordsForNPlusOne: () => deps.getMinSentenceWordsForNPlusOne(),
     getJlptLevel: (text: string) => deps.getJlptLevel(text),
     getJlptEnabled: () => deps.getJlptEnabled(),