feat(anki): add proxy transport and tokenizer annotation controls

This commit is contained in:
2026-02-27 21:25:26 -08:00
parent 34a0feae71
commit f8e961d105
26 changed files with 1453 additions and 60 deletions

View File

@@ -0,0 +1,44 @@
---
id: TASK-70
title: >-
Overlay runtime refactor: remove invisible mode and bind visible overlay to
mpv subtitles
status: Done
assignee: []
created_date: '2026-02-28 02:38'
labels: []
dependencies: []
references:
- 'commit:a14c9da'
- 'commit:74554a3'
- 'commit:75442a4'
- 'commit:dde51f8'
- 'commit:9e4e588'
- src/main/overlay-runtime.ts
- src/main/runtime/overlay-mpv-sub-visibility.ts
- src/renderer/renderer.ts
- docs/plans/2026-02-26-secondary-subtitles-main-overlay.md
priority: medium
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Scope: Branch-only commits main..HEAD on refactor-overlay (a14c9da through 9e4e588) rebuilt overlay behavior around visible overlay mode and removed legacy invisible overlay paths.
Delivered behavior:
- Removed renderer invisible overlay layout/offset helpers and main hover-highlight runtime code paths.
- Added explicit overlay-to-mpv subtitle visibility synchronization so visible overlay state controls primary subtitle visibility consistently.
- Hardened overlay runtime/bootstrap lifecycle around modal fallback open state and bridge send path edge cases.
- Updated plugin/config/docs defaults to reflect visible-overlay-first behavior and subtitle binding controls.
Risk/impact context:
- Large cross-layer refactor touching runtime wiring, renderer event handling, and plugin behavior.
- Regression coverage added/updated for overlay runtime, mpv protocol handling, renderer cleanup, and subtitle rendering paths.
<!-- SECTION:DESCRIPTION:END -->
## Final Summary
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
Completed and validated in branch commit set before merge. Refactor reduces dead overlay modes, centralizes subtitle visibility behavior, and documents new defaults/constraints.
<!-- SECTION:FINAL_SUMMARY:END -->

View File

@@ -0,0 +1,44 @@
---
id: TASK-71
title: >-
Anki integration: add local AnkiConnect proxy transport for push-based
auto-enrichment
status: Done
assignee: []
created_date: '2026-02-28 02:38'
labels: []
dependencies: []
references:
- src/anki-integration/anki-connect-proxy.ts
- src/anki-integration/anki-connect-proxy.test.ts
- src/anki-integration.ts
- src/config/resolve/anki-connect.ts
- src/core/services/tokenizer/yomitan-parser-runtime.ts
- src/core/services/tokenizer/yomitan-parser-runtime.test.ts
- docs/anki-integration.md
- config.example.jsonc
priority: medium
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Scope: Current unmerged working-tree changes implement an optional local AnkiConnect-compatible proxy and transport switching for card enrichment.
Delivered behavior:
- Added proxy server that forwards AnkiConnect requests and enqueues addNote/addNotes note IDs for post-create enrichment, with de-duplication and loop-configuration protection.
- Added config schema/defaults/resolution for ankiConnect.proxy (enabled, host, port, upstreamUrl) with validation warnings and fallback behavior.
- Runtime now supports transport switching (polling vs proxy) and restarts transport when runtime config patches change transport keys.
- Added Yomitan default-profile server sync helper to keep bundled parser profile aligned with configured Anki endpoint.
- Updated user docs/config examples for proxy mode setup, troubleshooting, and mining workflow behavior.
Risk/impact context:
- New network surface on local host/port; correctness depends on safe proxy upstream configuration and robust response handling.
- Tests added for proxy queue behavior, config resolution, and parser sync routines.
<!-- SECTION:DESCRIPTION:END -->
## Final Summary
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
Completed implementation in branch working tree; ready to merge once local changes are committed and test gate passes.
<!-- SECTION:FINAL_SUMMARY:END -->

View File

@@ -0,0 +1,35 @@
---
id: TASK-72
title: 'macOS config validation UX: show full warning details in native dialog'
status: Done
assignee: []
created_date: '2026-02-28 02:38'
labels: []
dependencies: []
references:
- 'commit:cc2f9ef'
- src/main/config-validation.ts
- src/main/runtime/startup-config.ts
- docs/configuration.md
priority: low
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Scope: Commit cc2f9ef improves startup config-warning visibility on macOS by ensuring full details are surfaced in the native UI path and reflected in docs.
Delivered behavior:
- Config validation/runtime wiring updated so macOS users can access complete warning details instead of truncated notification-only text.
- Added/updated tests around config validation and startup config warning flows.
- Updated configuration docs to clarify platform-specific warning presentation behavior.
Risk/impact context:
- Low runtime risk; primarily user-facing diagnostics clarity improvement.
<!-- SECTION:DESCRIPTION:END -->
## Final Summary
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
Completed small follow-up fix to reduce config-debug friction on macOS.
<!-- SECTION:FINAL_SUMMARY:END -->

View File

@@ -12,13 +12,6 @@
// ==========================================
"auto_start_overlay": false, // When overlay connects to mpv, automatically show overlay and hide mpv subtitles. Values: true | false
// ==========================================
// Visible Overlay Subtitle Binding
// Control whether visible overlay toggles also toggle MPV subtitle visibility.
// When enabled, visible overlay hides MPV subtitles; when disabled, MPV subtitles are left unchanged.
// ==========================================
"bind_visible_overlay_to_mpv_sub_visibility": true, // Link visible overlay toggles to MPV primary subtitle visibility. Values: true | false
// ==========================================
// Texthooker Server
// Control whether browser opens automatically for texthooker.
@@ -179,6 +172,12 @@
"enabled": false, // Enable AnkiConnect integration. Values: true | false
"url": "http://127.0.0.1:8765", // Url setting.
"pollingRate": 3000, // Polling interval in milliseconds.
"proxy": {
"enabled": false, // Enable local AnkiConnect-compatible proxy for push-based auto-enrichment. Values: true | false
"host": "127.0.0.1", // Bind host for local AnkiConnect proxy.
"port": 8766, // Bind port for local AnkiConnect proxy.
"upstreamUrl": "http://127.0.0.1:8765" // Upstream AnkiConnect URL proxied by local AnkiConnect proxy.
}, // Proxy setting.
"tags": [
"SubMiner"
], // Tags to add to cards mined or updated by SubMiner. Provide an empty array to disable automatic tagging.

View File

@@ -22,7 +22,7 @@ make docs-preview # Preview built site at http://localhost:4173
- [Configuration](/configuration) — Full config file reference and option details
- [Keyboard Shortcuts](/shortcuts) — All global, overlay, mining, and plugin chord shortcuts in one place
- [Anki Integration](/anki-integration) — AnkiConnect setup, field mapping, media generation, field grouping
- [Anki Integration](/anki-integration) — AnkiConnect setup, proxy/polling transport, field mapping, media generation, field grouping
- [Jellyfin Integration](/jellyfin-integration) — Optional Jellyfin auth, cast discovery, remote control, and playback launch
- [Immersion Tracking](/immersion-tracking) — SQLite schema, retention/rollup policies, query templates, and extension points
- [Performance & Tuning](/troubleshooting#performance-and-resource-impact) — Resource usage and practical low-impact profile

View File

@@ -10,9 +10,14 @@ SubMiner uses the [AnkiConnect](https://ankiweb.net/shared/info/2055492159) add-
AnkiConnect listens on `http://127.0.0.1:8765` by default. If you changed the port in AnkiConnect's settings, update `ankiConnect.url` in your SubMiner config.
## How Polling Works
## Auto-Enrichment Transport
SubMiner polls AnkiConnect at a regular interval (default: 3 seconds, configurable via `ankiConnect.pollingRate`) to detect new cards. When it finds a card that was added since the last poll:
SubMiner supports two auto-enrichment transport modes:
1. `polling` (default): polls AnkiConnect at `ankiConnect.pollingRate` (default: 3s).
2. `proxy` (optional): runs a local AnkiConnect-compatible proxy and enriches cards immediately after successful `addNote` / `addNotes` responses.
In both modes, the enrichment workflow is the same:
1. Checks if a duplicate expression already exists (for field grouping).
2. Updates the sentence field with the current subtitle.
@@ -20,7 +25,32 @@ SubMiner polls AnkiConnect at a regular interval (default: 3 seconds, configurab
4. Fills the translation field from the secondary subtitle or AI.
5. Writes metadata to the miscInfo field.
Polling uses the query `"deck:<your-deck>" added:1` to find recently added cards. If no deck is configured, it searches all decks.
Polling mode uses the query `"deck:<your-deck>" added:1` to find recently added cards. If no deck is configured, it searches all decks.
### Proxy Mode Setup (Yomitan / Texthooker)
```jsonc
"ankiConnect": {
"url": "http://127.0.0.1:8765", // real AnkiConnect
"proxy": {
"enabled": true,
"host": "127.0.0.1",
"port": 8766,
"upstreamUrl": "http://127.0.0.1:8765"
}
}
```
Then point Yomitan/clients to `http://127.0.0.1:8766` instead of `8765`.
When SubMiner loads the bundled Yomitan extension, it also attempts to update the **default Yomitan profile** (`profiles[0].options.anki.server`) to the active SubMiner endpoint:
- proxy URL when `ankiConnect.proxy.enabled` is `true`
- direct `ankiConnect.url` when proxy mode is disabled
To avoid clobbering custom setups, this auto-update only changes the default profile when its current server is blank or the stock Yomitan default (`http://127.0.0.1:8765`).
For browser-based Yomitan or other external clients (for example texthooker in a normal browser profile), set their Anki server to the same proxy URL separately.
## Field Mapping
@@ -214,6 +244,12 @@ When you mine the same word multiple times, SubMiner can merge the cards instead
"enabled": true,
"url": "http://127.0.0.1:8765",
"pollingRate": 3000,
"proxy": {
"enabled": false,
"host": "127.0.0.1",
"port": 8766,
"upstreamUrl": "http://127.0.0.1:8765"
},
"fields": {
"audio": "ExpressionAudio",
"image": "Picture",

View File

@@ -47,6 +47,8 @@ Malformed config syntax (invalid JSON/JSONC) is startup-blocking: SubMiner shows
For valid JSON/JSONC with invalid option values, SubMiner uses warn-and-fallback behavior: it logs the bad key/value and continues with the default for that option.
On macOS, these validation warnings also open a native dialog with full details (desktop notification banners can truncate long messages).
### Hot-Reload Behavior
SubMiner watches the active config file (`config.jsonc` or `config.json`) while running and applies supported updates automatically.
@@ -87,6 +89,7 @@ The configuration file includes several main sections:
- [**Subtitle Style**](#subtitle-style) - Appearance customization
- [**Texthooker**](#texthooker) - Control browser opening behavior
- [**WebSocket Server**](#websocket-server) - Built-in subtitle broadcasting server
- [**Startup Warmups**](#startup-warmups) - Control what preloads on startup vs first-use defer
- [**Immersion Tracking**](#immersion-tracking) - Track subtitle sessions and mining activity in SQLite
- [**YouTube Subtitle Generation**](#youtube-subtitle-generation) - Launcher defaults for yt-dlp + local whisper fallback
@@ -826,6 +829,32 @@ See `config.example.jsonc` for detailed configuration options.
| `enabled` | `true`, `false`, `"auto"` | `"auto"` (default) disables if mpv_websocket is detected |
| `port` | number | WebSocket server port (default: 6677) |
### Startup Warmups
Control which startup warmups run in the background versus deferring to first real usage:
```json
{
"startupWarmups": {
"lowPowerMode": false,
"mecab": true,
"yomitanExtension": true,
"subtitleDictionaries": true,
"jellyfinRemoteSession": true
}
}
```
| Option | Values | Description |
| ------------------------ | --------------- | ------------------------------------------------------------------------------------------------ |
| `lowPowerMode` | `true`, `false` | Defer all warmups except Yomitan extension |
| `mecab` | `true`, `false` | Warm up MeCab tokenizer at startup |
| `yomitanExtension` | `true`, `false` | Warm up Yomitan extension at startup |
| `subtitleDictionaries` | `true`, `false` | Warm up JLPT + frequency dictionaries at startup |
| `jellyfinRemoteSession` | `true`, `false` | Warm up Jellyfin remote session at startup (still requires Jellyfin remote auto-connect settings) |
Defaults warm everything (`true` for all toggles, `lowPowerMode: false`). Setting a warmup toggle to `false` defers that work until first usage.
### Immersion Tracking
Enable or disable local immersion analytics stored in SQLite for mined subtitles and media sessions:

View File

@@ -20,7 +20,7 @@ SubMiner prioritizes subtitle responsiveness over heavy initialization:
1. The first subtitle render is **plain text first** (no tokenization wait).
2. Tokenized enrichment (word spans, known-word flags, JLPT/frequency metadata) is applied right after parsing completes.
3. Under rapid subtitle churn, SubMiner uses a **latest-only tokenization queue** so stale lines are dropped instead of building lag.
4. MeCab, Yomitan extension load, and dictionary prewarm run as background warmups after overlay initialization.
4. MeCab, Yomitan extension load, and dictionary prewarm run as background warmups after overlay initialization (configurable via `startupWarmups`, including low-power mode).
This keeps early playback snappy and avoids mpv-side sluggishness while startup work completes.
@@ -72,11 +72,13 @@ There are three ways to create cards, depending on your workflow.
### 1. Auto-Update from Yomitan
This is the most common flow. Yomitan creates a card in Anki, and SubMiner detects it via polling and enriches it automatically.
This is the most common flow. Yomitan creates a card in Anki, and SubMiner enriches it automatically.
1. Click a word → Yomitan popup appears.
2. Click the Anki icon in Yomitan to add the word.
3. SubMiner detects the new card (polls AnkiConnect every 3 seconds by default).
3. SubMiner receives or detects the new card:
- **Proxy mode** (`ankiConnect.proxy.enabled: true`): immediate enrich after successful `addNote` / `addNotes`.
- **Polling mode** (default): detects via AnkiConnect polling (`ankiConnect.pollingRate`, default 3 seconds).
4. SubMiner updates the card with:
- **Sentence**: The current subtitle line.
- **Audio**: Extracted from the video using the subtitle's start/end timing (plus configurable padding).
@@ -95,7 +97,7 @@ If you prefer a hands-on approach (animecards-style), you can copy the current s
- For multiple lines: press `Ctrl/Cmd+Shift+C`, then a digit `1``9` to select how many recent subtitle lines to combine. The combined text is copied to the clipboard.
3. Press `Ctrl/Cmd+V` to update the last-added card with the clipboard contents plus audio, image, and translation — the same fields auto-update would fill.
This is useful when auto-update polling is disabled or when you want explicit control over which subtitle line gets attached to the card.
This is useful when auto-update is disabled or when you want explicit control over which subtitle line gets attached to the card.
| Shortcut | Action | Config key |
| --------------------------- | ----------------------------------------- | ------------------------------------- |

View File

@@ -12,13 +12,6 @@
// ==========================================
"auto_start_overlay": false, // When overlay connects to mpv, automatically show overlay and hide mpv subtitles. Values: true | false
// ==========================================
// Visible Overlay Subtitle Binding
// Control whether visible overlay toggles also toggle MPV subtitle visibility.
// When enabled, visible overlay hides MPV subtitles; when disabled, MPV subtitles are left unchanged.
// ==========================================
"bind_visible_overlay_to_mpv_sub_visibility": true, // Link visible overlay toggles to MPV primary subtitle visibility. Values: true | false
// ==========================================
// Texthooker Server
// Control whether browser opens automatically for texthooker.
@@ -179,6 +172,12 @@
"enabled": false, // Enable AnkiConnect integration. Values: true | false
"url": "http://127.0.0.1:8765", // Url setting.
"pollingRate": 3000, // Polling interval in milliseconds.
"proxy": {
"enabled": false, // Enable local AnkiConnect-compatible proxy for push-based auto-enrichment. Values: true | false
"host": "127.0.0.1", // Bind host for local AnkiConnect proxy.
"port": 8766, // Bind port for local AnkiConnect proxy.
"upstreamUrl": "http://127.0.0.1:8765" // Upstream AnkiConnect URL proxied by local AnkiConnect proxy.
}, // Proxy setting.
"tags": [
"SubMiner"
], // Tags to add to cards mined or updated by SubMiner. Provide an empty array to disable automatic tagging.

View File

@@ -30,7 +30,7 @@ SubMiner retries the connection automatically with increasing delays (200 ms, 50
- first subtitle parse/tokenization bursts
- media generation (`ffmpeg` audio/image and AVIF paths)
- media sync and subtitle tooling (`alass`, `ffsubsync`, `whisper` fallback path)
- `ankiConnect` enrichment and frequent polling
- `ankiConnect` enrichment (plus polling overhead when proxy mode is disabled)
### If playback feels sluggish
@@ -104,11 +104,17 @@ Logged when a malformed JSON line arrives from the mpv socket. Usually harmless
**"AnkiConnect: unable to connect"**
SubMiner polls AnkiConnect at `http://127.0.0.1:8765` (configurable via `ankiConnect.url`). This error means Anki is not running or the AnkiConnect add-on is not installed.
SubMiner connects to the active Anki endpoint:
- `ankiConnect.url` (direct mode, default `http://127.0.0.1:8765`)
- `http://<ankiConnect.proxy.host>:<ankiConnect.proxy.port>` (proxy mode)
This error means the active endpoint is unavailable, or (in proxy mode) the proxy cannot reach `ankiConnect.proxy.upstreamUrl`.
- Install the [AnkiConnect](https://ankiweb.net/shared/info/2055492159) add-on in Anki.
- Make sure Anki is running before you start mining.
- If you changed the AnkiConnect port, update `ankiConnect.url` in your config.
- If you changed the AnkiConnect port, update `ankiConnect.url` (or `ankiConnect.proxy.upstreamUrl` if using proxy mode).
- If using external Yomitan/browser clients, confirm they point to your SubMiner proxy URL.
SubMiner retries with exponential backoff (up to 5 s) and suppresses repeated error logs after 5 consecutive failures. When Anki comes back, you will see "AnkiConnect connection restored".
@@ -122,7 +128,7 @@ See [Anki Integration](/anki-integration) for the full field mapping reference.
Shown when SubMiner tries to update a card that no longer exists, or when AnkiConnect rejects the update. Common causes:
- The card was deleted in Anki between polling and update.
- The card was deleted in Anki between creation and enrichment update.
- The note type changed and a mapped field no longer exists.
## Overlay

View File

@@ -0,0 +1,133 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { AnkiConnectProxyServer } from './anki-connect-proxy';
async function waitForCondition(
condition: () => boolean,
timeoutMs = 2000,
intervalMs = 10,
): Promise<void> {
const startedAt = Date.now();
while (Date.now() - startedAt < timeoutMs) {
if (condition()) return;
await new Promise((resolve) => setTimeout(resolve, intervalMs));
}
throw new Error('Timed out waiting for condition');
}
test('proxy enqueues addNote result for enrichment', async () => {
const processed: number[] = [];
const proxy = new AnkiConnectProxyServer({
shouldAutoUpdateNewCards: () => true,
processNewCard: async (noteId) => {
processed.push(noteId);
},
logInfo: () => undefined,
logWarn: () => undefined,
logError: () => undefined,
});
(proxy as unknown as {
maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
}).maybeEnqueueFromRequest(
{ action: 'addNote' },
Buffer.from(JSON.stringify({ result: 42, error: null }), 'utf8'),
);
await waitForCondition(() => processed.length === 1);
assert.deepEqual(processed, [42]);
});
test('proxy de-duplicates addNotes IDs within the same response', async () => {
const processed: number[] = [];
const proxy = new AnkiConnectProxyServer({
shouldAutoUpdateNewCards: () => true,
processNewCard: async (noteId) => {
processed.push(noteId);
await new Promise((resolve) => setTimeout(resolve, 5));
},
logInfo: () => undefined,
logWarn: () => undefined,
logError: () => undefined,
});
(proxy as unknown as {
maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
}).maybeEnqueueFromRequest(
{ action: 'addNotes' },
Buffer.from(JSON.stringify({ result: [101, 102, 101, null], error: null }), 'utf8'),
);
await waitForCondition(() => processed.length === 2);
assert.deepEqual(processed, [101, 102]);
});
test('proxy skips auto-enrichment when auto-update is disabled', async () => {
const processed: number[] = [];
const proxy = new AnkiConnectProxyServer({
shouldAutoUpdateNewCards: () => false,
processNewCard: async (noteId) => {
processed.push(noteId);
},
logInfo: () => undefined,
logWarn: () => undefined,
logError: () => undefined,
});
(proxy as unknown as {
maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
}).maybeEnqueueFromRequest(
{ action: 'addNote' },
Buffer.from(JSON.stringify({ result: 303, error: null }), 'utf8'),
);
await new Promise((resolve) => setTimeout(resolve, 30));
assert.deepEqual(processed, []);
});
test('proxy ignores addNote when upstream response reports error', async () => {
const processed: number[] = [];
const proxy = new AnkiConnectProxyServer({
shouldAutoUpdateNewCards: () => true,
processNewCard: async (noteId) => {
processed.push(noteId);
},
logInfo: () => undefined,
logWarn: () => undefined,
logError: () => undefined,
});
(proxy as unknown as {
maybeEnqueueFromRequest: (request: Record<string, unknown>, responseBody: Buffer) => void;
}).maybeEnqueueFromRequest(
{ action: 'addNote' },
Buffer.from(JSON.stringify({ result: 123, error: 'duplicate' }), 'utf8'),
);
await new Promise((resolve) => setTimeout(resolve, 30));
assert.deepEqual(processed, []);
});
test('proxy detects self-referential loop configuration', () => {
const proxy = new AnkiConnectProxyServer({
shouldAutoUpdateNewCards: () => true,
processNewCard: async () => undefined,
logInfo: () => undefined,
logWarn: () => undefined,
logError: () => undefined,
});
const result = (proxy as unknown as {
isSelfReferentialProxy: (options: {
host: string;
port: number;
upstreamUrl: string;
}) => boolean;
}).isSelfReferentialProxy({
host: '127.0.0.1',
port: 8766,
upstreamUrl: 'http://localhost:8766',
});
assert.equal(result, true);
});

View File

@@ -0,0 +1,314 @@
import http, { IncomingMessage, ServerResponse } from 'node:http';
import axios, { AxiosInstance } from 'axios';
interface StartProxyOptions {
host: string;
port: number;
upstreamUrl: string;
}
interface AnkiConnectEnvelope {
result: unknown;
error: unknown;
}
export interface AnkiConnectProxyServerDeps {
shouldAutoUpdateNewCards: () => boolean;
processNewCard: (noteId: number) => Promise<void>;
logInfo: (message: string, ...args: unknown[]) => void;
logWarn: (message: string, ...args: unknown[]) => void;
logError: (message: string, ...args: unknown[]) => void;
}
export class AnkiConnectProxyServer {
private server: http.Server | null = null;
private client: AxiosInstance;
private pendingNoteIds: number[] = [];
private pendingNoteIdSet = new Set<number>();
private inFlightNoteIds = new Set<number>();
private processingQueue = false;
constructor(private readonly deps: AnkiConnectProxyServerDeps) {
this.client = axios.create({
timeout: 15000,
validateStatus: () => true,
responseType: 'arraybuffer',
});
}
get isRunning(): boolean {
return this.server !== null;
}
start(options: StartProxyOptions): void {
this.stop();
if (this.isSelfReferentialProxy(options)) {
this.deps.logError(
'[anki-proxy] Proxy upstream points to proxy host/port; refusing to start to avoid loop.',
);
return;
}
this.server = http.createServer((req, res) => {
void this.handleRequest(req, res, options.upstreamUrl);
});
this.server.on('error', (error) => {
this.deps.logError('[anki-proxy] Server error:', (error as Error).message);
});
this.server.listen(options.port, options.host, () => {
this.deps.logInfo(
`[anki-proxy] Listening on http://${options.host}:${options.port} -> ${options.upstreamUrl}`,
);
});
}
stop(): void {
if (this.server) {
this.server.close();
this.server = null;
this.deps.logInfo('[anki-proxy] Stopped');
}
this.pendingNoteIds = [];
this.pendingNoteIdSet.clear();
this.inFlightNoteIds.clear();
this.processingQueue = false;
}
private isSelfReferentialProxy(options: StartProxyOptions): boolean {
try {
const upstream = new URL(options.upstreamUrl);
const normalizedUpstreamHost = upstream.hostname.toLowerCase();
const normalizedBindHost = options.host.toLowerCase();
const upstreamPort =
upstream.port.length > 0
? Number(upstream.port)
: upstream.protocol === 'https:'
? 443
: 80;
const hostMatches =
normalizedUpstreamHost === normalizedBindHost ||
(normalizedUpstreamHost === 'localhost' && normalizedBindHost === '127.0.0.1') ||
(normalizedUpstreamHost === '127.0.0.1' && normalizedBindHost === 'localhost');
return hostMatches && upstreamPort === options.port;
} catch {
return false;
}
}
private async handleRequest(
req: IncomingMessage,
res: ServerResponse<IncomingMessage>,
upstreamUrl: string,
): Promise<void> {
this.setCorsHeaders(res);
if (req.method === 'OPTIONS') {
res.statusCode = 204;
res.end();
return;
}
if (!req.method || (req.method !== 'GET' && req.method !== 'POST')) {
res.statusCode = 405;
res.end('Method Not Allowed');
return;
}
let rawBody: Buffer = Buffer.alloc(0);
if (req.method === 'POST') {
rawBody = await this.readRequestBody(req);
}
let requestJson: Record<string, unknown> | null = null;
if (req.method === 'POST' && rawBody.length > 0) {
requestJson = this.tryParseJson(rawBody);
}
try {
const targetUrl = new URL(req.url || '/', upstreamUrl).toString();
const contentType =
typeof req.headers['content-type'] === 'string'
? req.headers['content-type']
: 'application/json';
const upstreamResponse = await this.client.request<ArrayBuffer>({
url: targetUrl,
method: req.method,
data: req.method === 'POST' ? rawBody : undefined,
headers: {
'content-type': contentType,
},
});
const responseBody: Buffer = Buffer.isBuffer(upstreamResponse.data)
? upstreamResponse.data
: Buffer.from(new Uint8Array(upstreamResponse.data));
this.copyUpstreamHeaders(res, upstreamResponse.headers as Record<string, unknown>);
res.statusCode = upstreamResponse.status;
res.end(responseBody);
if (req.method === 'POST') {
this.maybeEnqueueFromRequest(requestJson, responseBody);
}
} catch (error) {
this.deps.logWarn('[anki-proxy] Failed to forward request:', (error as Error).message);
res.statusCode = 502;
res.end('Bad Gateway');
}
}
private maybeEnqueueFromRequest(
requestJson: Record<string, unknown> | null,
responseBody: Buffer,
): void {
if (!requestJson || !this.deps.shouldAutoUpdateNewCards()) {
return;
}
const action =
typeof requestJson.action === 'string' ? requestJson.action : String(requestJson.action ?? '');
if (action !== 'addNote' && action !== 'addNotes') {
return;
}
const responseJson = this.tryParseJson(responseBody) as AnkiConnectEnvelope | null;
if (!responseJson || responseJson.error !== null) {
return;
}
const noteIds =
action === 'addNote'
? this.collectSingleResultId(responseJson.result)
: this.collectBatchResultIds(responseJson.result);
if (noteIds.length === 0) {
return;
}
this.enqueueNotes(noteIds);
}
private collectSingleResultId(value: unknown): number[] {
if (typeof value === 'number' && Number.isInteger(value) && value > 0) {
return [value];
}
return [];
}
private collectBatchResultIds(value: unknown): number[] {
if (!Array.isArray(value)) {
return [];
}
return value.filter((entry): entry is number => {
return typeof entry === 'number' && Number.isInteger(entry) && entry > 0;
});
}
private enqueueNotes(noteIds: number[]): void {
let enqueuedCount = 0;
for (const noteId of noteIds) {
if (this.pendingNoteIdSet.has(noteId) || this.inFlightNoteIds.has(noteId)) {
continue;
}
this.pendingNoteIds.push(noteId);
this.pendingNoteIdSet.add(noteId);
enqueuedCount += 1;
}
if (enqueuedCount === 0) {
return;
}
this.deps.logInfo(`[anki-proxy] Enqueued ${enqueuedCount} note(s) for enrichment`);
this.processQueue();
}
private processQueue(): void {
if (this.processingQueue) {
return;
}
this.processingQueue = true;
void (async () => {
try {
while (this.pendingNoteIds.length > 0) {
const noteId = this.pendingNoteIds.shift();
if (noteId === undefined) {
continue;
}
this.pendingNoteIdSet.delete(noteId);
if (!this.deps.shouldAutoUpdateNewCards()) {
continue;
}
this.inFlightNoteIds.add(noteId);
try {
await this.deps.processNewCard(noteId);
} catch (error) {
this.deps.logWarn(
`[anki-proxy] Failed to auto-enrich note ${noteId}:`,
(error as Error).message,
);
} finally {
this.inFlightNoteIds.delete(noteId);
}
}
} finally {
this.processingQueue = false;
if (this.pendingNoteIds.length > 0) {
this.processQueue();
}
}
})();
}
private async readRequestBody(req: IncomingMessage): Promise<Buffer> {
const chunks: Buffer[] = [];
for await (const chunk of req) {
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
}
return Buffer.concat(chunks);
}
private tryParseJson(rawBody: Buffer): Record<string, unknown> | null {
if (rawBody.length === 0) {
return null;
}
try {
const parsed = JSON.parse(rawBody.toString('utf8'));
return parsed && typeof parsed === 'object' ? (parsed as Record<string, unknown>) : null;
} catch {
return null;
}
}
private setCorsHeaders(res: ServerResponse<IncomingMessage>): void {
res.setHeader('Access-Control-Allow-Origin', '*');
res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS');
}
private copyUpstreamHeaders(
res: ServerResponse<IncomingMessage>,
headers: Record<string, unknown>,
): void {
for (const [key, value] of Object.entries(headers)) {
if (value === undefined) {
continue;
}
if (key.toLowerCase() === 'content-length') {
continue;
}
if (Array.isArray(value)) {
res.setHeader(
key,
value.map((entry) => String(entry)),
);
} else {
res.setHeader(key, String(value));
}
}
}
}

View File

@@ -8,6 +8,12 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick<
enabled: false,
url: 'http://127.0.0.1:8765',
pollingRate: 3000,
proxy: {
enabled: false,
host: '127.0.0.1',
port: 8766,
upstreamUrl: 'http://127.0.0.1:8765',
},
tags: ['SubMiner'],
fields: {
audio: 'ExpressionAudio',

View File

@@ -18,6 +18,30 @@ export function buildIntegrationConfigOptionRegistry(
defaultValue: defaultConfig.ankiConnect.pollingRate,
description: 'Polling interval in milliseconds.',
},
{
path: 'ankiConnect.proxy.enabled',
kind: 'boolean',
defaultValue: defaultConfig.ankiConnect.proxy.enabled,
description: 'Enable local AnkiConnect-compatible proxy for push-based auto-enrichment.',
},
{
path: 'ankiConnect.proxy.host',
kind: 'string',
defaultValue: defaultConfig.ankiConnect.proxy.host,
description: 'Bind host for local AnkiConnect proxy.',
},
{
path: 'ankiConnect.proxy.port',
kind: 'number',
defaultValue: defaultConfig.ankiConnect.proxy.port,
description: 'Bind port for local AnkiConnect proxy.',
},
{
path: 'ankiConnect.proxy.upstreamUrl',
kind: 'string',
defaultValue: defaultConfig.ankiConnect.proxy.upstreamUrl,
description: 'Upstream AnkiConnect URL proxied by local AnkiConnect proxy.',
},
{
path: 'ankiConnect.tags',
kind: 'array',

View File

@@ -66,3 +66,44 @@ test('warns and falls back for invalid nPlusOne.decks entries', () => {
);
assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.decks'));
});
test('accepts valid proxy settings', () => {
const { context, warnings } = makeContext({
proxy: {
enabled: true,
host: '127.0.0.1',
port: 9999,
upstreamUrl: 'http://127.0.0.1:8765',
},
});
applyAnkiConnectResolution(context);
assert.equal(context.resolved.ankiConnect.proxy.enabled, true);
assert.equal(context.resolved.ankiConnect.proxy.host, '127.0.0.1');
assert.equal(context.resolved.ankiConnect.proxy.port, 9999);
assert.equal(context.resolved.ankiConnect.proxy.upstreamUrl, 'http://127.0.0.1:8765');
assert.equal(
warnings.some((warning) => warning.path.startsWith('ankiConnect.proxy')),
false,
);
});
test('warns and falls back for invalid proxy settings', () => {
const { context, warnings } = makeContext({
proxy: {
enabled: 'yes',
host: '',
port: -1,
upstreamUrl: '',
},
});
applyAnkiConnectResolution(context);
assert.deepEqual(context.resolved.ankiConnect.proxy, DEFAULT_CONFIG.ankiConnect.proxy);
assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.enabled'));
assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.host'));
assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.port'));
assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.proxy.upstreamUrl'));
});

View File

@@ -12,6 +12,7 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
const fields = isObject(ac.fields) ? (ac.fields as Record<string, unknown>) : {};
const media = isObject(ac.media) ? (ac.media as Record<string, unknown>) : {};
const metadata = isObject(ac.metadata) ? (ac.metadata as Record<string, unknown>) : {};
const proxy = isObject(ac.proxy) ? (ac.proxy as Record<string, unknown>) : {};
const aiSource = isObject(ac.ai) ? ac.ai : isObject(ac.openRouter) ? ac.openRouter : {};
const legacyKeys = new Set([
'audioField',
@@ -85,6 +86,9 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
? (ac.behavior as (typeof context.resolved)['ankiConnect']['behavior'])
: {}),
},
proxy: {
...context.resolved.ankiConnect.proxy,
},
metadata: {
...context.resolved.ankiConnect.metadata,
...(isObject(ac.metadata)
@@ -153,6 +157,68 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
);
}
if (isObject(ac.proxy)) {
const proxyEnabled = asBoolean(proxy.enabled);
if (proxyEnabled !== undefined) {
context.resolved.ankiConnect.proxy.enabled = proxyEnabled;
} else if (proxy.enabled !== undefined) {
context.warn(
'ankiConnect.proxy.enabled',
proxy.enabled,
context.resolved.ankiConnect.proxy.enabled,
'Expected boolean.',
);
}
const proxyHost = asString(proxy.host);
if (proxyHost !== undefined && proxyHost.trim().length > 0) {
context.resolved.ankiConnect.proxy.host = proxyHost.trim();
} else if (proxy.host !== undefined) {
context.warn(
'ankiConnect.proxy.host',
proxy.host,
context.resolved.ankiConnect.proxy.host,
'Expected non-empty string.',
);
}
const proxyUpstreamUrl = asString(proxy.upstreamUrl);
if (proxyUpstreamUrl !== undefined && proxyUpstreamUrl.trim().length > 0) {
context.resolved.ankiConnect.proxy.upstreamUrl = proxyUpstreamUrl.trim();
} else if (proxy.upstreamUrl !== undefined) {
context.warn(
'ankiConnect.proxy.upstreamUrl',
proxy.upstreamUrl,
context.resolved.ankiConnect.proxy.upstreamUrl,
'Expected non-empty string.',
);
}
const proxyPort = asNumber(proxy.port);
if (
proxyPort !== undefined &&
Number.isInteger(proxyPort) &&
proxyPort >= 1 &&
proxyPort <= 65535
) {
context.resolved.ankiConnect.proxy.port = proxyPort;
} else if (proxy.port !== undefined) {
context.warn(
'ankiConnect.proxy.port',
proxy.port,
context.resolved.ankiConnect.proxy.port,
'Expected integer between 1 and 65535.',
);
}
} else if (ac.proxy !== undefined) {
context.warn(
'ankiConnect.proxy',
ac.proxy,
context.resolved.ankiConnect.proxy,
'Expected object.',
);
}
if (Array.isArray(ac.tags)) {
const normalizedTags = ac.tags
.filter((entry): entry is string => typeof entry === 'string')

View File

@@ -41,7 +41,13 @@ test('initializeOverlayRuntime skips Anki integration when ankiConnect.enabled i
setIntegrationCalls += 1;
},
showDesktopNotification: () => {},
createFieldGroupingCallback: () => async () => 'auto',
createFieldGroupingCallback: () =>
async () => ({
keepNoteId: 1,
deleteNoteId: 2,
deleteDuplicate: false,
cancelled: false,
}),
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
});
@@ -90,7 +96,13 @@ test('initializeOverlayRuntime starts Anki integration when ankiConnect.enabled
setIntegrationCalls += 1;
},
showDesktopNotification: () => {},
createFieldGroupingCallback: () => async () => 'manual',
createFieldGroupingCallback: () =>
async () => ({
keepNoteId: 3,
deleteNoteId: 4,
deleteDuplicate: false,
cancelled: false,
}),
getKnownWordCacheStatePath: () => '/tmp/known-words-cache.json',
});

View File

@@ -1696,3 +1696,169 @@ test('createTokenizerDepsRuntime checks MeCab availability before first tokenize
assert.equal(first?.[0]?.surface, '仮面');
assert.equal(second?.[0]?.surface, '仮面');
});
test('tokenizeSubtitle uses async MeCab enrichment override when provided', async () => {
const result = await tokenizeSubtitle(
'猫',
makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
tokenizeWithMecab: async () => [
{
headword: '猫',
surface: '猫',
reading: 'ネコ',
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
isMerged: true,
isKnown: false,
isNPlusOneTarget: false,
},
],
enrichTokensWithMecab: async (tokens) =>
tokens.map((token) => ({
...token,
pos1: 'override-pos',
})),
}),
);
assert.equal(result.tokens?.length, 1);
assert.equal(result.tokens?.[0]?.pos1, 'override-pos');
});
test('createTokenizerDepsRuntime exposes async MeCab enrichment helper', async () => {
const deps = createTokenizerDepsRuntime({
getYomitanExt: () => null,
getYomitanParserWindow: () => null,
setYomitanParserWindow: () => {},
getYomitanParserReadyPromise: () => null,
setYomitanParserReadyPromise: () => {},
getYomitanParserInitPromise: () => null,
setYomitanParserInitPromise: () => {},
isKnownWord: () => false,
getKnownWordMatchMode: () => 'headword',
getJlptLevel: () => null,
getMecabTokenizer: () => null,
});
const enriched = await deps.enrichTokensWithMecab?.(
[
{
headword: 'は',
surface: 'は',
reading: 'は',
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.other,
isMerged: true,
isKnown: false,
isNPlusOneTarget: false,
},
],
[
{
headword: 'は',
surface: 'は',
reading: 'ハ',
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
],
);
assert.equal(enriched?.[0]?.pos1, '助詞');
});
test('tokenizeSubtitle skips all enrichment stages when disabled', async () => {
let knownCalls = 0;
let mecabCalls = 0;
let jlptCalls = 0;
let frequencyCalls = 0;
const result = await tokenizeSubtitle(
'猫',
makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
isKnownWord: () => {
knownCalls += 1;
return true;
},
getNPlusOneEnabled: () => false,
getJlptEnabled: () => false,
getFrequencyDictionaryEnabled: () => false,
getJlptLevel: () => {
jlptCalls += 1;
return 'N5';
},
getFrequencyRank: () => {
frequencyCalls += 1;
return 10;
},
tokenizeWithMecab: async () => {
mecabCalls += 1;
return null;
},
}),
);
assert.equal(result.tokens?.length, 1);
assert.equal(result.tokens?.[0]?.isKnown, false);
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false);
assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
assert.equal(knownCalls, 0);
assert.equal(mecabCalls, 0);
assert.equal(jlptCalls, 0);
assert.equal(frequencyCalls, 0);
});
test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async () => {
let knownCalls = 0;
let mecabCalls = 0;
let frequencyCalls = 0;
const result = await tokenizeSubtitle(
'猫',
makeDepsFromYomitanTokens([{ surface: '猫', reading: 'ねこ', headword: '猫' }], {
isKnownWord: () => {
knownCalls += 1;
return true;
},
getNPlusOneEnabled: () => false,
getJlptEnabled: () => false,
getFrequencyDictionaryEnabled: () => true,
getFrequencyRank: () => {
frequencyCalls += 1;
return 7;
},
tokenizeWithMecab: async () => {
mecabCalls += 1;
return [
{
headword: '猫',
surface: '猫',
reading: 'ネコ',
startPos: 0,
endPos: 1,
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
];
},
}),
);
assert.equal(result.tokens?.[0]?.frequencyRank, 7);
assert.equal(result.tokens?.[0]?.isKnown, false);
assert.equal(knownCalls, 0);
assert.equal(mecabCalls, 1);
assert.equal(frequencyCalls, 1);
});

View File

@@ -9,13 +9,16 @@ import {
FrequencyDictionaryLookup,
JlptLevel,
} from '../../types';
import { annotateTokens } from './tokenizer/annotation-stage';
import { enrichTokensWithMecabPos1 } from './tokenizer/parser-enrichment-stage';
import { selectYomitanParseTokens } from './tokenizer/parser-selection-stage';
import { requestYomitanParseResults } from './tokenizer/yomitan-parser-runtime';
const logger = createLogger('main:tokenizer');
type MecabTokenEnrichmentFn = (
tokens: MergedToken[],
mecabTokens: MergedToken[] | null,
) => Promise<MergedToken[]>;
export interface TokenizerServiceDeps {
getYomitanExt: () => Extension | null;
getYomitanParserWindow: () => BrowserWindow | null;
@@ -27,12 +30,14 @@ export interface TokenizerServiceDeps {
isKnownWord: (text: string) => boolean;
getKnownWordMatchMode: () => NPlusOneMatchMode;
getJlptLevel: (text: string) => JlptLevel | null;
getNPlusOneEnabled?: () => boolean;
getJlptEnabled?: () => boolean;
getFrequencyDictionaryEnabled?: () => boolean;
getFrequencyRank?: FrequencyDictionaryLookup;
getMinSentenceWordsForNPlusOne?: () => number;
getYomitanGroupDebugEnabled?: () => boolean;
tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
enrichTokensWithMecab?: MecabTokenEnrichmentFn;
}
interface MecabTokenizerLike {
@@ -52,6 +57,7 @@ export interface TokenizerDepsRuntimeOptions {
isKnownWord: (text: string) => boolean;
getKnownWordMatchMode: () => NPlusOneMatchMode;
getJlptLevel: (text: string) => JlptLevel | null;
getNPlusOneEnabled?: () => boolean;
getJlptEnabled?: () => boolean;
getFrequencyDictionaryEnabled?: () => boolean;
getFrequencyRank?: FrequencyDictionaryLookup;
@@ -60,6 +66,82 @@ export interface TokenizerDepsRuntimeOptions {
getMecabTokenizer: () => MecabTokenizerLike | null;
}
interface TokenizerAnnotationOptions {
nPlusOneEnabled: boolean;
jlptEnabled: boolean;
frequencyEnabled: boolean;
minSentenceWordsForNPlusOne: number | undefined;
}
let parserEnrichmentWorkerRuntimeModulePromise:
| Promise<typeof import('./tokenizer/parser-enrichment-worker-runtime')>
| null = null;
let annotationStageModulePromise: Promise<typeof import('./tokenizer/annotation-stage')> | null = null;
let parserEnrichmentFallbackModulePromise:
| Promise<typeof import('./tokenizer/parser-enrichment-stage')>
| null = null;
function getKnownWordLookup(deps: TokenizerServiceDeps, options: TokenizerAnnotationOptions): (text: string) => boolean {
if (!options.nPlusOneEnabled) {
return () => false;
}
return deps.isKnownWord;
}
function needsMecabPosEnrichment(options: TokenizerAnnotationOptions): boolean {
return options.jlptEnabled || options.frequencyEnabled;
}
function hasAnyAnnotationEnabled(options: TokenizerAnnotationOptions): boolean {
return options.nPlusOneEnabled || options.jlptEnabled || options.frequencyEnabled;
}
async function enrichTokensWithMecabAsync(
tokens: MergedToken[],
mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
if (!parserEnrichmentWorkerRuntimeModulePromise) {
parserEnrichmentWorkerRuntimeModulePromise = import('./tokenizer/parser-enrichment-worker-runtime');
}
try {
const runtime = await parserEnrichmentWorkerRuntimeModulePromise;
return await runtime.enrichTokensWithMecabPos1Async(tokens, mecabTokens);
} catch {
if (!parserEnrichmentFallbackModulePromise) {
parserEnrichmentFallbackModulePromise = import('./tokenizer/parser-enrichment-stage');
}
const fallback = await parserEnrichmentFallbackModulePromise;
return fallback.enrichTokensWithMecabPos1(tokens, mecabTokens);
}
}
async function applyAnnotationStage(
tokens: MergedToken[],
deps: TokenizerServiceDeps,
options: TokenizerAnnotationOptions,
): Promise<MergedToken[]> {
if (!hasAnyAnnotationEnabled(options)) {
return tokens;
}
if (!annotationStageModulePromise) {
annotationStageModulePromise = import('./tokenizer/annotation-stage');
}
const annotationStage = await annotationStageModulePromise;
return annotationStage.annotateTokens(
tokens,
{
isKnownWord: getKnownWordLookup(deps, options),
knownWordMatchMode: deps.getKnownWordMatchMode(),
getJlptLevel: deps.getJlptLevel,
getFrequencyRank: deps.getFrequencyRank,
},
options,
);
}
export function createTokenizerDepsRuntime(
options: TokenizerDepsRuntimeOptions,
): TokenizerServiceDeps {
@@ -76,6 +158,7 @@ export function createTokenizerDepsRuntime(
isKnownWord: options.isKnownWord,
getKnownWordMatchMode: options.getKnownWordMatchMode,
getJlptLevel: options.getJlptLevel,
getNPlusOneEnabled: options.getNPlusOneEnabled,
getJlptEnabled: options.getJlptEnabled,
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
getFrequencyRank: options.getFrequencyRank,
@@ -104,8 +187,11 @@ export function createTokenizerDepsRuntime(
return null;
}
return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode());
const isKnownWordLookup = options.getNPlusOneEnabled?.() === false ? () => false : options.isKnownWord;
return mergeTokens(rawTokens, isKnownWordLookup, options.getKnownWordMatchMode());
},
enrichTokensWithMecab: async (tokens, mecabTokens) =>
enrichTokensWithMecabAsync(tokens, mecabTokens),
};
}
@@ -128,36 +214,19 @@ function logSelectedYomitanGroups(text: string, tokens: MergedToken[]): void {
});
}
function getAnnotationOptions(deps: TokenizerServiceDeps): {
jlptEnabled: boolean;
frequencyEnabled: boolean;
minSentenceWordsForNPlusOne: number | undefined;
} {
function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOptions {
return {
nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false,
jlptEnabled: deps.getJlptEnabled?.() !== false,
frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false,
minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(),
};
}
function applyAnnotationStage(tokens: MergedToken[], deps: TokenizerServiceDeps): MergedToken[] {
const options = getAnnotationOptions(deps);
return annotateTokens(
tokens,
{
isKnownWord: deps.isKnownWord,
knownWordMatchMode: deps.getKnownWordMatchMode(),
getJlptLevel: deps.getJlptLevel,
getFrequencyRank: deps.getFrequencyRank,
},
options,
);
}
async function parseWithYomitanInternalParser(
text: string,
deps: TokenizerServiceDeps,
options: TokenizerAnnotationOptions,
): Promise<MergedToken[] | null> {
const parseResults = await requestYomitanParseResults(text, deps, logger);
if (!parseResults) {
@@ -166,7 +235,7 @@ async function parseWithYomitanInternalParser(
const selectedTokens = selectYomitanParseTokens(
parseResults,
deps.isKnownWord,
getKnownWordLookup(deps, options),
deps.getKnownWordMatchMode(),
);
if (!selectedTokens || selectedTokens.length === 0) {
@@ -177,9 +246,14 @@ async function parseWithYomitanInternalParser(
logSelectedYomitanGroups(text, selectedTokens);
}
if (!needsMecabPosEnrichment(options)) {
return selectedTokens;
}
try {
const mecabTokens = await deps.tokenizeWithMecab(text);
return enrichTokensWithMecabPos1(selectedTokens, mecabTokens);
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
return await enrichTokensWithMecab(selectedTokens, mecabTokens);
} catch (err) {
const error = err as Error;
logger.warn(
@@ -207,12 +281,13 @@ export async function tokenizeSubtitle(
}
const tokenizeText = displayText.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
const annotationOptions = getAnnotationOptions(deps);
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
if (yomitanTokens && yomitanTokens.length > 0) {
return {
text: displayText,
tokens: applyAnnotationStage(yomitanTokens, deps),
tokens: await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
};
}

View File

@@ -31,6 +31,7 @@ export interface AnnotationStageDeps {
}
export interface AnnotationStageOptions {
nPlusOneEnabled?: boolean;
jlptEnabled?: boolean;
frequencyEnabled?: boolean;
minSentenceWordsForNPlusOne?: number;
@@ -340,11 +341,14 @@ export function annotateTokens(
deps: AnnotationStageDeps,
options: AnnotationStageOptions = {},
): MergedToken[] {
const knownMarkedTokens = applyKnownWordMarking(
tokens,
deps.isKnownWord,
deps.knownWordMatchMode,
);
const nPlusOneEnabled = options.nPlusOneEnabled !== false;
const knownMarkedTokens = nPlusOneEnabled
? applyKnownWordMarking(tokens, deps.isKnownWord, deps.knownWordMatchMode)
: tokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
const frequencyEnabled = options.frequencyEnabled !== false;
const frequencyMarkedTokens =
@@ -363,6 +367,14 @@ export function annotateTokens(
jlptLevel: undefined,
}));
if (!nPlusOneEnabled) {
return jlptMarkedTokens.map((token) => ({
...token,
isKnown: false,
isNPlusOneTarget: false,
}));
}
const minSentenceWordsForNPlusOne = options.minSentenceWordsForNPlusOne;
const sanitizedMinSentenceWordsForNPlusOne =
minSentenceWordsForNPlusOne !== undefined &&

View File

@@ -0,0 +1,147 @@
import type { MergedToken } from '../../../types';
import { createLogger } from '../../../logger';
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
const logger = createLogger('main:tokenizer');
const DISABLE_WORKER_ENV = 'SUBMINER_DISABLE_MECAB_ENRICHMENT_WORKER';
interface WorkerRequest {
id: number;
tokens: MergedToken[];
mecabTokens: MergedToken[] | null;
}
interface WorkerResponse {
id?: unknown;
result?: unknown;
error?: unknown;
}
type PendingRequest = {
resolve: (value: MergedToken[]) => void;
reject: (reason?: unknown) => void;
};
class ParserEnrichmentWorkerRuntime {
private worker: import('node:worker_threads').Worker | null = null;
private nextRequestId = 1;
private pending = new Map<number, PendingRequest>();
private initAttempted = false;
async enrichTokens(
tokens: MergedToken[],
mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
const worker = await this.getWorker();
if (!worker) {
return enrichTokensWithMecabPos1(tokens, mecabTokens);
}
return new Promise<MergedToken[]>((resolve, reject) => {
const id = this.nextRequestId++;
this.pending.set(id, { resolve, reject });
const request: WorkerRequest = { id, tokens, mecabTokens };
worker.postMessage(request);
});
}
private async getWorker(): Promise<import('node:worker_threads').Worker | null> {
if (process.env[DISABLE_WORKER_ENV] === '1') {
return null;
}
if (this.worker) {
return this.worker;
}
if (this.initAttempted) {
return null;
}
this.initAttempted = true;
let workerThreads: typeof import('node:worker_threads');
try {
workerThreads = await import('node:worker_threads');
} catch {
return null;
}
let workerPath = '';
try {
workerPath = require.resolve('./parser-enrichment-worker-thread.js');
} catch {
return null;
}
try {
const worker = new workerThreads.Worker(workerPath);
worker.on('message', (message: WorkerResponse) => this.handleWorkerMessage(message));
worker.on('error', (error: Error) => this.handleWorkerFailure(error));
worker.on('exit', (code: number) => {
if (code !== 0) {
this.handleWorkerFailure(new Error(`parser enrichment worker exited with code ${code}`));
} else {
this.worker = null;
}
});
this.worker = worker;
return worker;
} catch (error) {
logger.debug(`Failed to start parser enrichment worker: ${(error as Error).message}`);
return null;
}
}
private handleWorkerMessage(message: WorkerResponse): void {
if (typeof message.id !== 'number') {
return;
}
const request = this.pending.get(message.id);
if (!request) {
return;
}
this.pending.delete(message.id);
if (typeof message.error === 'string' && message.error.length > 0) {
request.reject(new Error(message.error));
return;
}
if (!Array.isArray(message.result)) {
request.reject(new Error('Parser enrichment worker returned invalid payload'));
return;
}
request.resolve(message.result as MergedToken[]);
}
private handleWorkerFailure(error: Error): void {
logger.debug(`Parser enrichment worker unavailable, falling back to main thread: ${error.message}`);
for (const pending of this.pending.values()) {
pending.reject(error);
}
this.pending.clear();
if (this.worker) {
this.worker.removeAllListeners();
this.worker = null;
}
}
}
let runtime: ParserEnrichmentWorkerRuntime | null = null;
export async function enrichTokensWithMecabPos1Async(
tokens: MergedToken[],
mecabTokens: MergedToken[] | null,
): Promise<MergedToken[]> {
if (!runtime) {
runtime = new ParserEnrichmentWorkerRuntime();
}
try {
return await runtime.enrichTokens(tokens, mecabTokens);
} catch {
return enrichTokensWithMecabPos1(tokens, mecabTokens);
}
}

View File

@@ -0,0 +1,25 @@
import { parentPort } from 'node:worker_threads';
import type { MergedToken } from '../../../types';
import { enrichTokensWithMecabPos1 } from './parser-enrichment-stage';
interface WorkerRequest {
id: number;
tokens: MergedToken[];
mecabTokens: MergedToken[] | null;
}
if (!parentPort) {
throw new Error('parser-enrichment worker missing parent port');
}
const port = parentPort;
port.on('message', (message: WorkerRequest) => {
try {
const result = enrichTokensWithMecabPos1(message.tokens, message.mecabTokens);
port.postMessage({ id: message.id, result });
} catch (error) {
const messageText = error instanceof Error ? error.message : String(error);
port.postMessage({ id: message.id, error: messageText });
}
});

View File

@@ -0,0 +1,83 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { syncYomitanDefaultAnkiServer } from './yomitan-parser-runtime';
function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
const parserWindow = {
isDestroyed: () => false,
webContents: {
executeJavaScript: async (script: string) => await executeJavaScript(script),
},
};
return {
getYomitanExt: () => ({ id: 'ext-id' }) as never,
getYomitanParserWindow: () => parserWindow as never,
setYomitanParserWindow: () => undefined,
getYomitanParserReadyPromise: () => null,
setYomitanParserReadyPromise: () => undefined,
getYomitanParserInitPromise: () => null,
setYomitanParserInitPromise: () => undefined,
};
}
test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
let scriptValue = '';
const deps = createDeps(async (script) => {
scriptValue = script;
return { updated: true };
});
const infoLogs: string[] = [];
const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
error: () => undefined,
info: (message) => infoLogs.push(message),
});
assert.equal(updated, true);
assert.match(scriptValue, /optionsGetFull/);
assert.match(scriptValue, /setAllSettings/);
assert.equal(infoLogs.length, 1);
});
test('syncYomitanDefaultAnkiServer returns false when script reports no change', async () => {
const deps = createDeps(async () => ({ updated: false }));
const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
error: () => undefined,
info: () => undefined,
});
assert.equal(updated, false);
});
test('syncYomitanDefaultAnkiServer logs and returns false on script failure', async () => {
const deps = createDeps(async () => {
throw new Error('execute failed');
});
const errorLogs: string[] = [];
const updated = await syncYomitanDefaultAnkiServer('http://127.0.0.1:8766', deps, {
error: (message) => errorLogs.push(message),
info: () => undefined,
});
assert.equal(updated, false);
assert.equal(errorLogs.length, 1);
});
test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
let executeCount = 0;
const deps = createDeps(async () => {
executeCount += 1;
return { updated: true };
});
const updated = await syncYomitanDefaultAnkiServer(' ', deps, {
error: () => undefined,
info: () => undefined,
});
assert.equal(updated, false);
assert.equal(executeCount, 0);
});

View File

@@ -2,6 +2,7 @@ import type { BrowserWindow, Extension } from 'electron';
interface LoggerLike {
error: (message: string, ...args: unknown[]) => void;
info?: (message: string, ...args: unknown[]) => void;
}
interface YomitanParserRuntimeDeps {
@@ -152,3 +153,90 @@ export async function requestYomitanParseResults(
return null;
}
}
export async function syncYomitanDefaultAnkiServer(
serverUrl: string,
deps: YomitanParserRuntimeDeps,
logger: LoggerLike,
): Promise<boolean> {
const normalizedTargetServer = serverUrl.trim();
if (!normalizedTargetServer) {
return false;
}
const isReady = await ensureYomitanParserWindow(deps, logger);
const parserWindow = deps.getYomitanParserWindow();
if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
return false;
}
const script = `
(async () => {
const invoke = (action, params) =>
new Promise((resolve, reject) => {
chrome.runtime.sendMessage({ action, params }, (response) => {
if (chrome.runtime.lastError) {
reject(new Error(chrome.runtime.lastError.message));
return;
}
if (!response || typeof response !== "object") {
reject(new Error("Invalid response from Yomitan backend"));
return;
}
if (response.error) {
reject(new Error(response.error.message || "Yomitan backend error"));
return;
}
resolve(response.result);
});
});
const targetServer = ${JSON.stringify(normalizedTargetServer)};
const optionsFull = await invoke("optionsGetFull", undefined);
const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
if (profiles.length === 0) {
return { updated: false, reason: "no-profiles" };
}
const defaultProfile = profiles[0];
if (!defaultProfile || typeof defaultProfile !== "object") {
return { updated: false, reason: "invalid-default-profile" };
}
defaultProfile.options = defaultProfile.options && typeof defaultProfile.options === "object"
? defaultProfile.options
: {};
defaultProfile.options.anki = defaultProfile.options.anki && typeof defaultProfile.options.anki === "object"
? defaultProfile.options.anki
: {};
const currentServerRaw = defaultProfile.options.anki.server;
const currentServer = typeof currentServerRaw === "string" ? currentServerRaw.trim() : "";
const canReplaceDefault =
currentServer.length === 0 || currentServer === "http://127.0.0.1:8765";
if (!canReplaceDefault || currentServer === targetServer) {
return { updated: false, reason: "no-change", currentServer, targetServer };
}
defaultProfile.options.anki.server = targetServer;
await invoke("setAllSettings", { value: optionsFull, source: "subminer" });
return { updated: true, currentServer, targetServer };
})();
`;
try {
const result = await parserWindow.webContents.executeJavaScript(script, true);
const updated =
typeof result === 'object' &&
result !== null &&
(result as { updated?: unknown }).updated === true;
if (updated) {
logger.info?.(`Updated Yomitan default profile Anki server to ${normalizedTargetServer}`);
return true;
}
return false;
} catch (err) {
logger.error('Failed to sync Yomitan default profile Anki server:', (err as Error).message);
return false;
}
}

View File

@@ -19,6 +19,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () =>
isKnownWord: (text) => text === 'known',
recordLookup: (hit) => calls.push(`lookup:${hit}`),
getKnownWordMatchMode: () => 'surface',
getNPlusOneEnabled: () => true,
getMinSentenceWordsForNPlusOne: () => 3,
getJlptLevel: () => 'N2',
getJlptEnabled: () => true,
@@ -33,6 +34,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () =>
deps.setYomitanParserWindow(null);
deps.setYomitanParserReadyPromise(null);
deps.setYomitanParserInitPromise(null);
assert.equal(deps.getNPlusOneEnabled?.(), true);
assert.equal(deps.getMinSentenceWordsForNPlusOne?.(), 3);
assert.deepEqual(calls, ['lookup:true', 'lookup:false', 'set-window', 'set-ready', 'set-init']);
});

View File

@@ -32,6 +32,11 @@ export function createBuildTokenizerDepsMainHandler(deps: TokenizerMainDeps) {
return hit;
},
getKnownWordMatchMode: () => deps.getKnownWordMatchMode(),
...(deps.getNPlusOneEnabled
? {
getNPlusOneEnabled: () => deps.getNPlusOneEnabled!(),
}
: {}),
getMinSentenceWordsForNPlusOne: () => deps.getMinSentenceWordsForNPlusOne(),
getJlptLevel: (text: string) => deps.getJlptLevel(text),
getJlptEnabled: () => deps.getJlptEnabled(),