diff --git a/bun.lock b/bun.lock index cc04bb0..ae9b817 100644 --- a/bun.lock +++ b/bun.lock @@ -5,6 +5,8 @@ "": { "name": "subminer", "dependencies": { + "@fontsource-variable/geist": "^5.2.8", + "@fontsource-variable/geist-mono": "^5.2.7", "@hono/node-server": "^1.19.11", "axios": "^1.13.5", "commander": "^14.0.3", @@ -98,6 +100,10 @@ "@esbuild/win32-x64": ["@esbuild/win32-x64@0.25.12", "", { "os": "win32", "cpu": "x64" }, "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA=="], + "@fontsource-variable/geist": ["@fontsource-variable/geist@5.2.8", "", {}, "sha512-cJ6m9e+8MQ5dCYJsLylfZrgBh6KkG4bOLckB35Tr9J/EqdkEM6QllH5PxqP1dhTvFup+HtMRPuz9xOjxXJggxw=="], + + "@fontsource-variable/geist-mono": ["@fontsource-variable/geist-mono@5.2.7", "", {}, "sha512-ZKlZ5sjtalb2TwXKs400mAGDlt/+2ENLNySPx0wTz3bP3mWARCsUW+rpxzZc7e05d2qGch70pItt3K4qttbIYA=="], + "@hono/node-server": ["@hono/node-server@1.19.11", "", { "peerDependencies": { "hono": "^4" } }, "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g=="], "@isaacs/cliui": ["@isaacs/cliui@9.0.0", "", {}, "sha512-AokJm4tuBHillT+FpMtxQ60n8ObyXBatq7jD2/JA9dxbDDokKQm8KMht5ibGzLVU9IJDIKK4TPKgMHEYMn3lMg=="], diff --git a/changes/2026-03-15-known-words-config-section.md b/changes/2026-03-15-known-words-config-section.md new file mode 100644 index 0000000..6c95bcf --- /dev/null +++ b/changes/2026-03-15-known-words-config-section.md @@ -0,0 +1,5 @@ +type: changed +area: anki + +- Changed known-word cache settings to live under `ankiConnect.knownWords` instead of mixing them into `ankiConnect.nPlusOne`. +- Kept legacy `ankiConnect.nPlusOne` known-word keys and older `ankiConnect.behavior.nPlusOne*` keys as deprecated compatibility fallbacks. 
diff --git a/changes/2026-03-15-session-delete-from-sessions-tab.md b/changes/2026-03-15-session-delete-from-sessions-tab.md new file mode 100644 index 0000000..d17c95e --- /dev/null +++ b/changes/2026-03-15-session-delete-from-sessions-tab.md @@ -0,0 +1,4 @@ +type: changed +area: stats + +- Added session deletion to the Sessions tab, using the same confirmation prompt as anime episode/session deletes; deleting a session also removes all of its associated rows from the stats database. diff --git a/changes/2026-03-15-stats-overlay-port-fix.md b/changes/2026-03-15-stats-overlay-port-fix.md new file mode 100644 index 0000000..9b9c5b1 --- /dev/null +++ b/changes/2026-03-15-stats-overlay-port-fix.md @@ -0,0 +1,4 @@ +type: fixed +area: stats + +- Fixed the in-app stats overlay so it connects to the configured `stats.serverPort` instead of falling back to the default port. diff --git a/changes/2026-03-15-surface-frequency-fallback.md b/changes/2026-03-15-surface-frequency-fallback.md new file mode 100644 index 0000000..3fffb2a --- /dev/null +++ b/changes/2026-03-15-surface-frequency-fallback.md @@ -0,0 +1,9 @@ +type: fixed +area: overlay + +- Fixed subtitle frequency tagging for merged lookup-backed tokens like `陰に` by falling back to exact surface-form Yomitan frequencies when the normalized headword lookup misses. +- Fixed MeCab merged-token position mapping across line breaks so merged content-plus-particle tokens like `陰に` keep their matched Yomitan frequency instead of inheriting shifted POS tags. +- Fixed grouped frequency parsing in both Yomitan and fallback frequency-dictionary lookups so display values like `118,121` use the leading rank instead of collapsing the rank and occurrence count into `118121`. +- Fixed frequency-rank ingestion to ignore Yomitan dictionaries explicitly marked `occurrence-based`, so raw occurrence counts are no longer treated as subtitle rank values. 
+- Fixed inflected headword frequency tagging to prefer ranks from the selected Yomitan `termsFind` popup entry itself, ordered by configured dictionary priority, so forms like `潜み` use primary-dictionary ranks like `4073` before falling back to lower-priority raw lemma metadata such as `CC100`. +- Fixed annotation-stage frequency filtering so exact kanji noun tokens like `者` keep their matched rank even when MeCab labels them `名詞/非自立`, instead of dropping the highlight after scan-time frequency lookup succeeds. diff --git a/config.example.jsonc b/config.example.jsonc index 52834b8..067578b 100644 --- a/config.example.jsonc +++ b/config.example.jsonc @@ -343,6 +343,13 @@ "fallbackDuration": 3, // Fallback duration setting. "maxMediaDuration": 30 // Max media duration setting. }, // Media setting. + "knownWords": { + "highlightEnabled": false, // Enable fast local highlighting for words already known in Anki. Values: true | false + "refreshMinutes": 1440, // Minutes between known-word cache refreshes. + "matchMode": "headword", // Known-word matching strategy for subtitle annotations. Values: headword | surface + "decks": [], // Decks used for known-word cache scope. Supports one or more deck names. + "color": "#a6da95" // Color used for known-word highlights. + }, // Known words setting. "behavior": { "overwriteAudio": true, // Overwrite audio setting. Values: true | false "overwriteImage": true, // Overwrite image setting. Values: true | false @@ -352,13 +359,8 @@ "autoUpdateNewCards": true // Automatically update newly added cards. Values: true | false }, // Behavior setting. "nPlusOne": { - "highlightEnabled": false, // Enable fast local highlighting for words already known in Anki. Values: true | false - "refreshMinutes": 1440, // Minutes between known-word cache refreshes. - "matchMode": "headword", // Known-word matching strategy for N+1 highlighting. Values: headword | surface - "decks": [], // Decks used for N+1 known-word cache scope. 
Supports one or more deck names. "minSentenceWords": 3, // Minimum sentence word count required for N+1 targeting (default: 3). - "nPlusOne": "#c6a0f6", // Color used for the single N+1 target token highlight. - "knownWord": "#a6da95" // Color used for legacy known-word highlights. + "nPlusOne": "#c6a0f6" // Color used for the single N+1 target token highlight. }, // N plus one setting. "metadata": { "pattern": "[SubMiner] %f (%t)" // Pattern setting. @@ -512,7 +514,7 @@ // ========================================== "stats": { "toggleKey": "Backquote", // Key code to toggle the stats overlay. - "serverPort": 5175, // Port for the stats HTTP server. + "serverPort": 6969, // Port for the stats HTTP server. "autoStartServer": true, // Automatically start the stats server on launch. Values: true | false "autoOpenBrowser": true // Automatically open the stats dashboard in a browser when the server starts. Values: true | false } // Local immersion stats dashboard served on localhost and available as an in-app overlay. diff --git a/docs-site/configuration.md b/docs-site/configuration.md index e15e67d..5aa64b1 100644 --- a/docs-site/configuration.md +++ b/docs-site/configuration.md @@ -665,10 +665,10 @@ Use the runtime options palette to toggle settings live while SubMiner is runnin Current runtime options: - `ankiConnect.behavior.autoUpdateNewCards` (`On` / `Off`) -- `ankiConnect.nPlusOne.highlightEnabled` (`On` / `Off`) +- `ankiConnect.knownWords.highlightEnabled` (`On` / `Off`) - `subtitleStyle.enableJlpt` (`On` / `Off`) - `subtitleStyle.frequencyDictionary.enabled` (`On` / `Off`) -- `ankiConnect.nPlusOne.matchMode` (`headword` / `surface`) +- `ankiConnect.knownWords.matchMode` (`headword` / `surface`) - `ankiConnect.isKiku.fieldGrouping` (`auto` / `manual` / `disabled`) Annotation toggles (`nPlusOne`, `enableJlpt`, `frequencyDictionary.enabled`) only apply to new subtitle lines after the toggle. The currently displayed line is not re-tokenized in place. 
@@ -796,7 +796,7 @@ This example is intentionally compact. The option table below documents availabl | `proxy.upstreamUrl` | string (URL) | Upstream AnkiConnect URL that proxy forwards to (default: `http://127.0.0.1:8765`) | | `tags` | array of strings | Tags automatically added to cards mined/updated by SubMiner (default: `['SubMiner']`; set `[]` to disable automatic tagging). | | `deck` | string | Anki deck to monitor for new cards | -| `ankiConnect.nPlusOne.decks` | array of strings | Decks used for N+1 known-word cache lookups. When omitted/empty, falls back to `ankiConnect.deck`. | +| `ankiConnect.knownWords.decks` | array of strings | Decks used for known-word cache lookups. When omitted/empty, falls back to `ankiConnect.deck`. | | `fields.audio` | string | Card field for audio files (default: `ExpressionAudio`) | | `fields.image` | string | Card field for images (default: `Picture`) | | `fields.sentence` | string | Card field for sentences (default: `Sentence`) | @@ -823,13 +823,13 @@ This example is intentionally compact. The option table below documents availabl | `behavior.overwriteImage` | `true`, `false` | Replace existing images on updates; when `false`, new images are appended/prepended per `behavior.mediaInsertMode` (default: `true`) | | `behavior.mediaInsertMode` | `"append"`, `"prepend"` | Where to insert new media when overwrite is off (default: `"append"`) | | `behavior.highlightWord` | `true`, `false` | Highlight the word in sentence context (default: `true`) | -| `ankiConnect.nPlusOne.highlightEnabled` | `true`, `false` | Enable fast local highlighting for words already known in Anki (default: `false`) | +| `ankiConnect.knownWords.highlightEnabled` | `true`, `false` | Enable fast local highlighting for words already known in Anki (default: `false`) | +| `ankiConnect.knownWords.color` | hex color string | Text color for tokens already found in the local known-word cache (default: `"#a6da95"`). 
| +| `ankiConnect.knownWords.matchMode` | `"headword"`, `"surface"` | Matching strategy for known-word highlighting (default: `"headword"`). `headword` uses token headwords; `surface` uses visible subtitle text. | +| `ankiConnect.knownWords.refreshMinutes` | number | Minutes between known-word cache refreshes (default: `1440`) | +| `ankiConnect.knownWords.decks` | array of strings | Decks used by known-word cache refresh. Leave empty for compatibility with legacy `deck` scope. | | `ankiConnect.nPlusOne.nPlusOne` | hex color string | Text color for the single target token to study when exactly one unknown candidate exists in a sentence (default: `"#c6a0f6"`). | -| `ankiConnect.nPlusOne.knownWord` | hex color string | Legacy known-word color kept for backward compatibility (default: `"#a6da95"`). | -| `ankiConnect.nPlusOne.matchMode` | `"headword"`, `"surface"` | Matching strategy for known-word highlighting (default: `"headword"`). `headword` uses token headwords; `surface` uses visible subtitle text. | | `ankiConnect.nPlusOne.minSentenceWords` | number | Minimum number of words required in a sentence before single unknown-word N+1 highlighting can trigger (default: `3`). | -| `ankiConnect.nPlusOne.refreshMinutes` | number | Minutes between known-word cache refreshes (default: `1440`) | -| `ankiConnect.nPlusOne.decks` | array of strings | Decks used by known-word cache refresh. Leave empty for compatibility with legacy `deck` scope. 
| | `behavior.notificationType` | `"osd"`, `"system"`, `"both"`, `"none"` | Notification type on card update (default: `"osd"`) | | `behavior.autoUpdateNewCards` | `true`, `false` | Automatically update cards on creation (default: `true`) | | `metadata.pattern` | string | Format pattern for metadata: `%f`=filename, `%F`=filename+ext, `%t`=time | @@ -864,20 +864,20 @@ SubMiner is intentionally built for [Kiku](https://kiku.youyoumu.my.id/) and [La ### N+1 Word Highlighting -When `ankiConnect.nPlusOne.highlightEnabled` is enabled, SubMiner builds a local cache of known words from Anki to highlight already learned tokens in subtitle rendering. +When `ankiConnect.knownWords.highlightEnabled` is enabled, SubMiner builds a local cache of known words from Anki to highlight already learned tokens in subtitle rendering. Known-word cache policy: - Initial sync runs when the integration starts if the cache is missing or stale. -- `ankiConnect.nPlusOne.refreshMinutes` controls the minimum time between refreshes; between refreshes, cached words are reused without querying Anki. +- `ankiConnect.knownWords.refreshMinutes` controls the minimum time between refreshes; between refreshes, cached words are reused without querying Anki. - `ankiConnect.nPlusOne.nPlusOne` sets the color for the single target token when exactly one eligible unknown word exists. - `ankiConnect.nPlusOne.minSentenceWords` sets the minimum token count required in a sentence for N+1 highlighting (default: `3`). -- `ankiConnect.nPlusOne.knownWord` sets the legacy known-word highlight color for tokens already in Anki. -- `ankiConnect.nPlusOne.decks` accepts one or more decks. If empty, it uses the legacy single `ankiConnect.deck` value as scope. +- `ankiConnect.knownWords.color` sets the known-word highlight color for tokens already in Anki. +- `ankiConnect.knownWords.decks` accepts one or more decks. If empty, it uses the legacy single `ankiConnect.deck` value as scope. 
- Cache state is persisted to `known-words-cache.json` under the app `userData` directory. - The cache is automatically invalidated when the configured scope changes (for example, when deck changes). -- Cache lookups are in-memory. By default, token headwords are matched against cached `Expression` / `Word` values; set `ankiConnect.nPlusOne.matchMode` to `"surface"` for raw subtitle text matching. -- `ankiConnect.behavior.nPlusOne*` legacy keys (`nPlusOneHighlightEnabled`, `nPlusOneRefreshMinutes`, `nPlusOneMatchMode`) are deprecated and only kept for backward compatibility. +- Cache lookups are in-memory. By default, token headwords are matched against cached `Expression` / `Word` values; set `ankiConnect.knownWords.matchMode` to `"surface"` for raw subtitle text matching. +- Legacy moved keys under `ankiConnect.nPlusOne` (`highlightEnabled`, `refreshMinutes`, `matchMode`, `decks`, `knownWord`) and older `ankiConnect.behavior.nPlusOne*` keys are deprecated and only kept for backward compatibility. - Legacy top-level `ankiConnect` migration keys (for example `audioField`, `generateAudio`, `imageType`) are compatibility-only, validated before mapping, and ignored with a warning when invalid. - If AnkiConnect is unreachable, the cache remains in its previous state and an on-screen/system status message is shown. - Known-word sync activity is logged at `INFO`/`DEBUG` level with the `anki` logger scope and includes scope, notes returned, and word counts. @@ -887,9 +887,12 @@ To refresh roughly once per day, set: ```json { "ankiConnect": { - "nPlusOne": { + "knownWords": { "highlightEnabled": true, "refreshMinutes": 1440 + }, + "nPlusOne": { + "minSentenceWords": 3 } } } diff --git a/docs-site/public/config.example.jsonc b/docs-site/public/config.example.jsonc index 52834b8..067578b 100644 --- a/docs-site/public/config.example.jsonc +++ b/docs-site/public/config.example.jsonc @@ -343,6 +343,13 @@ "fallbackDuration": 3, // Fallback duration setting. 
"maxMediaDuration": 30 // Max media duration setting. }, // Media setting. + "knownWords": { + "highlightEnabled": false, // Enable fast local highlighting for words already known in Anki. Values: true | false + "refreshMinutes": 1440, // Minutes between known-word cache refreshes. + "matchMode": "headword", // Known-word matching strategy for subtitle annotations. Values: headword | surface + "decks": [], // Decks used for known-word cache scope. Supports one or more deck names. + "color": "#a6da95" // Color used for known-word highlights. + }, // Known words setting. "behavior": { "overwriteAudio": true, // Overwrite audio setting. Values: true | false "overwriteImage": true, // Overwrite image setting. Values: true | false @@ -352,13 +359,8 @@ "autoUpdateNewCards": true // Automatically update newly added cards. Values: true | false }, // Behavior setting. "nPlusOne": { - "highlightEnabled": false, // Enable fast local highlighting for words already known in Anki. Values: true | false - "refreshMinutes": 1440, // Minutes between known-word cache refreshes. - "matchMode": "headword", // Known-word matching strategy for N+1 highlighting. Values: headword | surface - "decks": [], // Decks used for N+1 known-word cache scope. Supports one or more deck names. "minSentenceWords": 3, // Minimum sentence word count required for N+1 targeting (default: 3). - "nPlusOne": "#c6a0f6", // Color used for the single N+1 target token highlight. - "knownWord": "#a6da95" // Color used for legacy known-word highlights. + "nPlusOne": "#c6a0f6" // Color used for the single N+1 target token highlight. }, // N plus one setting. "metadata": { "pattern": "[SubMiner] %f (%t)" // Pattern setting. @@ -512,7 +514,7 @@ // ========================================== "stats": { "toggleKey": "Backquote", // Key code to toggle the stats overlay. - "serverPort": 5175, // Port for the stats HTTP server. + "serverPort": 6969, // Port for the stats HTTP server. 
"autoStartServer": true, // Automatically start the stats server on launch. Values: true | false "autoOpenBrowser": true // Automatically open the stats dashboard in a browser when the server starts. Values: true | false } // Local immersion stats dashboard served on localhost and available as an in-app overlay. diff --git a/docs-site/subtitle-annotations.md b/docs-site/subtitle-annotations.md index 1747bc1..98fdcb2 100644 --- a/docs-site/subtitle-annotations.md +++ b/docs-site/subtitle-annotations.md @@ -2,7 +2,7 @@ SubMiner annotates subtitle tokens in real time as they appear in the overlay. Four annotation layers work together to surface useful context while you watch: **N+1 highlighting**, **character-name highlighting**, **frequency highlighting**, and **JLPT tagging**. -All four are opt-in and configured under `subtitleStyle` and `ankiConnect.nPlusOne` in your config. They apply independently — you can enable any combination. +All four are opt-in and configured under `subtitleStyle`, `ankiConnect.knownWords`, and `ankiConnect.nPlusOne` in your config. They apply independently — you can enable any combination. 
## N+1 Word Highlighting @@ -20,13 +20,13 @@ N+1 highlighting identifies sentences where you know every word except one, maki | Option | Default | Description | | --- | --- | --- | -| `ankiConnect.nPlusOne.highlightEnabled` | `false` | Enable N+1 highlighting | -| `ankiConnect.nPlusOne.refreshMinutes` | `60` | Minutes between Anki cache refreshes | -| `ankiConnect.nPlusOne.decks` | `[]` | Decks to query (falls back to `ankiConnect.deck`) | -| `ankiConnect.nPlusOne.matchMode` | `"headword"` | `"headword"` (dictionary form) or `"surface"` (raw text) | +| `ankiConnect.knownWords.highlightEnabled` | `false` | Enable known-word cache lookups used by N+1 highlighting | +| `ankiConnect.knownWords.refreshMinutes` | `1440` | Minutes between Anki cache refreshes | +| `ankiConnect.knownWords.decks` | `[]` | Decks to query (falls back to `ankiConnect.deck`) | +| `ankiConnect.knownWords.matchMode` | `"headword"` | `"headword"` (dictionary form) or `"surface"` (raw text) | | `ankiConnect.nPlusOne.minSentenceWords` | `3` | Minimum tokens in a sentence for N+1 to trigger | -| `subtitleStyle.nPlusOneColor` | `#c6a0f6` | Color for the single unknown target word | -| `subtitleStyle.knownWordColor` | `#a6da95` | Color for already-known tokens | +| `ankiConnect.nPlusOne.nPlusOne` | `#c6a0f6` | Color for the single unknown target word | +| `ankiConnect.knownWords.color` | `#a6da95` | Color for already-known tokens | ::: tip Set `refreshMinutes` to `1440` (24 hours) for daily sync if your Anki collection is large. @@ -115,7 +115,7 @@ JLPT tagging requires the offline vocabulary bundle. 
See [JLPT Vocabulary Bundle All annotation layers can be toggled at runtime via the mpv command menu without restarting: -- `ankiConnect.nPlusOne.highlightEnabled` (`On` / `Off`) +- `ankiConnect.knownWords.highlightEnabled` (`On` / `Off`) - `subtitleStyle.nameMatchEnabled` (`On` / `Off`) - `subtitleStyle.enableJlpt` (`On` / `Off`) - `subtitleStyle.frequencyDictionary.enabled` (`On` / `Off`) diff --git a/package.json b/package.json index 39088f9..6411744 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "test:plugin:src": "lua scripts/test-plugin-start-gate.lua && lua scripts/test-plugin-binary-windows.lua", "test:launcher:smoke:src": "bun test launcher/smoke.e2e.test.ts", "test:launcher:src": "bun test launcher/config.test.ts launcher/config-domain-parsers.test.ts launcher/mpv.test.ts launcher/parse-args.test.ts launcher/main.test.ts launcher/commands/command-modules.test.ts launcher/smoke.e2e.test.ts && bun run test:plugin:src", - "test:core:src": "bun test src/cli/args.test.ts src/cli/help.test.ts src/shared/setup-state.test.ts src/core/services/cli-command.test.ts src/core/services/field-grouping-overlay.test.ts src/core/services/numeric-shortcut-session.test.ts src/core/services/secondary-subtitle.test.ts src/core/services/mpv-render-metrics.test.ts src/core/services/overlay-content-measurement.test.ts src/core/services/mpv-control.test.ts src/core/services/mpv.test.ts src/core/services/runtime-options-ipc.test.ts src/core/services/runtime-config.test.ts src/core/services/yomitan-extension-paths.test.ts src/core/services/config-hot-reload.test.ts src/core/services/discord-presence.test.ts src/core/services/tokenizer.test.ts src/core/services/tokenizer/annotation-stage.test.ts src/core/services/tokenizer/parser-selection-stage.test.ts src/core/services/tokenizer/parser-enrichment-stage.test.ts src/core/services/subsync.test.ts src/core/services/overlay-bridge.test.ts src/core/services/overlay-shortcut-handler.test.ts src/core/services/mining.test.ts 
src/core/services/anki-jimaku.test.ts src/core/services/jimaku-download-path.test.ts src/core/services/jellyfin.test.ts src/core/services/jellyfin-remote.test.ts src/core/services/immersion-tracker-service.test.ts src/core/services/overlay-runtime-init.test.ts src/core/services/app-ready.test.ts src/core/services/startup-bootstrap.test.ts src/core/services/subtitle-processing-controller.test.ts src/core/services/anilist/anilist-update-queue.test.ts src/core/utils/shortcut-config.test.ts src/main/runtime/first-run-setup-plugin.test.ts src/main/runtime/first-run-setup-service.test.ts src/main/runtime/first-run-setup-window.test.ts src/main/runtime/tray-runtime.test.ts src/main/runtime/tray-main-actions.test.ts src/main/runtime/tray-main-deps.test.ts src/main/runtime/tray-runtime-handlers.test.ts src/main/runtime/cli-command-context-main-deps.test.ts src/main/runtime/app-ready-main-deps.test.ts src/renderer/error-recovery.test.ts src/renderer/subtitle-render.test.ts src/renderer/handlers/mouse.test.ts src/renderer/handlers/keyboard.test.ts src/renderer/modals/jimaku.test.ts src/subsync/utils.test.ts src/main/anilist-url-guard.test.ts src/window-trackers/hyprland-tracker.test.ts src/window-trackers/x11-tracker.test.ts src/window-trackers/windows-helper.test.ts src/window-trackers/windows-tracker.test.ts launcher/config.test.ts launcher/config-domain-parsers.test.ts launcher/parse-args.test.ts launcher/main.test.ts launcher/commands/command-modules.test.ts launcher/setup-gate.test.ts", + "test:core:src": "bun test src/cli/args.test.ts src/cli/help.test.ts src/shared/setup-state.test.ts src/core/services/cli-command.test.ts src/core/services/field-grouping-overlay.test.ts src/core/services/numeric-shortcut-session.test.ts src/core/services/secondary-subtitle.test.ts src/core/services/mpv-render-metrics.test.ts src/core/services/overlay-content-measurement.test.ts src/core/services/mpv-control.test.ts src/core/services/mpv.test.ts 
src/core/services/runtime-options-ipc.test.ts src/core/services/runtime-config.test.ts src/core/services/yomitan-extension-paths.test.ts src/core/services/config-hot-reload.test.ts src/core/services/discord-presence.test.ts src/core/services/tokenizer.test.ts src/core/services/tokenizer/annotation-stage.test.ts src/core/services/tokenizer/parser-selection-stage.test.ts src/core/services/tokenizer/parser-enrichment-stage.test.ts src/core/services/subsync.test.ts src/core/services/overlay-bridge.test.ts src/core/services/overlay-shortcut-handler.test.ts src/core/services/stats-window.test.ts src/core/services/mining.test.ts src/core/services/anki-jimaku.test.ts src/core/services/jimaku-download-path.test.ts src/core/services/jellyfin.test.ts src/core/services/jellyfin-remote.test.ts src/core/services/immersion-tracker-service.test.ts src/core/services/overlay-runtime-init.test.ts src/core/services/app-ready.test.ts src/core/services/startup-bootstrap.test.ts src/core/services/subtitle-processing-controller.test.ts src/core/services/anilist/anilist-update-queue.test.ts src/core/utils/shortcut-config.test.ts src/main/runtime/first-run-setup-plugin.test.ts src/main/runtime/first-run-setup-service.test.ts src/main/runtime/first-run-setup-window.test.ts src/main/runtime/tray-runtime.test.ts src/main/runtime/tray-main-actions.test.ts src/main/runtime/tray-main-deps.test.ts src/main/runtime/tray-runtime-handlers.test.ts src/main/runtime/cli-command-context-main-deps.test.ts src/main/runtime/app-ready-main-deps.test.ts src/renderer/error-recovery.test.ts src/renderer/subtitle-render.test.ts src/renderer/handlers/mouse.test.ts src/renderer/handlers/keyboard.test.ts src/renderer/modals/jimaku.test.ts src/subsync/utils.test.ts src/main/anilist-url-guard.test.ts src/window-trackers/hyprland-tracker.test.ts src/window-trackers/x11-tracker.test.ts src/window-trackers/windows-helper.test.ts src/window-trackers/windows-tracker.test.ts launcher/config.test.ts 
launcher/config-domain-parsers.test.ts launcher/parse-args.test.ts launcher/main.test.ts launcher/commands/command-modules.test.ts launcher/setup-gate.test.ts stats/src/lib/api-client.test.ts", "test:core:dist": "bun test dist/cli/args.test.js dist/cli/help.test.js dist/core/services/cli-command.test.js dist/core/services/ipc.test.js dist/core/services/anki-jimaku-ipc.test.js dist/core/services/field-grouping-overlay.test.js dist/core/services/numeric-shortcut-session.test.js dist/core/services/secondary-subtitle.test.js dist/core/services/mpv-render-metrics.test.js dist/core/services/overlay-content-measurement.test.js dist/core/services/mpv-control.test.js dist/core/services/mpv.test.js dist/core/services/runtime-options-ipc.test.js dist/core/services/runtime-config.test.js dist/core/services/yomitan-extension-paths.test.js dist/core/services/config-hot-reload.test.js dist/core/services/discord-presence.test.js dist/core/services/tokenizer.test.js dist/core/services/tokenizer/annotation-stage.test.js dist/core/services/tokenizer/parser-selection-stage.test.js dist/core/services/tokenizer/parser-enrichment-stage.test.js dist/core/services/subsync.test.js dist/core/services/overlay-bridge.test.js dist/core/services/overlay-manager.test.js dist/core/services/overlay-shortcut-handler.test.js dist/core/services/mining.test.js dist/core/services/anki-jimaku.test.js dist/core/services/jimaku-download-path.test.js dist/core/services/jellyfin.test.js dist/core/services/jellyfin-remote.test.js dist/core/services/immersion-tracker-service.test.js dist/core/services/overlay-runtime-init.test.js dist/core/services/app-ready.test.js dist/core/services/startup-bootstrap.test.js dist/core/services/subtitle-processing-controller.test.js dist/core/services/anilist/anilist-token-store.test.js dist/core/services/anilist/anilist-update-queue.test.js dist/renderer/error-recovery.test.js dist/renderer/subtitle-render.test.js dist/renderer/handlers/mouse.test.js 
dist/renderer/handlers/keyboard.test.js dist/renderer/modals/jimaku.test.js dist/subsync/utils.test.js dist/main/anilist-url-guard.test.js dist/window-trackers/hyprland-tracker.test.js dist/window-trackers/x11-tracker.test.js dist/window-trackers/windows-helper.test.js dist/window-trackers/windows-tracker.test.js", "test:core:smoke:dist": "bun test dist/cli/help.test.js dist/core/services/runtime-config.test.js dist/core/services/ipc.test.js dist/core/services/overlay-manager.test.js dist/core/services/anilist/anilist-token-store.test.js dist/core/services/startup-bootstrap.test.js dist/renderer/error-recovery.test.js dist/main/anilist-url-guard.test.js dist/window-trackers/x11-tracker.test.js", "test:smoke:dist": "bun run test:config:smoke:dist && bun run test:core:smoke:dist", @@ -84,6 +84,8 @@ "author": "", "license": "GPL-3.0-or-later", "dependencies": { + "@fontsource-variable/geist": "^5.2.8", + "@fontsource-variable/geist-mono": "^5.2.7", "@hono/node-server": "^1.19.11", "axios": "^1.13.5", "commander": "^14.0.3", diff --git a/scripts/update-frequency.ts b/scripts/update-frequency.ts new file mode 100644 index 0000000..25cc7c7 --- /dev/null +++ b/scripts/update-frequency.ts @@ -0,0 +1,141 @@ +#!/usr/bin/env bun +/** + * Backfill frequency_rank in imm_words from a Yomitan-format frequency dictionary. + * + * Usage: + * bun update-frequency.ts + * + * The directory should contain term_meta_bank_*.json files (Yomitan format) + * and optionally an index.json with metadata. + * + * Example dictionaries: JPDB, BCCWJ, Innocent Corpus (in Yomitan format). + */ + +import { readFileSync, readdirSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import Database from 'libsql'; + +const DB_PATH = join( + process.env.HOME ?? 
'~', + '.config/SubMiner/immersion.sqlite', +); + +function parsePositiveNumber(value: unknown): number | null { + if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) return null; + return Math.floor(value); +} + +function parseDisplayValue(value: unknown): number | null { + if (typeof value === 'string') { + const match = value.trim().match(/^\d+/)?.[0]; + if (!match) return null; + const n = Number.parseInt(match, 10); + return Number.isFinite(n) && n > 0 ? n : null; + } + return parsePositiveNumber(value); +} + +function extractRank(meta: unknown): number | null { + if (!meta || typeof meta !== 'object') return null; + const freq = (meta as Record).frequency; + if (!freq || typeof freq !== 'object') return null; + const f = freq as Record; + return parseDisplayValue(f.displayValue) ?? parsePositiveNumber(f.value); +} + +function loadDictionary(dirPath: string): Map { + const terms = new Map(); + + const files = readdirSync(dirPath) + .filter((f) => /^term_meta_bank.*\.json$/.test(f)) + .sort(); + + if (files.length === 0) { + console.error(`No term_meta_bank_*.json files found in ${dirPath}`); + process.exit(1); + } + + for (const file of files) { + const raw = JSON.parse(readFileSync(join(dirPath, file), 'utf-8')) as unknown[]; + for (const entry of raw) { + if (!Array.isArray(entry) || entry.length < 3) continue; + const [term, , meta] = entry; + if (typeof term !== 'string') continue; + const rank = extractRank(meta); + if (rank === null) continue; + const normalized = term.trim().toLowerCase(); + if (!normalized) continue; + const existing = terms.get(normalized); + if (existing === undefined || rank < existing) { + terms.set(normalized, rank); + } + } + console.log(` Loaded ${file} (${terms.size} terms total)`); + } + + return terms; +} + +function main() { + const dictPath = process.argv[2]; + if (!dictPath) { + console.error('Usage: bun update-frequency.ts '); + console.error(''); + console.error('The directory should contain Yomitan 
term_meta_bank_*.json files.'); + console.error('Examples: JPDB, BCCWJ, Innocent Corpus frequency lists.'); + process.exit(1); + } + + if (!existsSync(dictPath)) { + console.error(`Directory not found: ${dictPath}`); + process.exit(1); + } + + if (!existsSync(DB_PATH)) { + console.error(`Database not found: ${DB_PATH}`); + process.exit(1); + } + + console.log(`Loading frequency dictionary from ${dictPath}...`); + const dict = loadDictionary(dictPath); + console.log(`Loaded ${dict.size} terms from frequency dictionary.\n`); + + console.log(`Opening database: ${DB_PATH}`); + const db = new Database(DB_PATH); + db.exec('PRAGMA journal_mode = WAL'); + db.exec('PRAGMA foreign_keys = ON'); + + const words = db.prepare('SELECT id, headword, word FROM imm_words').all() as Array<{ + id: number; + headword: string; + word: string; + }>; + console.log(`Found ${words.length} words in imm_words.\n`); + + const updateStmt = db.prepare( + 'UPDATE imm_words SET frequency_rank = ? WHERE id = ? AND (frequency_rank IS NULL OR frequency_rank > ?)', + ); + + let updated = 0; + let matched = 0; + + for (const w of words) { + const headwordNorm = w.headword.trim().toLowerCase(); + const wordNorm = w.word.trim().toLowerCase(); + + const rank = dict.get(headwordNorm) ?? dict.get(wordNorm) ?? 
null; + if (rank === null) continue; + + matched++; + const result = updateStmt.run(rank, w.id, rank); + if (result.changes > 0) updated++; + } + + console.log(`Matched: ${matched}/${words.length} words found in frequency dictionary`); + console.log(`Updated: ${updated} rows with new or better frequency_rank`); + + db.close(); + console.log('Done.'); +} + +main(); diff --git a/src/anki-integration.test.ts b/src/anki-integration.test.ts index e361182..f1734a8 100644 --- a/src/anki-integration.test.ts +++ b/src/anki-integration.test.ts @@ -56,7 +56,7 @@ function createIntegrationTestContext( const integration = new AnkiIntegration( { - nPlusOne: { + knownWords: { highlightEnabled: options.highlightEnabled ?? true, }, }, diff --git a/src/anki-integration.ts b/src/anki-integration.ts index f79d158..136de04 100644 --- a/src/anki-integration.ts +++ b/src/anki-integration.ts @@ -465,11 +465,11 @@ export class AnkiIntegration { } getKnownWordMatchMode(): NPlusOneMatchMode { - return this.config.nPlusOne?.matchMode ?? DEFAULT_ANKI_CONNECT_CONFIG.nPlusOne.matchMode; + return this.config.knownWords?.matchMode ?? 
DEFAULT_ANKI_CONNECT_CONFIG.knownWords.matchMode; } private isKnownWordCacheEnabled(): boolean { - return this.config.nPlusOne?.highlightEnabled === true; + return this.config.knownWords?.highlightEnabled === true; } private getConfiguredAnkiTags(): string[] { diff --git a/src/anki-integration/known-word-cache.ts b/src/anki-integration/known-word-cache.ts index b693fb8..abf3ebf 100644 --- a/src/anki-integration/known-word-cache.ts +++ b/src/anki-integration/known-word-cache.ts @@ -203,32 +203,34 @@ export class KnownWordCacheManager { } private isKnownWordCacheEnabled(): boolean { - return this.deps.getConfig().nPlusOne?.highlightEnabled === true; + return this.deps.getConfig().knownWords?.highlightEnabled === true; } private getKnownWordRefreshIntervalMs(): number { - const minutes = this.deps.getConfig().nPlusOne?.refreshMinutes; + const minutes = this.deps.getConfig().knownWords?.refreshMinutes; const safeMinutes = typeof minutes === 'number' && Number.isFinite(minutes) && minutes > 0 ? minutes - : DEFAULT_ANKI_CONNECT_CONFIG.nPlusOne.refreshMinutes; + : DEFAULT_ANKI_CONNECT_CONFIG.knownWords.refreshMinutes; return safeMinutes * 60_000; } private getKnownWordDecks(): string[] { - const configuredDecks = this.deps.getConfig().nPlusOne?.decks; + const configuredDecks = this.deps.getConfig().knownWords?.decks; if (Array.isArray(configuredDecks)) { - const decks = configuredDecks - .filter((entry): entry is string => typeof entry === 'string') - .map((entry) => entry.trim()) - .filter((entry) => entry.length > 0); - return [...new Set(decks)]; + return configuredDecks + .map((deck) => (typeof deck === 'string' ? deck.trim() : '')) + .filter((deck) => deck.length > 0); } const deck = this.deps.getConfig().deck?.trim(); return deck ? 
[deck] : []; } + private getConfiguredFields(): string[] { + return ['Expression', 'Word', 'Reading', 'Word Reading']; + } + private buildKnownWordsQuery(): string { const decks = this.getKnownWordDecks(); if (decks.length === 0) { @@ -344,8 +346,8 @@ export class KnownWordCacheManager { private extractKnownWordsFromNoteInfo(noteInfo: KnownWordCacheNoteInfo): string[] { const words: string[] = []; - const preferredFields = ['Expression', 'Word']; - for (const preferredField of preferredFields) { + const configuredFields = this.getConfiguredFields(); + for (const preferredField of configuredFields) { const fieldName = resolveFieldName(Object.keys(noteInfo.fields), preferredField); if (!fieldName) continue; diff --git a/src/anki-integration/runtime.test.ts b/src/anki-integration/runtime.test.ts index b3b157d..dbde606 100644 --- a/src/anki-integration/runtime.test.ts +++ b/src/anki-integration/runtime.test.ts @@ -78,7 +78,7 @@ test('AnkiIntegrationRuntime starts proxy transport when proxy mode is enabled', test('AnkiIntegrationRuntime switches transports and clears known words when runtime patch disables highlighting', () => { const { runtime, calls } = createRuntime({ - nPlusOne: { + knownWords: { highlightEnabled: true, }, pollingRate: 250, @@ -88,7 +88,7 @@ test('AnkiIntegrationRuntime switches transports and clears known words when run calls.length = 0; runtime.applyRuntimeConfigPatch({ - nPlusOne: { + knownWords: { highlightEnabled: false, }, proxy: { diff --git a/src/anki-integration/runtime.ts b/src/anki-integration/runtime.ts index 5f3689c..bccabfe 100644 --- a/src/anki-integration/runtime.ts +++ b/src/anki-integration/runtime.ts @@ -86,6 +86,14 @@ export function normalizeAnkiIntegrationConfig(config: AnkiConnectConfig): AnkiC ...DEFAULT_ANKI_CONNECT_CONFIG.media, ...(config.media ?? {}), }, + knownWords: { + ...DEFAULT_ANKI_CONNECT_CONFIG.knownWords, + ...(config.knownWords ?? 
{}), + }, + nPlusOne: { + ...DEFAULT_ANKI_CONNECT_CONFIG.nPlusOne, + ...(config.nPlusOne ?? {}), + }, behavior: { ...DEFAULT_ANKI_CONNECT_CONFIG.behavior, ...(config.behavior ?? {}), @@ -136,12 +144,19 @@ export class AnkiIntegrationRuntime { } applyRuntimeConfigPatch(patch: Partial): void { - const wasKnownWordCacheEnabled = this.config.nPlusOne?.highlightEnabled === true; + const wasKnownWordCacheEnabled = this.config.knownWords?.highlightEnabled === true; const previousTransportKey = this.getTransportConfigKey(this.config); const mergedConfig: AnkiConnectConfig = { ...this.config, ...patch, + knownWords: + patch.knownWords !== undefined + ? { + ...(this.config.knownWords ?? DEFAULT_ANKI_CONNECT_CONFIG.knownWords), + ...patch.knownWords, + } + : this.config.knownWords, nPlusOne: patch.nPlusOne !== undefined ? { @@ -177,7 +192,7 @@ export class AnkiIntegrationRuntime { this.config = normalizeAnkiIntegrationConfig(mergedConfig); this.deps.onConfigChanged?.(this.config); - if (wasKnownWordCacheEnabled && this.config.nPlusOne?.highlightEnabled === false) { + if (wasKnownWordCacheEnabled && this.config.knownWords?.highlightEnabled === false) { this.deps.knownWordCache.stopLifecycle(); this.deps.knownWordCache.clearKnownWordCacheState(); } else { diff --git a/src/config/config.test.ts b/src/config/config.test.ts index e559de6..b83836f 100644 --- a/src/config/config.test.ts +++ b/src/config/config.test.ts @@ -1363,13 +1363,13 @@ test('runtime options registry is centralized', () => { ]); }); -test('validates ankiConnect n+1 behavior values', () => { +test('validates ankiConnect knownWords behavior values', () => { const dir = makeTempDir(); fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { - "nPlusOne": { + "knownWords": { "highlightEnabled": "yes", "refreshMinutes": -5 } @@ -1383,24 +1383,24 @@ test('validates ankiConnect n+1 behavior values', () => { const warnings = service.getWarnings(); assert.equal( - 
config.ankiConnect.nPlusOne.highlightEnabled, - DEFAULT_CONFIG.ankiConnect.nPlusOne.highlightEnabled, + config.ankiConnect.knownWords.highlightEnabled, + DEFAULT_CONFIG.ankiConnect.knownWords.highlightEnabled, ); assert.equal( - config.ankiConnect.nPlusOne.refreshMinutes, - DEFAULT_CONFIG.ankiConnect.nPlusOne.refreshMinutes, + config.ankiConnect.knownWords.refreshMinutes, + DEFAULT_CONFIG.ankiConnect.knownWords.refreshMinutes, ); - assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.highlightEnabled')); - assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.refreshMinutes')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.knownWords.highlightEnabled')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.knownWords.refreshMinutes')); }); -test('accepts valid ankiConnect n+1 behavior values', () => { +test('accepts valid ankiConnect knownWords behavior values', () => { const dir = makeTempDir(); fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { - "nPlusOne": { + "knownWords": { "highlightEnabled": true, "refreshMinutes": 120 } @@ -1412,8 +1412,8 @@ test('accepts valid ankiConnect n+1 behavior values', () => { const service = new ConfigService(dir); const config = service.getConfig(); - assert.equal(config.ankiConnect.nPlusOne.highlightEnabled, true); - assert.equal(config.ankiConnect.nPlusOne.refreshMinutes, 120); + assert.equal(config.ankiConnect.knownWords.highlightEnabled, true); + assert.equal(config.ankiConnect.knownWords.refreshMinutes, 120); }); test('validates ankiConnect n+1 minimum sentence word count', () => { @@ -1461,13 +1461,13 @@ test('accepts valid ankiConnect n+1 minimum sentence word count', () => { assert.equal(config.ankiConnect.nPlusOne.minSentenceWords, 4); }); -test('validates ankiConnect n+1 match mode values', () => { +test('validates ankiConnect knownWords match mode values', () => { const dir = makeTempDir(); fs.writeFileSync( 
path.join(dir, 'config.jsonc'), `{ "ankiConnect": { - "nPlusOne": { + "knownWords": { "matchMode": "bad-mode" } } @@ -1480,19 +1480,19 @@ test('validates ankiConnect n+1 match mode values', () => { const warnings = service.getWarnings(); assert.equal( - config.ankiConnect.nPlusOne.matchMode, - DEFAULT_CONFIG.ankiConnect.nPlusOne.matchMode, + config.ankiConnect.knownWords.matchMode, + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode, ); - assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.matchMode')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.knownWords.matchMode')); }); -test('accepts valid ankiConnect n+1 match mode values', () => { +test('accepts valid ankiConnect knownWords match mode values', () => { const dir = makeTempDir(); fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { - "nPlusOne": { + "knownWords": { "matchMode": "surface" } } @@ -1503,18 +1503,20 @@ test('accepts valid ankiConnect n+1 match mode values', () => { const service = new ConfigService(dir); const config = service.getConfig(); - assert.equal(config.ankiConnect.nPlusOne.matchMode, 'surface'); + assert.equal(config.ankiConnect.knownWords.matchMode, 'surface'); }); -test('validates ankiConnect n+1 color values', () => { +test('validates ankiConnect knownWords and n+1 color values', () => { const dir = makeTempDir(); fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { "nPlusOne": { - "nPlusOne": "not-a-color", - "knownWord": 123 + "nPlusOne": "not-a-color" + }, + "knownWords": { + "color": 123 } } }`, @@ -1527,22 +1529,24 @@ test('validates ankiConnect n+1 color values', () => { assert.equal(config.ankiConnect.nPlusOne.nPlusOne, DEFAULT_CONFIG.ankiConnect.nPlusOne.nPlusOne); assert.equal( - config.ankiConnect.nPlusOne.knownWord, - DEFAULT_CONFIG.ankiConnect.nPlusOne.knownWord, + config.ankiConnect.knownWords.color, + DEFAULT_CONFIG.ankiConnect.knownWords.color, ); assert.ok(warnings.some((warning) => 
warning.path === 'ankiConnect.nPlusOne.nPlusOne')); - assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.knownWord')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.knownWords.color')); }); -test('accepts valid ankiConnect n+1 color values', () => { +test('accepts valid ankiConnect knownWords and n+1 color values', () => { const dir = makeTempDir(); fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { "nPlusOne": { - "nPlusOne": "#c6a0f6", - "knownWord": "#a6da95" + "nPlusOne": "#c6a0f6" + }, + "knownWords": { + "color": "#a6da95" } } }`, @@ -1553,7 +1557,46 @@ test('accepts valid ankiConnect n+1 color values', () => { const config = service.getConfig(); assert.equal(config.ankiConnect.nPlusOne.nPlusOne, '#c6a0f6'); - assert.equal(config.ankiConnect.nPlusOne.knownWord, '#a6da95'); + assert.equal(config.ankiConnect.knownWords.color, '#a6da95'); +}); + +test('supports legacy ankiConnect nPlusOne known-word settings as fallback', () => { + const dir = makeTempDir(); + fs.writeFileSync( + path.join(dir, 'config.jsonc'), + `{ + "ankiConnect": { + "nPlusOne": { + "highlightEnabled": true, + "refreshMinutes": 90, + "matchMode": "surface", + "decks": ["Mining", "Kaishi 1.5k"], + "knownWord": "#a6da95" + } + } + }`, + 'utf-8', + ); + + const service = new ConfigService(dir); + const config = service.getConfig(); + const warnings = service.getWarnings(); + + assert.equal(config.ankiConnect.knownWords.highlightEnabled, true); + assert.equal(config.ankiConnect.knownWords.refreshMinutes, 90); + assert.equal(config.ankiConnect.knownWords.matchMode, 'surface'); + assert.deepEqual(config.ankiConnect.knownWords.decks, ['Mining', 'Kaishi 1.5k']); + assert.equal(config.ankiConnect.knownWords.color, '#a6da95'); + assert.ok( + warnings.some( + (warning) => + warning.path === 'ankiConnect.nPlusOne.highlightEnabled' || + warning.path === 'ankiConnect.nPlusOne.refreshMinutes' || + warning.path === 
'ankiConnect.nPlusOne.matchMode' || + warning.path === 'ankiConnect.nPlusOne.decks' || + warning.path === 'ankiConnect.nPlusOne.knownWord', + ), + ); }); test('supports legacy ankiConnect.behavior N+1 settings as fallback', () => { @@ -1576,9 +1619,9 @@ test('supports legacy ankiConnect.behavior N+1 settings as fallback', () => { const config = service.getConfig(); const warnings = service.getWarnings(); - assert.equal(config.ankiConnect.nPlusOne.highlightEnabled, true); - assert.equal(config.ankiConnect.nPlusOne.refreshMinutes, 90); - assert.equal(config.ankiConnect.nPlusOne.matchMode, 'surface'); + assert.equal(config.ankiConnect.knownWords.highlightEnabled, true); + assert.equal(config.ankiConnect.knownWords.refreshMinutes, 90); + assert.equal(config.ankiConnect.knownWords.matchMode, 'surface'); assert.ok( warnings.some( (warning) => @@ -1799,13 +1842,13 @@ test('ignores deprecated isLapis sentence-card field overrides', () => { ); }); -test('accepts valid ankiConnect n+1 deck list', () => { +test('accepts valid ankiConnect knownWords deck list', () => { const dir = makeTempDir(); fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { - "nPlusOne": { + "knownWords": { "decks": ["Deck One", "Deck Two"] } } @@ -1816,7 +1859,7 @@ test('accepts valid ankiConnect n+1 deck list', () => { const service = new ConfigService(dir); const config = service.getConfig(); - assert.deepEqual(config.ankiConnect.nPlusOne.decks, ['Deck One', 'Deck Two']); + assert.deepEqual(config.ankiConnect.knownWords.decks, ['Deck One', 'Deck Two']); }); test('accepts valid ankiConnect tags list', () => { @@ -1857,13 +1900,13 @@ test('falls back to default when ankiConnect tags list is invalid', () => { assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.tags')); }); -test('falls back to default when ankiConnect n+1 deck list is invalid', () => { +test('falls back to default when ankiConnect knownWords deck list is invalid', () => { const dir = makeTempDir(); 
fs.writeFileSync( path.join(dir, 'config.jsonc'), `{ "ankiConnect": { - "nPlusOne": { + "knownWords": { "decks": "not-an-array" } } @@ -1875,8 +1918,8 @@ test('falls back to default when ankiConnect n+1 deck list is invalid', () => { const config = service.getConfig(); const warnings = service.getWarnings(); - assert.deepEqual(config.ankiConnect.nPlusOne.decks, []); - assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.decks')); + assert.deepEqual(config.ankiConnect.knownWords.decks, []); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.knownWords.decks')); }); test('template generator includes known keys', () => { @@ -1891,9 +1934,10 @@ test('template generator includes known keys', () => { assert.match(output, /"youtubeSubgen":/); assert.match(output, /"characterDictionary":\s*\{/); assert.match(output, /"preserveLineBreaks": false/); + assert.match(output, /"knownWords"\s*:\s*\{/); + assert.match(output, /"color": "#a6da95"/); assert.match(output, /"nPlusOne"\s*:\s*\{/); assert.match(output, /"nPlusOne": "#c6a0f6"/); - assert.match(output, /"knownWord": "#a6da95"/); assert.match(output, /"minSentenceWords": 3/); assert.match(output, /auto-generated from src\/config\/definitions.ts/); assert.match( diff --git a/src/config/definitions/defaults-integrations.ts b/src/config/definitions/defaults-integrations.ts index e1c9f73..a533c44 100644 --- a/src/config/definitions/defaults-integrations.ts +++ b/src/config/definitions/defaults-integrations.ts @@ -50,6 +50,13 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick< fallbackDuration: 3.0, maxMediaDuration: 30, }, + knownWords: { + highlightEnabled: false, + refreshMinutes: 1440, + matchMode: 'headword', + decks: [], + color: '#a6da95', + }, behavior: { overwriteAudio: true, overwriteImage: true, @@ -59,13 +66,8 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick< autoUpdateNewCards: true, }, nPlusOne: { - highlightEnabled: false, - refreshMinutes: 1440, - matchMode: 'headword', - 
decks: [], minSentenceWords: 3, nPlusOne: '#c6a0f6', - knownWord: '#a6da95', }, metadata: { pattern: '[SubMiner] %f (%t)', diff --git a/src/config/definitions/defaults-stats.ts b/src/config/definitions/defaults-stats.ts index c0e5169..7d5e6dd 100644 --- a/src/config/definitions/defaults-stats.ts +++ b/src/config/definitions/defaults-stats.ts @@ -3,7 +3,7 @@ import { ResolvedConfig } from '../../types.js'; export const STATS_DEFAULT_CONFIG: Pick = { stats: { toggleKey: 'Backquote', - serverPort: 5175, + serverPort: 6969, autoStartServer: true, autoOpenBrowser: true, }, diff --git a/src/config/definitions/options-integrations.ts b/src/config/definitions/options-integrations.ts index 91947c6..8569369 100644 --- a/src/config/definitions/options-integrations.ts +++ b/src/config/definitions/options-integrations.ts @@ -77,22 +77,22 @@ export function buildIntegrationConfigOptionRegistry( runtime: runtimeOptionById.get('anki.autoUpdateNewCards'), }, { - path: 'ankiConnect.nPlusOne.matchMode', + path: 'ankiConnect.knownWords.matchMode', kind: 'enum', enumValues: ['headword', 'surface'], - defaultValue: defaultConfig.ankiConnect.nPlusOne.matchMode, - description: 'Known-word matching strategy for N+1 highlighting.', + defaultValue: defaultConfig.ankiConnect.knownWords.matchMode, + description: 'Known-word matching strategy for subtitle annotations.', }, { - path: 'ankiConnect.nPlusOne.highlightEnabled', + path: 'ankiConnect.knownWords.highlightEnabled', kind: 'boolean', - defaultValue: defaultConfig.ankiConnect.nPlusOne.highlightEnabled, + defaultValue: defaultConfig.ankiConnect.knownWords.highlightEnabled, description: 'Enable fast local highlighting for words already known in Anki.', }, { - path: 'ankiConnect.nPlusOne.refreshMinutes', + path: 'ankiConnect.knownWords.refreshMinutes', kind: 'number', - defaultValue: defaultConfig.ankiConnect.nPlusOne.refreshMinutes, + defaultValue: defaultConfig.ankiConnect.knownWords.refreshMinutes, description: 'Minutes between known-word 
cache refreshes.', }, { @@ -102,10 +102,10 @@ export function buildIntegrationConfigOptionRegistry( description: 'Minimum sentence word count required for N+1 targeting (default: 3).', }, { - path: 'ankiConnect.nPlusOne.decks', + path: 'ankiConnect.knownWords.decks', kind: 'array', - defaultValue: defaultConfig.ankiConnect.nPlusOne.decks, - description: 'Decks used for N+1 known-word cache scope. Supports one or more deck names.', + defaultValue: defaultConfig.ankiConnect.knownWords.decks, + description: 'Decks used for known-word cache scope. Supports one or more deck names.', }, { path: 'ankiConnect.nPlusOne.nPlusOne', @@ -114,10 +114,10 @@ export function buildIntegrationConfigOptionRegistry( description: 'Color used for the single N+1 target token highlight.', }, { - path: 'ankiConnect.nPlusOne.knownWord', + path: 'ankiConnect.knownWords.color', kind: 'string', - defaultValue: defaultConfig.ankiConnect.nPlusOne.knownWord, - description: 'Color used for legacy known-word highlights.', + defaultValue: defaultConfig.ankiConnect.knownWords.color, + description: 'Color used for known-word highlights.', }, { path: 'ankiConnect.isKiku.fieldGrouping', diff --git a/src/config/definitions/runtime-options.ts b/src/config/definitions/runtime-options.ts index 58a4b3a..afba727 100644 --- a/src/config/definitions/runtime-options.ts +++ b/src/config/definitions/runtime-options.ts @@ -21,15 +21,19 @@ export function buildRuntimeOptionRegistry( }, { id: 'subtitle.annotation.nPlusOne', - path: 'ankiConnect.nPlusOne.highlightEnabled', + path: 'ankiConnect.knownWords.highlightEnabled', label: 'N+1 Annotation', scope: 'subtitle', valueType: 'boolean', allowedValues: [true, false], - defaultValue: defaultConfig.ankiConnect.nPlusOne.highlightEnabled, + defaultValue: defaultConfig.ankiConnect.knownWords.highlightEnabled, requiresRestart: false, formatValueForOsd: (value) => (value === true ? 
'On' : 'Off'), - toAnkiPatch: () => ({}), + toAnkiPatch: (value) => ({ + knownWords: { + highlightEnabled: value === true, + }, + }), }, { id: 'subtitle.annotation.jlpt', @@ -57,16 +61,16 @@ export function buildRuntimeOptionRegistry( }, { id: 'anki.nPlusOneMatchMode', - path: 'ankiConnect.nPlusOne.matchMode', - label: 'N+1 Match Mode', + path: 'ankiConnect.knownWords.matchMode', + label: 'Known Word Match Mode', scope: 'ankiConnect', valueType: 'enum', allowedValues: ['headword', 'surface'], - defaultValue: defaultConfig.ankiConnect.nPlusOne.matchMode, + defaultValue: defaultConfig.ankiConnect.knownWords.matchMode, requiresRestart: false, formatValueForOsd: (value) => String(value), toAnkiPatch: (value) => ({ - nPlusOne: { + knownWords: { matchMode: value === 'headword' || value === 'surface' ? value : 'headword', }, }), diff --git a/src/config/resolve/anki-connect.test.ts b/src/config/resolve/anki-connect.test.ts index 247ec15..36a373f 100644 --- a/src/config/resolve/anki-connect.test.ts +++ b/src/config/resolve/anki-connect.test.ts @@ -20,23 +20,20 @@ function makeContext(ankiConnect: unknown): { return { context, warnings }; } -test('modern invalid nPlusOne.highlightEnabled warns modern key and does not fallback to legacy', () => { +test('modern invalid knownWords.highlightEnabled warns modern key and does not fallback to legacy', () => { const { context, warnings } = makeContext({ - behavior: { nPlusOneHighlightEnabled: true }, - nPlusOne: { highlightEnabled: 'yes' }, + nPlusOne: { highlightEnabled: true }, + knownWords: { highlightEnabled: 'yes' }, }); applyAnkiConnectResolution(context); assert.equal( - context.resolved.ankiConnect.nPlusOne.highlightEnabled, - DEFAULT_CONFIG.ankiConnect.nPlusOne.highlightEnabled, - ); - assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.highlightEnabled')); - assert.equal( - warnings.some((warning) => warning.path === 'ankiConnect.behavior.nPlusOneHighlightEnabled'), - false, + 
context.resolved.ankiConnect.knownWords.highlightEnabled, + DEFAULT_CONFIG.ankiConnect.knownWords.highlightEnabled, ); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.knownWords.highlightEnabled')); + assert.equal(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.highlightEnabled'), false); }); test('normalizes ankiConnect tags by trimming and deduping', () => { @@ -53,18 +50,18 @@ test('normalizes ankiConnect tags by trimming and deduping', () => { ); }); -test('warns and falls back for invalid nPlusOne.decks entries', () => { +test('warns and falls back for invalid knownWords.decks entries', () => { const { context, warnings } = makeContext({ - nPlusOne: { decks: ['Core Deck', 123] }, + knownWords: { decks: ['Core Deck', 123] }, }); applyAnkiConnectResolution(context); assert.deepEqual( - context.resolved.ankiConnect.nPlusOne.decks, - DEFAULT_CONFIG.ankiConnect.nPlusOne.decks, + context.resolved.ankiConnect.knownWords.decks, + DEFAULT_CONFIG.ankiConnect.knownWords.decks, ); - assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.nPlusOne.decks')); + assert.ok(warnings.some((warning) => warning.path === 'ankiConnect.knownWords.decks')); }); test('accepts valid proxy settings', () => { diff --git a/src/config/resolve/anki-connect.ts b/src/config/resolve/anki-connect.ts index fa286fb..b2e5042 100644 --- a/src/config/resolve/anki-connect.ts +++ b/src/config/resolve/anki-connect.ts @@ -42,12 +42,13 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { ]); const { + knownWords: _knownWordsConfigFromAnkiConnect, nPlusOne: _nPlusOneConfigFromAnkiConnect, ai: _ankiAiConfig, - ...ankiConnectWithoutNPlusOne + ...ankiConnectWithoutKnownWordsOrNPlusOne } = ac as Record; const ankiConnectWithoutLegacy = Object.fromEntries( - Object.entries(ankiConnectWithoutNPlusOne).filter(([key]) => !legacyKeys.has(key)), + Object.entries(ankiConnectWithoutKnownWordsOrNPlusOne).filter(([key]) => !legacyKeys.has(key)), 
); context.resolved.ankiConnect = { @@ -67,6 +68,9 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { ? (ac.media as (typeof context.resolved)['ankiConnect']['media']) : {}), }, + knownWords: { + ...context.resolved.ankiConnect.knownWords, + }, behavior: { ...context.resolved.ankiConnect.behavior, ...(isObject(ac.behavior) @@ -620,81 +624,126 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { ); } + const knownWordsConfig = isObject(ac.knownWords) ? (ac.knownWords as Record) : {}; const nPlusOneConfig = isObject(ac.nPlusOne) ? (ac.nPlusOne as Record) : {}; - const nPlusOneHighlightEnabled = asBoolean(nPlusOneConfig.highlightEnabled); - if (nPlusOneHighlightEnabled !== undefined) { - context.resolved.ankiConnect.nPlusOne.highlightEnabled = nPlusOneHighlightEnabled; + const knownWordsHighlightEnabled = asBoolean(knownWordsConfig.highlightEnabled); + const legacyNPlusOneHighlightEnabled = asBoolean(nPlusOneConfig.highlightEnabled); + if (knownWordsHighlightEnabled !== undefined) { + context.resolved.ankiConnect.knownWords.highlightEnabled = knownWordsHighlightEnabled; + } else if (knownWordsConfig.highlightEnabled !== undefined) { + context.warn( + 'ankiConnect.knownWords.highlightEnabled', + knownWordsConfig.highlightEnabled, + context.resolved.ankiConnect.knownWords.highlightEnabled, + 'Expected boolean.', + ); + context.resolved.ankiConnect.knownWords.highlightEnabled = + DEFAULT_CONFIG.ankiConnect.knownWords.highlightEnabled; + } else if (legacyNPlusOneHighlightEnabled !== undefined) { + context.resolved.ankiConnect.knownWords.highlightEnabled = legacyNPlusOneHighlightEnabled; + context.warn( + 'ankiConnect.nPlusOne.highlightEnabled', + nPlusOneConfig.highlightEnabled, + DEFAULT_CONFIG.ankiConnect.knownWords.highlightEnabled, + 'Legacy key is deprecated; use ankiConnect.knownWords.highlightEnabled', + ); } else if (nPlusOneConfig.highlightEnabled !== undefined) { context.warn( 
'ankiConnect.nPlusOne.highlightEnabled', nPlusOneConfig.highlightEnabled, - context.resolved.ankiConnect.nPlusOne.highlightEnabled, + context.resolved.ankiConnect.knownWords.highlightEnabled, 'Expected boolean.', ); - context.resolved.ankiConnect.nPlusOne.highlightEnabled = - DEFAULT_CONFIG.ankiConnect.nPlusOne.highlightEnabled; + context.resolved.ankiConnect.knownWords.highlightEnabled = + DEFAULT_CONFIG.ankiConnect.knownWords.highlightEnabled; } else { - const legacyNPlusOneHighlightEnabled = asBoolean(behavior.nPlusOneHighlightEnabled); - if (legacyNPlusOneHighlightEnabled !== undefined) { - context.resolved.ankiConnect.nPlusOne.highlightEnabled = legacyNPlusOneHighlightEnabled; + const legacyBehaviorNPlusOneHighlightEnabled = asBoolean(behavior.nPlusOneHighlightEnabled); + if (legacyBehaviorNPlusOneHighlightEnabled !== undefined) { + context.resolved.ankiConnect.knownWords.highlightEnabled = + legacyBehaviorNPlusOneHighlightEnabled; context.warn( 'ankiConnect.behavior.nPlusOneHighlightEnabled', behavior.nPlusOneHighlightEnabled, - DEFAULT_CONFIG.ankiConnect.nPlusOne.highlightEnabled, - 'Legacy key is deprecated; use ankiConnect.nPlusOne.highlightEnabled', + DEFAULT_CONFIG.ankiConnect.knownWords.highlightEnabled, + 'Legacy key is deprecated; use ankiConnect.knownWords.highlightEnabled', ); } else { - context.resolved.ankiConnect.nPlusOne.highlightEnabled = - DEFAULT_CONFIG.ankiConnect.nPlusOne.highlightEnabled; + context.resolved.ankiConnect.knownWords.highlightEnabled = + DEFAULT_CONFIG.ankiConnect.knownWords.highlightEnabled; } } - const nPlusOneRefreshMinutes = asNumber(nPlusOneConfig.refreshMinutes); - const hasValidNPlusOneRefreshMinutes = - nPlusOneRefreshMinutes !== undefined && - Number.isInteger(nPlusOneRefreshMinutes) && - nPlusOneRefreshMinutes > 0; - if (nPlusOneRefreshMinutes !== undefined) { - if (hasValidNPlusOneRefreshMinutes) { - context.resolved.ankiConnect.nPlusOne.refreshMinutes = nPlusOneRefreshMinutes; + const knownWordsRefreshMinutes = 
asNumber(knownWordsConfig.refreshMinutes); + const legacyNPlusOneRefreshMinutes = asNumber(nPlusOneConfig.refreshMinutes); + const hasValidKnownWordsRefreshMinutes = + knownWordsRefreshMinutes !== undefined && + Number.isInteger(knownWordsRefreshMinutes) && + knownWordsRefreshMinutes > 0; + const hasValidLegacyNPlusOneRefreshMinutes = + legacyNPlusOneRefreshMinutes !== undefined && + Number.isInteger(legacyNPlusOneRefreshMinutes) && + legacyNPlusOneRefreshMinutes > 0; + if (knownWordsRefreshMinutes !== undefined) { + if (hasValidKnownWordsRefreshMinutes) { + context.resolved.ankiConnect.knownWords.refreshMinutes = knownWordsRefreshMinutes; + } else { + context.warn( + 'ankiConnect.knownWords.refreshMinutes', + knownWordsConfig.refreshMinutes, + context.resolved.ankiConnect.knownWords.refreshMinutes, + 'Expected a positive integer.', + ); + context.resolved.ankiConnect.knownWords.refreshMinutes = + DEFAULT_CONFIG.ankiConnect.knownWords.refreshMinutes; + } + } else if (legacyNPlusOneRefreshMinutes !== undefined) { + if (hasValidLegacyNPlusOneRefreshMinutes) { + context.resolved.ankiConnect.knownWords.refreshMinutes = legacyNPlusOneRefreshMinutes; + context.warn( + 'ankiConnect.nPlusOne.refreshMinutes', + nPlusOneConfig.refreshMinutes, + DEFAULT_CONFIG.ankiConnect.knownWords.refreshMinutes, + 'Legacy key is deprecated; use ankiConnect.knownWords.refreshMinutes', + ); } else { context.warn( 'ankiConnect.nPlusOne.refreshMinutes', nPlusOneConfig.refreshMinutes, - context.resolved.ankiConnect.nPlusOne.refreshMinutes, + context.resolved.ankiConnect.knownWords.refreshMinutes, 'Expected a positive integer.', ); - context.resolved.ankiConnect.nPlusOne.refreshMinutes = - DEFAULT_CONFIG.ankiConnect.nPlusOne.refreshMinutes; + context.resolved.ankiConnect.knownWords.refreshMinutes = + DEFAULT_CONFIG.ankiConnect.knownWords.refreshMinutes; } } else if (asNumber(behavior.nPlusOneRefreshMinutes) !== undefined) { - const legacyNPlusOneRefreshMinutes = 
asNumber(behavior.nPlusOneRefreshMinutes); + const legacyBehaviorNPlusOneRefreshMinutes = asNumber(behavior.nPlusOneRefreshMinutes); const hasValidLegacyRefreshMinutes = - legacyNPlusOneRefreshMinutes !== undefined && - Number.isInteger(legacyNPlusOneRefreshMinutes) && - legacyNPlusOneRefreshMinutes > 0; + legacyBehaviorNPlusOneRefreshMinutes !== undefined && + Number.isInteger(legacyBehaviorNPlusOneRefreshMinutes) && + legacyBehaviorNPlusOneRefreshMinutes > 0; if (hasValidLegacyRefreshMinutes) { - context.resolved.ankiConnect.nPlusOne.refreshMinutes = legacyNPlusOneRefreshMinutes; + context.resolved.ankiConnect.knownWords.refreshMinutes = + legacyBehaviorNPlusOneRefreshMinutes; context.warn( 'ankiConnect.behavior.nPlusOneRefreshMinutes', behavior.nPlusOneRefreshMinutes, - DEFAULT_CONFIG.ankiConnect.nPlusOne.refreshMinutes, - 'Legacy key is deprecated; use ankiConnect.nPlusOne.refreshMinutes', + DEFAULT_CONFIG.ankiConnect.knownWords.refreshMinutes, + 'Legacy key is deprecated; use ankiConnect.knownWords.refreshMinutes', ); } else { context.warn( 'ankiConnect.behavior.nPlusOneRefreshMinutes', behavior.nPlusOneRefreshMinutes, - context.resolved.ankiConnect.nPlusOne.refreshMinutes, + context.resolved.ankiConnect.knownWords.refreshMinutes, 'Expected a positive integer.', ); - context.resolved.ankiConnect.nPlusOne.refreshMinutes = - DEFAULT_CONFIG.ankiConnect.nPlusOne.refreshMinutes; + context.resolved.ankiConnect.knownWords.refreshMinutes = + DEFAULT_CONFIG.ankiConnect.knownWords.refreshMinutes; } } else { - context.resolved.ankiConnect.nPlusOne.refreshMinutes = - DEFAULT_CONFIG.ankiConnect.nPlusOne.refreshMinutes; + context.resolved.ankiConnect.knownWords.refreshMinutes = + DEFAULT_CONFIG.ankiConnect.knownWords.refreshMinutes; } const nPlusOneMinSentenceWords = asNumber(nPlusOneConfig.minSentenceWords); @@ -720,72 +769,137 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { DEFAULT_CONFIG.ankiConnect.nPlusOne.minSentenceWords; } - const 
nPlusOneMatchMode = asString(nPlusOneConfig.matchMode); - const legacyNPlusOneMatchMode = asString(behavior.nPlusOneMatchMode); - const hasValidNPlusOneMatchMode = - nPlusOneMatchMode === 'headword' || nPlusOneMatchMode === 'surface'; - const hasValidLegacyMatchMode = + const knownWordsMatchMode = asString(knownWordsConfig.matchMode); + const legacyNPlusOneMatchMode = asString(nPlusOneConfig.matchMode); + const legacyBehaviorNPlusOneMatchMode = asString(behavior.nPlusOneMatchMode); + const hasValidKnownWordsMatchMode = + knownWordsMatchMode === 'headword' || knownWordsMatchMode === 'surface'; + const hasValidLegacyNPlusOneMatchMode = legacyNPlusOneMatchMode === 'headword' || legacyNPlusOneMatchMode === 'surface'; - if (hasValidNPlusOneMatchMode) { - context.resolved.ankiConnect.nPlusOne.matchMode = nPlusOneMatchMode; - } else if (nPlusOneMatchMode !== undefined) { + const hasValidLegacyMatchMode = + legacyBehaviorNPlusOneMatchMode === 'headword' || legacyBehaviorNPlusOneMatchMode === 'surface'; + if (hasValidKnownWordsMatchMode) { + context.resolved.ankiConnect.knownWords.matchMode = knownWordsMatchMode; + } else if (knownWordsMatchMode !== undefined) { context.warn( - 'ankiConnect.nPlusOne.matchMode', - nPlusOneConfig.matchMode, - DEFAULT_CONFIG.ankiConnect.nPlusOne.matchMode, + 'ankiConnect.knownWords.matchMode', + knownWordsConfig.matchMode, + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode, "Expected 'headword' or 'surface'.", ); - context.resolved.ankiConnect.nPlusOne.matchMode = DEFAULT_CONFIG.ankiConnect.nPlusOne.matchMode; + context.resolved.ankiConnect.knownWords.matchMode = + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode; } else if (legacyNPlusOneMatchMode !== undefined) { + if (hasValidLegacyNPlusOneMatchMode) { + context.resolved.ankiConnect.knownWords.matchMode = legacyNPlusOneMatchMode; + context.warn( + 'ankiConnect.nPlusOne.matchMode', + nPlusOneConfig.matchMode, + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode, + 'Legacy key is deprecated; use 
ankiConnect.knownWords.matchMode', + ); + } else { + context.warn( + 'ankiConnect.nPlusOne.matchMode', + nPlusOneConfig.matchMode, + context.resolved.ankiConnect.knownWords.matchMode, + "Expected 'headword' or 'surface'.", + ); + context.resolved.ankiConnect.knownWords.matchMode = + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode; + } + } else if (legacyBehaviorNPlusOneMatchMode !== undefined) { if (hasValidLegacyMatchMode) { - context.resolved.ankiConnect.nPlusOne.matchMode = legacyNPlusOneMatchMode; + context.resolved.ankiConnect.knownWords.matchMode = legacyBehaviorNPlusOneMatchMode; context.warn( 'ankiConnect.behavior.nPlusOneMatchMode', behavior.nPlusOneMatchMode, - DEFAULT_CONFIG.ankiConnect.nPlusOne.matchMode, - 'Legacy key is deprecated; use ankiConnect.nPlusOne.matchMode', + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode, + 'Legacy key is deprecated; use ankiConnect.knownWords.matchMode', ); } else { context.warn( 'ankiConnect.behavior.nPlusOneMatchMode', behavior.nPlusOneMatchMode, - context.resolved.ankiConnect.nPlusOne.matchMode, + context.resolved.ankiConnect.knownWords.matchMode, "Expected 'headword' or 'surface'.", ); - context.resolved.ankiConnect.nPlusOne.matchMode = - DEFAULT_CONFIG.ankiConnect.nPlusOne.matchMode; + context.resolved.ankiConnect.knownWords.matchMode = + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode; } } else { - context.resolved.ankiConnect.nPlusOne.matchMode = DEFAULT_CONFIG.ankiConnect.nPlusOne.matchMode; + context.resolved.ankiConnect.knownWords.matchMode = + DEFAULT_CONFIG.ankiConnect.knownWords.matchMode; } - const nPlusOneDecks = nPlusOneConfig.decks; - if (Array.isArray(nPlusOneDecks)) { - const normalizedDecks = nPlusOneDecks + const knownWordsDecks = knownWordsConfig.decks; + const legacyNPlusOneDecks = nPlusOneConfig.decks; + if (Array.isArray(knownWordsDecks)) { + const normalizedDecks = knownWordsDecks .filter((entry): entry is string => typeof entry === 'string') .map((entry) => entry.trim()) .filter((entry) => 
entry.length > 0); - if (normalizedDecks.length === nPlusOneDecks.length) { - context.resolved.ankiConnect.nPlusOne.decks = [...new Set(normalizedDecks)]; - } else if (nPlusOneDecks.length > 0) { + if (normalizedDecks.length === knownWordsDecks.length) { + context.resolved.ankiConnect.knownWords.decks = [...new Set(normalizedDecks)]; + } else if (knownWordsDecks.length > 0) { context.warn( - 'ankiConnect.nPlusOne.decks', - nPlusOneDecks, - context.resolved.ankiConnect.nPlusOne.decks, + 'ankiConnect.knownWords.decks', + knownWordsDecks, + context.resolved.ankiConnect.knownWords.decks, 'Expected an array of strings.', ); + context.resolved.ankiConnect.knownWords.decks = DEFAULT_CONFIG.ankiConnect.knownWords.decks; } else { - context.resolved.ankiConnect.nPlusOne.decks = []; + context.resolved.ankiConnect.knownWords.decks = []; } - } else if (nPlusOneDecks !== undefined) { + } else if (knownWordsDecks !== undefined) { context.warn( - 'ankiConnect.nPlusOne.decks', - nPlusOneDecks, - context.resolved.ankiConnect.nPlusOne.decks, + 'ankiConnect.knownWords.decks', + knownWordsDecks, + context.resolved.ankiConnect.knownWords.decks, 'Expected an array of strings.', ); - context.resolved.ankiConnect.nPlusOne.decks = []; + context.resolved.ankiConnect.knownWords.decks = DEFAULT_CONFIG.ankiConnect.knownWords.decks; + } else if (Array.isArray(legacyNPlusOneDecks)) { + const normalizedDecks = legacyNPlusOneDecks + .filter((entry): entry is string => typeof entry === 'string') + .map((entry) => entry.trim()) + .filter((entry) => entry.length > 0); + + if (normalizedDecks.length === legacyNPlusOneDecks.length) { + context.resolved.ankiConnect.knownWords.decks = [...new Set(normalizedDecks)]; + context.warn( + 'ankiConnect.nPlusOne.decks', + legacyNPlusOneDecks, + DEFAULT_CONFIG.ankiConnect.knownWords.decks, + 'Legacy key is deprecated; use ankiConnect.knownWords.decks', + ); + } else if (legacyNPlusOneDecks.length > 0) { + context.warn( + 'ankiConnect.nPlusOne.decks', + 
legacyNPlusOneDecks, + context.resolved.ankiConnect.knownWords.decks, + 'Expected an array of strings.', + ); + context.resolved.ankiConnect.knownWords.decks = DEFAULT_CONFIG.ankiConnect.knownWords.decks; + } else { + context.resolved.ankiConnect.knownWords.decks = []; + context.warn( + 'ankiConnect.nPlusOne.decks', + legacyNPlusOneDecks, + DEFAULT_CONFIG.ankiConnect.knownWords.decks, + 'Legacy key is deprecated; use ankiConnect.knownWords.decks', + ); + } + } else if (legacyNPlusOneDecks !== undefined) { + context.warn( + 'ankiConnect.nPlusOne.decks', + legacyNPlusOneDecks, + context.resolved.ankiConnect.knownWords.decks, + 'Expected an array of strings.', + ); + context.resolved.ankiConnect.knownWords.decks = DEFAULT_CONFIG.ankiConnect.knownWords.decks; } const nPlusOneHighlightColor = asColor(nPlusOneConfig.nPlusOne); @@ -801,17 +915,34 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { context.resolved.ankiConnect.nPlusOne.nPlusOne = DEFAULT_CONFIG.ankiConnect.nPlusOne.nPlusOne; } - const nPlusOneKnownWordColor = asColor(nPlusOneConfig.knownWord); - if (nPlusOneKnownWordColor !== undefined) { - context.resolved.ankiConnect.nPlusOne.knownWord = nPlusOneKnownWordColor; + const knownWordsColor = asColor(knownWordsConfig.color); + const legacyNPlusOneKnownWordColor = asColor(nPlusOneConfig.knownWord); + if (knownWordsColor !== undefined) { + context.resolved.ankiConnect.knownWords.color = knownWordsColor; + } else if (knownWordsConfig.color !== undefined) { + context.warn( + 'ankiConnect.knownWords.color', + knownWordsConfig.color, + context.resolved.ankiConnect.knownWords.color, + 'Expected a hex color value.', + ); + context.resolved.ankiConnect.knownWords.color = DEFAULT_CONFIG.ankiConnect.knownWords.color; + } else if (legacyNPlusOneKnownWordColor !== undefined) { + context.resolved.ankiConnect.knownWords.color = legacyNPlusOneKnownWordColor; + context.warn( + 'ankiConnect.nPlusOne.knownWord', + nPlusOneConfig.knownWord, + 
DEFAULT_CONFIG.ankiConnect.knownWords.color, + 'Legacy key is deprecated; use ankiConnect.knownWords.color', + ); } else if (nPlusOneConfig.knownWord !== undefined) { context.warn( 'ankiConnect.nPlusOne.knownWord', nPlusOneConfig.knownWord, - context.resolved.ankiConnect.nPlusOne.knownWord, + context.resolved.ankiConnect.knownWords.color, 'Expected a hex color value.', ); - context.resolved.ankiConnect.nPlusOne.knownWord = DEFAULT_CONFIG.ankiConnect.nPlusOne.knownWord; + context.resolved.ankiConnect.knownWords.color = DEFAULT_CONFIG.ankiConnect.knownWords.color; } if ( diff --git a/src/core/services/__tests__/stats-server.test.ts b/src/core/services/__tests__/stats-server.test.ts index 8048188..03e441f 100644 --- a/src/core/services/__tests__/stats-server.test.ts +++ b/src/core/services/__tests__/stats-server.test.ts @@ -53,6 +53,7 @@ const VOCABULARY_STATS = [ pos2: '自立', pos3: null, frequency: 100, + frequencyRank: 42, firstSeen: Date.now(), lastSeen: Date.now(), }, @@ -132,9 +133,7 @@ const EPISODES_PER_DAY = [ { epochDay: Math.floor(Date.now() / 86_400_000), episodeCount: 1 }, ]; -const NEW_ANIME_PER_DAY = [ - { epochDay: Math.floor(Date.now() / 86_400_000) - 2, newAnimeCount: 2 }, -]; +const NEW_ANIME_PER_DAY = [{ epochDay: Math.floor(Date.now() / 86_400_000) - 2, newAnimeCount: 2 }]; const WATCH_TIME_PER_ANIME = [ { @@ -210,7 +209,12 @@ function createMockTracker( getSessionSummaries: async () => SESSION_SUMMARIES, getDailyRollups: async () => DAILY_ROLLUPS, getMonthlyRollups: async () => [], - getQueryHints: async () => ({ totalSessions: 5, activeSessions: 1, episodesToday: 2, activeAnimeCount: 3 }), + getQueryHints: async () => ({ + totalSessions: 5, + activeSessions: 1, + episodesToday: 2, + activeAnimeCount: 3, + }), getSessionTimeline: async () => [], getSessionEvents: async () => [], getVocabularyStats: async () => VOCABULARY_STATS, @@ -445,7 +449,9 @@ describe('stats server API routes', () => { }), ); - const res = await 
app.request('/api/stats/kanji/occurrences?kanji=%E6%97%A5&limit=999999&offset=10'); + const res = await app.request( + '/api/stats/kanji/occurrences?kanji=%E6%97%A5&limit=999999&offset=10', + ); assert.equal(res.status, 200); const body = await res.json(); assert.ok(Array.isArray(body)); @@ -711,6 +717,23 @@ describe('stats server API routes', () => { assert.equal(res.status, 400); }); + it('DELETE /api/stats/sessions/:sessionId deletes a session', async () => { + let deletedSessionId = 0; + const app = createStatsApp( + createMockTracker({ + deleteSession: async (sessionId: number) => { + deletedSessionId = sessionId; + }, + }), + ); + + const res = await app.request('/api/stats/sessions/42', { method: 'DELETE' }); + + assert.equal(res.status, 200); + assert.equal(deletedSessionId, 42); + assert.deepEqual(await res.json(), { ok: true }); + }); + it('POST /api/stats/anki/browse returns 400 for missing noteId', async () => { const app = createStatsApp(createMockTracker()); const res = await app.request('/api/stats/anki/browse', { method: 'POST' }); diff --git a/src/core/services/frequency-dictionary.test.ts b/src/core/services/frequency-dictionary.test.ts index fde94ce..e7f32fb 100644 --- a/src/core/services/frequency-dictionary.test.ts +++ b/src/core/services/frequency-dictionary.test.ts @@ -130,6 +130,56 @@ test('createFrequencyDictionaryLookup parses composite displayValue by primary r assert.equal(lookup('高み'), 9933); }); +test('createFrequencyDictionaryLookup uses leading display digits for displayValue strings', async () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-')); + const bankPath = path.join(tempDir, 'term_meta_bank_1.json'); + fs.writeFileSync( + bankPath, + JSON.stringify([ + ['潜む', 1, { frequency: { value: 121, displayValue: '118,121' } }], + ['例', 2, { frequency: { value: 1234, displayValue: '1,234' } }], + ]), + ); + + const lookup = await createFrequencyDictionaryLookup({ + searchPaths: [tempDir], + log: () 
=> undefined, + }); + + assert.equal(lookup('潜む'), 118); + assert.equal(lookup('例'), 1); +}); + +test('createFrequencyDictionaryLookup ignores occurrence-based Yomitan dictionaries', async () => { + const logs: string[] = []; + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-')); + fs.writeFileSync( + path.join(tempDir, 'index.json'), + JSON.stringify({ + title: 'CC100', + revision: '1', + frequencyMode: 'occurrence-based', + }), + ); + fs.writeFileSync( + path.join(tempDir, 'term_meta_bank_1.json'), + JSON.stringify([['潜む', 1, { frequency: { value: 118121 } }]]), + ); + + const lookup = await createFrequencyDictionaryLookup({ + searchPaths: [tempDir], + log: (message) => { + logs.push(message); + }, + }); + + assert.equal(lookup('潜む'), null); + assert.equal( + logs.some((entry) => entry.includes('occurrence-based') && entry.includes('CC100')), + true, + ); +}); + test('createFrequencyDictionaryLookup does not require synchronous fs APIs', async () => { const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-')); const bankPath = path.join(tempDir, 'term_meta_bank_1.json'); diff --git a/src/core/services/frequency-dictionary.ts b/src/core/services/frequency-dictionary.ts index f814710..60593d3 100644 --- a/src/core/services/frequency-dictionary.ts +++ b/src/core/services/frequency-dictionary.ts @@ -6,6 +6,8 @@ export interface FrequencyDictionaryLookupOptions { log: (message: string) => void; } +type FrequencyDictionaryMode = 'occurrence-based' | 'rank-based'; + interface FrequencyDictionaryEntry { rank: number; term: string; @@ -29,30 +31,67 @@ function normalizeFrequencyTerm(value: string): string { return value.trim().toLowerCase(); } +async function readDictionaryMetadata( + dictionaryPath: string, + log: (message: string) => void, +): Promise<{ title: string | null; frequencyMode: FrequencyDictionaryMode | null }> { + const indexPath = path.join(dictionaryPath, 'index.json'); + let rawText: string; + try { 
+ rawText = await fs.readFile(indexPath, 'utf-8'); + } catch (error) { + if (isErrorCode(error, 'ENOENT')) { + return { title: null, frequencyMode: null }; + } + log(`Failed to read frequency dictionary index ${indexPath}: ${String(error)}`); + return { title: null, frequencyMode: null }; + } + + let rawIndex: unknown; + try { + rawIndex = JSON.parse(rawText) as unknown; + } catch { + log(`Failed to parse frequency dictionary index as JSON: ${indexPath}`); + return { title: null, frequencyMode: null }; + } + + if (!rawIndex || typeof rawIndex !== 'object') { + return { title: null, frequencyMode: null }; + } + + const titleRaw = (rawIndex as { title?: unknown }).title; + const frequencyModeRaw = (rawIndex as { frequencyMode?: unknown }).frequencyMode; + return { + title: typeof titleRaw === 'string' && titleRaw.trim().length > 0 ? titleRaw.trim() : null, + frequencyMode: + frequencyModeRaw === 'occurrence-based' || frequencyModeRaw === 'rank-based' + ? frequencyModeRaw + : null, + }; +} + function parsePositiveFrequencyString(value: string): number | null { const trimmed = value.trim(); if (!trimmed) { return null; } - const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0]; - if (!numericPrefix) { + const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0]; + if (!numericMatch) { return null; } - const chunks = numericPrefix.split(','); - const normalizedNumber = - chunks.length <= 1 - ? (chunks[0] ?? '') - : chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk)) - ? chunks.join('') - : (chunks[0] ?? 
''); - const parsed = Number.parseInt(normalizedNumber, 10); + const parsed = Number.parseFloat(numericMatch); if (!Number.isFinite(parsed) || parsed <= 0) { return null; } - return parsed; + const normalized = Math.floor(parsed); + if (!Number.isFinite(normalized) || normalized <= 0) { + return null; + } + + return normalized; } function parsePositiveFrequencyNumber(value: unknown): number | null { @@ -68,18 +107,32 @@ function parsePositiveFrequencyNumber(value: unknown): number | null { return null; } +function parseDisplayFrequencyNumber(value: unknown): number | null { + if (typeof value === 'string') { + const leadingDigits = value.trim().match(/^\d+/)?.[0]; + if (!leadingDigits) { + return null; + } + const parsed = Number.parseInt(leadingDigits, 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : null; + } + + return parsePositiveFrequencyNumber(value); +} + function extractFrequencyDisplayValue(meta: unknown): number | null { if (!meta || typeof meta !== 'object') return null; const frequency = (meta as { frequency?: unknown }).frequency; if (!frequency || typeof frequency !== 'object') return null; + const rawValue = (frequency as { value?: unknown }).value; + const parsedRawValue = parsePositiveFrequencyNumber(rawValue); const displayValue = (frequency as { displayValue?: unknown }).displayValue; - const parsedDisplayValue = parsePositiveFrequencyNumber(displayValue); + const parsedDisplayValue = parseDisplayFrequencyNumber(displayValue); if (parsedDisplayValue !== null) { return parsedDisplayValue; } - const rawValue = (frequency as { value?: unknown }).value; - return parsePositiveFrequencyNumber(rawValue); + return parsedRawValue; } function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null { @@ -141,6 +194,15 @@ async function collectDictionaryFromPath( log: (message: string) => void, ): Promise> { const terms = new Map(); + const metadata = await readDictionaryMetadata(dictionaryPath, log); + if 
(metadata.frequencyMode === 'occurrence-based') { + log( + `Skipping occurrence-based frequency dictionary ${ + metadata.title ?? dictionaryPath + }; SubMiner frequency tags require rank-based values.`, + ); + return terms; + } let fileNames: string[]; try { diff --git a/src/core/services/immersion-tracker-service.ts b/src/core/services/immersion-tracker-service.ts index dccf784..3942f47 100644 --- a/src/core/services/immersion-tracker-service.ts +++ b/src/core/services/immersion-tracker-service.ts @@ -57,6 +57,8 @@ import { getWordOccurrences, getVideoDurationMs, markVideoWatched, + deleteSession as deleteSessionQuery, + deleteVideo as deleteVideoQuery, } from './immersion-tracker/query'; import { buildVideoKey, @@ -125,6 +127,7 @@ import { type WordDetailRow, type WordOccurrenceRow, type VocabularyStatsRow, + type CountedWordOccurrence, } from './immersion-tracker/types'; import type { MergedToken } from '../../types'; import { shouldExcludeTokenFromVocabularyPersistence } from './tokenizer/annotation-stage'; @@ -402,6 +405,70 @@ export class ImmersionTrackerService { markVideoWatched(this.db, videoId, watched); } + async deleteSession(sessionId: number): Promise { + deleteSessionQuery(this.db, sessionId); + } + + async deleteVideo(videoId: number): Promise { + deleteVideoQuery(this.db, videoId); + } + + async reassignAnimeAnilist(animeId: number, info: { + anilistId: number; + titleRomaji?: string | null; + titleEnglish?: string | null; + titleNative?: string | null; + episodesTotal?: number | null; + description?: string | null; + coverUrl?: string | null; + }): Promise { + this.db.prepare(` + UPDATE imm_anime + SET anilist_id = ?, + title_romaji = COALESCE(?, title_romaji), + title_english = COALESCE(?, title_english), + title_native = COALESCE(?, title_native), + episodes_total = COALESCE(?, episodes_total), + description = ?, + LAST_UPDATE_DATE = ? + WHERE anime_id = ? + `).run( + info.anilistId, + info.titleRomaji ?? null, + info.titleEnglish ?? 
null, + info.titleNative ?? null, + info.episodesTotal ?? null, + info.description ?? null, + Date.now(), + animeId, + ); + + // Update cover art for all videos in this anime + if (info.coverUrl) { + const videos = this.db.prepare('SELECT video_id FROM imm_videos WHERE anime_id = ?') + .all(animeId) as Array<{ video_id: number }>; + let coverBlob: Buffer | null = null; + try { + const res = await fetch(info.coverUrl); + if (res.ok) coverBlob = Buffer.from(await res.arrayBuffer()); + } catch { /* ignore */ } + for (const v of videos) { + this.db.prepare(` + INSERT INTO imm_media_art (video_id, anilist_id, cover_url, cover_blob, title_romaji, title_english, episodes_total, fetched_at_ms, CREATED_DATE, LAST_UPDATE_DATE) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(video_id) DO UPDATE SET + anilist_id = excluded.anilist_id, cover_url = excluded.cover_url, cover_blob = COALESCE(excluded.cover_blob, cover_blob), + title_romaji = excluded.title_romaji, title_english = excluded.title_english, episodes_total = excluded.episodes_total, + fetched_at_ms = excluded.fetched_at_ms, LAST_UPDATE_DATE = excluded.LAST_UPDATE_DATE + `).run( + v.video_id, info.anilistId, info.coverUrl, coverBlob, + info.titleRomaji ?? null, info.titleEnglish ?? null, info.episodesTotal ?? null, + Date.now(), Date.now(), Date.now(), + ); + } + } + } + async getEpisodeCardEvents(videoId: number): Promise { return getEpisodeCardEvents(this.db, videoId); } @@ -571,19 +638,7 @@ export class ImmersionTrackerService { this.sessionState.tokensSeen += metrics.tokens; this.sessionState.pendingTelemetry = true; - const wordOccurrences = new Map< - string, - { - headword: string; - word: string; - reading: string; - partOfSpeech: string; - pos1: string; - pos2: string; - pos3: string; - occurrenceCount: number; - } - >(); + const wordOccurrences = new Map(); for (const token of tokens ?? 
[]) { if (shouldExcludeTokenFromVocabularyPersistence(token)) { continue; @@ -617,6 +672,7 @@ export class ImmersionTrackerService { pos2: token.pos2 ?? '', pos3: token.pos3 ?? '', occurrenceCount: 1, + frequencyRank: token.frequencyRank ?? null, }); } diff --git a/src/core/services/immersion-tracker/__tests__/query.test.ts b/src/core/services/immersion-tracker/__tests__/query.test.ts index 0ac6265..42abba5 100644 --- a/src/core/services/immersion-tracker/__tests__/query.test.ts +++ b/src/core/services/immersion-tracker/__tests__/query.test.ts @@ -14,6 +14,7 @@ import { import { startSessionRecord } from '../session.js'; import { cleanupVocabularyStats, + deleteSession, getAnimeDetail, getAnimeEpisodes, getAnimeLibrary, @@ -295,35 +296,32 @@ test('cleanupVocabularyStats repairs stored POS metadata and removes excluded im { headword: '旧', frequency: 1 }, ], ); - assert.deepEqual( - repairedRows, - [ - { - headword: '旧', - word: '旧', - reading: 'きゅう', - part_of_speech: 'noun', - pos1: '名詞', - pos2: '一般', - }, - { - headword: '猫', - word: '猫', - reading: 'ねこ', - part_of_speech: 'noun', - pos1: '名詞', - pos2: '一般', - }, - { - headword: '知る', - word: '知っている', - reading: 'しっている', - part_of_speech: 'verb', - pos1: '動詞', - pos2: '自立', - }, - ], - ); + assert.deepEqual(repairedRows, [ + { + headword: '旧', + word: '旧', + reading: 'きゅう', + part_of_speech: 'noun', + pos1: '名詞', + pos2: '一般', + }, + { + headword: '猫', + word: '猫', + reading: 'ねこ', + part_of_speech: 'noun', + pos1: '名詞', + pos2: '一般', + }, + { + headword: '知る', + word: '知っている', + reading: 'しっている', + part_of_speech: 'verb', + pos1: '動詞', + pos2: '自立', + }, + ]); } finally { db.close(); cleanupDbPath(dbPath); @@ -708,7 +706,7 @@ test('anime-level queries group by anime_id and preserve episode-level rows', () canonicalTitle: 'Frieren', anilistId: 52_921, titleRomaji: 'Sousou no Frieren', - titleEnglish: 'Frieren: Beyond Journey\'s End', + titleEnglish: "Frieren: Beyond Journey's End", titleNative: '葬送のフリーレン', 
metadataJson: '{"source":"anilist"}', }); @@ -1070,3 +1068,151 @@ test('getKanjiOccurrences maps a kanji back to anime, video, and subtitle line c cleanupDbPath(dbPath); } }); + +test('deleteSession removes the session and all associated session-scoped rows', () => { + const dbPath = makeDbPath(); + const db = new Database(dbPath); + + try { + ensureSchema(db); + const stmts = createTrackerPreparedStatements(db); + + const videoId = getOrCreateVideoRecord(db, 'local:/tmp/delete-session.mkv', { + canonicalTitle: 'Delete Session Test', + sourcePath: '/tmp/delete-session.mkv', + sourceUrl: null, + sourceType: SOURCE_TYPE_LOCAL, + }); + + const startedAtMs = 6_000_000; + const { sessionId } = startSessionRecord(db, videoId, startedAtMs); + + stmts.telemetryInsertStmt.run( + sessionId, + startedAtMs + 1_000, + 5_000, + 4_000, + 3, + 9, + 9, + 1, + 2, + 1, + 0, + 0, + 0, + 0, + 0, + startedAtMs + 1_000, + startedAtMs + 1_000, + ); + const eventResult = stmts.eventInsertStmt.run( + sessionId, + startedAtMs + 1_500, + EVENT_SUBTITLE_LINE, + 0, + 0, + 900, + 2, + 0, + '{"line":"delete me"}', + startedAtMs + 1_500, + startedAtMs + 1_500, + ); + const eventId = Number(eventResult.lastInsertRowid); + const wordResult = db + .prepare( + `INSERT INTO imm_words ( + headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ) + .run('削除', '削除', 'さくじょ', 'noun', '名詞', '一般', '', startedAtMs, startedAtMs, 1); + const kanjiResult = db + .prepare( + `INSERT INTO imm_kanji ( + kanji, first_seen, last_seen, frequency + ) VALUES (?, ?, ?, ?)`, + ) + .run('削', startedAtMs, startedAtMs, 1); + const lineResult = stmts.subtitleLineInsertStmt.run( + sessionId, + eventId, + videoId, + null, + 0, + 0, + 900, + 'delete me', + startedAtMs + 1_500, + startedAtMs + 1_500, + ); + const lineId = Number(lineResult.lastInsertRowid); + db.prepare( + `INSERT INTO imm_word_line_occurrences (line_id, word_id, occurrence_count) + 
VALUES (?, ?, ?)`, + ).run(lineId, Number(wordResult.lastInsertRowid), 1); + db.prepare( + `INSERT INTO imm_kanji_line_occurrences (line_id, kanji_id, occurrence_count) + VALUES (?, ?, ?)`, + ).run(lineId, Number(kanjiResult.lastInsertRowid), 1); + + deleteSession(db, sessionId); + + const sessionCount = Number( + ( + db + .prepare('SELECT COUNT(*) AS total FROM imm_sessions WHERE session_id = ?') + .get(sessionId) as { + total: number; + } + ).total, + ); + const telemetryCount = Number( + ( + db + .prepare('SELECT COUNT(*) AS total FROM imm_session_telemetry WHERE session_id = ?') + .get(sessionId) as { total: number } + ).total, + ); + const eventCount = Number( + ( + db + .prepare('SELECT COUNT(*) AS total FROM imm_session_events WHERE session_id = ?') + .get(sessionId) as { + total: number; + } + ).total, + ); + const subtitleLineCount = Number( + ( + db + .prepare('SELECT COUNT(*) AS total FROM imm_subtitle_lines WHERE session_id = ?') + .get(sessionId) as { total: number } + ).total, + ); + const wordOccurrenceCount = Number( + ( + db + .prepare('SELECT COUNT(*) AS total FROM imm_word_line_occurrences WHERE line_id = ?') + .get(lineId) as { total: number } + ).total, + ); + const kanjiOccurrenceCount = Number( + ( + db + .prepare('SELECT COUNT(*) AS total FROM imm_kanji_line_occurrences WHERE line_id = ?') + .get(lineId) as { total: number } + ).total, + ); + + assert.equal(sessionCount, 0); + assert.equal(telemetryCount, 0); + assert.equal(eventCount, 0); + assert.equal(subtitleLineCount, 0); + assert.equal(wordOccurrenceCount, 0); + assert.equal(kanjiOccurrenceCount, 0); + } finally { + db.close(); + cleanupDbPath(dbPath); + } +}); diff --git a/src/core/services/immersion-tracker/query.ts b/src/core/services/immersion-tracker/query.ts index 19545c5..c707021 100644 --- a/src/core/services/immersion-tracker/query.ts +++ b/src/core/services/immersion-tracker/query.ts @@ -223,7 +223,8 @@ export function getVocabularyStats( const stmt = db.prepare(` SELECT id 
AS wordId, headword, word, reading, part_of_speech AS partOfSpeech, pos1, pos2, pos3, - frequency, first_seen AS firstSeen, last_seen AS lastSeen + frequency, frequency_rank AS frequencyRank, + first_seen AS firstSeen, last_seen AS lastSeen FROM imm_words ${whereClause} ORDER BY frequency DESC LIMIT ? `); const params = hasExclude ? [...excludePos, limit] : [limit]; @@ -632,6 +633,7 @@ export function getAnimeDetail(db: DatabaseSync, animeId: number): AnimeDetailRo a.title_romaji AS titleRomaji, a.title_english AS titleEnglish, a.title_native AS titleNative, + a.description AS description, COUNT(DISTINCT s.session_id) AS totalSessions, COALESCE(SUM(sm.max_active_ms), 0) AS totalActiveMs, COALESCE(SUM(sm.max_cards), 0) AS totalCards, @@ -1165,3 +1167,22 @@ export function isVideoWatched(db: DatabaseSync, videoId: number): boolean { } | null; return row?.watched === 1; } + +export function deleteSession(db: DatabaseSync, sessionId: number): void { + db.prepare('DELETE FROM imm_subtitle_lines WHERE session_id = ?').run(sessionId); + db.prepare('DELETE FROM imm_session_telemetry WHERE session_id = ?').run(sessionId); + db.prepare('DELETE FROM imm_session_events WHERE session_id = ?').run(sessionId); + db.prepare('DELETE FROM imm_sessions WHERE session_id = ?').run(sessionId); +} + +export function deleteVideo(db: DatabaseSync, videoId: number): void { + const sessions = db.prepare('SELECT session_id FROM imm_sessions WHERE video_id = ?').all(videoId) as Array<{ session_id: number }>; + for (const s of sessions) { + deleteSession(db, s.session_id); + } + db.prepare('DELETE FROM imm_subtitle_lines WHERE video_id = ?').run(videoId); + db.prepare('DELETE FROM imm_daily_rollups WHERE video_id = ?').run(videoId); + db.prepare('DELETE FROM imm_monthly_rollups WHERE video_id = ?').run(videoId); + db.prepare('DELETE FROM imm_media_art WHERE video_id = ?').run(videoId); + db.prepare('DELETE FROM imm_videos WHERE video_id = ?').run(videoId); +} diff --git 
a/src/core/services/immersion-tracker/storage.ts b/src/core/services/immersion-tracker/storage.ts index dfcbb83..a440f8a 100644 --- a/src/core/services/immersion-tracker/storage.ts +++ b/src/core/services/immersion-tracker/storage.ts @@ -345,6 +345,7 @@ export function ensureSchema(db: DatabaseSync): void { title_english TEXT, title_native TEXT, episodes_total INTEGER, + description TEXT, metadata_json TEXT, CREATED_DATE INTEGER, LAST_UPDATE_DATE INTEGER @@ -479,6 +480,7 @@ export function ensureSchema(db: DatabaseSync): void { first_seen REAL, last_seen REAL, frequency INTEGER, + frequency_rank INTEGER, UNIQUE(headword, word, reading) ); `); @@ -672,6 +674,11 @@ export function ensureSchema(db: DatabaseSync): void { `); } + if (currentVersion?.schema_version && currentVersion.schema_version < 9) { + addColumnIfMissing(db, 'imm_anime', 'description', 'TEXT'); + addColumnIfMissing(db, 'imm_words', 'frequency_rank', 'INTEGER'); + } + db.exec(` CREATE INDEX IF NOT EXISTS idx_anime_normalized_title ON imm_anime(normalized_title_key) @@ -776,9 +783,9 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar `), wordUpsertStmt: db.prepare(` INSERT INTO imm_words ( - headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency + headword, word, reading, part_of_speech, pos1, pos2, pos3, first_seen, last_seen, frequency, frequency_rank ) VALUES ( - ?, ?, ?, ?, ?, ?, ?, ?, ?, 1 + ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ? 
) ON CONFLICT(headword, word, reading) DO UPDATE SET frequency = COALESCE(frequency, 0) + 1, @@ -792,7 +799,12 @@ export function createTrackerPreparedStatements(db: DatabaseSync): TrackerPrepar pos2 = COALESCE(NULLIF(imm_words.pos2, ''), excluded.pos2), pos3 = COALESCE(NULLIF(imm_words.pos3, ''), excluded.pos3), first_seen = MIN(COALESCE(first_seen, excluded.first_seen), excluded.first_seen), - last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen) + last_seen = MAX(COALESCE(last_seen, excluded.last_seen), excluded.last_seen), + frequency_rank = CASE + WHEN excluded.frequency_rank IS NOT NULL AND (imm_words.frequency_rank IS NULL OR excluded.frequency_rank < imm_words.frequency_rank) + THEN excluded.frequency_rank + ELSE imm_words.frequency_rank + END `), kanjiUpsertStmt: db.prepare(` INSERT INTO imm_kanji ( @@ -863,6 +875,7 @@ function incrementWordAggregate( occurrence.pos3, firstSeen, lastSeen, + occurrence.frequencyRank ?? null, ); } const row = stmts.wordIdSelectStmt.get( @@ -926,6 +939,7 @@ export function executeQueuedWrite(write: QueuedWrite, stmts: TrackerPreparedSta write.pos3, write.firstSeen, write.lastSeen, + write.frequencyRank ?? 
null, ); return; } diff --git a/src/core/services/immersion-tracker/types.ts b/src/core/services/immersion-tracker/types.ts index 674cad4..8abf93b 100644 --- a/src/core/services/immersion-tracker/types.ts +++ b/src/core/services/immersion-tracker/types.ts @@ -1,4 +1,4 @@ -export const SCHEMA_VERSION = 7; +export const SCHEMA_VERSION = 9; export const DEFAULT_QUEUE_CAP = 1_000; export const DEFAULT_BATCH_SIZE = 25; export const DEFAULT_FLUSH_INTERVAL_MS = 500; @@ -128,6 +128,7 @@ interface QueuedWordWrite { pos3: string; firstSeen: number; lastSeen: number; + frequencyRank: number | null; } interface QueuedKanjiWrite { @@ -146,6 +147,7 @@ export interface CountedWordOccurrence { pos2: string; pos3: string; occurrenceCount: number; + frequencyRank: number | null; } export interface CountedKanjiOccurrence { @@ -240,6 +242,7 @@ export interface VocabularyStatsRow { pos2: string | null; pos3: string | null; frequency: number; + frequencyRank: number | null; firstSeen: number; lastSeen: number; } @@ -395,6 +398,7 @@ export interface AnimeDetailRow { titleRomaji: string | null; titleEnglish: string | null; titleNative: string | null; + description: string | null; totalSessions: number; totalActiveMs: number; totalCards: number; diff --git a/src/core/services/stats-server.ts b/src/core/services/stats-server.ts index 7b3c7f6..4c02bd2 100644 --- a/src/core/services/stats-server.ts +++ b/src/core/services/stats-server.ts @@ -18,6 +18,7 @@ export interface StatsServerConfig { port: number; staticDir: string; // Path to stats/dist/ tracker: ImmersionTrackerService; + knownWordCachePath?: string; } const STATS_STATIC_CONTENT_TYPES: Record = { @@ -79,7 +80,7 @@ function createStatsStaticResponse(staticDir: string, requestPath: string): Resp export function createStatsApp( tracker: ImmersionTrackerService, - options?: { staticDir?: string }, + options?: { staticDir?: string; knownWordCachePath?: string }, ) { const app = new Hono(); @@ -259,6 +260,70 @@ export function 
createStatsApp( return c.json({ ok: true }); }); + app.delete('/api/stats/sessions/:sessionId', async (c) => { + const sessionId = parseIntQuery(c.req.param('sessionId'), 0); + if (sessionId <= 0) return c.body(null, 400); + await tracker.deleteSession(sessionId); + return c.json({ ok: true }); + }); + + app.delete('/api/stats/media/:videoId', async (c) => { + const videoId = parseIntQuery(c.req.param('videoId'), 0); + if (videoId <= 0) return c.body(null, 400); + await tracker.deleteVideo(videoId); + return c.json({ ok: true }); + }); + + app.get('/api/stats/anilist/search', async (c) => { + const query = (c.req.query('q') ?? '').trim(); + if (!query) return c.json([]); + try { + const res = await fetch('https://graphql.anilist.co', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + query: `query ($search: String!) { + Page(perPage: 10) { + media(search: $search, type: ANIME) { + id + episodes + season + seasonYear + description(asHtml: false) + coverImage { large medium } + title { romaji english native } + } + } + }`, + variables: { search: query }, + }), + }); + const json = await res.json() as { data?: { Page?: { media?: unknown[] } } }; + return c.json(json.data?.Page?.media ?? 
[]); + } catch { + return c.json([]); + } + }); + + app.get('/api/stats/known-words', (c) => { + const cachePath = options?.knownWordCachePath; + if (!cachePath || !existsSync(cachePath)) return c.json([]); + try { + const raw = JSON.parse(readFileSync(cachePath, 'utf-8')) as { version?: number; words?: string[] }; + if (raw.version === 1 && Array.isArray(raw.words)) return c.json(raw.words); + } catch { /* ignore */ } + return c.json([]); + }); + + app.patch('/api/stats/anime/:animeId/anilist', async (c) => { + const animeId = parseIntQuery(c.req.param('animeId'), 0); + if (animeId <= 0) return c.body(null, 400); + const body = await c.req.json().catch(() => null); + if (!body?.anilistId) return c.body(null, 400); + await tracker.reassignAnimeAnilist(animeId, body); + return c.json({ ok: true }); + }); + app.get('/api/stats/anime/:animeId/cover', async (c) => { const animeId = parseIntQuery(c.req.param('animeId'), 0); if (animeId <= 0) return c.body(null, 404); @@ -363,7 +428,7 @@ export function createStatsApp( } export function startStatsServer(config: StatsServerConfig): { close: () => void } { - const app = createStatsApp(config.tracker, { staticDir: config.staticDir }); + const app = createStatsApp(config.tracker, { staticDir: config.staticDir, knownWordCachePath: config.knownWordCachePath }); const server = serve({ fetch: app.fetch, diff --git a/src/core/services/stats-window-runtime.ts b/src/core/services/stats-window-runtime.ts index 50a34f4..cf1f969 100644 --- a/src/core/services/stats-window-runtime.ts +++ b/src/core/services/stats-window-runtime.ts @@ -55,10 +55,13 @@ export function buildStatsWindowOptions(options: { }; } -export function buildStatsWindowLoadFileOptions(): { query: Record } { +export function buildStatsWindowLoadFileOptions(apiBaseUrl?: string): { + query: Record; +} { return { query: { overlay: '1', + ...(apiBaseUrl ? 
{ apiBase: apiBaseUrl } : {}), }, }; } diff --git a/src/core/services/stats-window.test.ts b/src/core/services/stats-window.test.ts index 9c4d6cc..ba96fd5 100644 --- a/src/core/services/stats-window.test.ts +++ b/src/core/services/stats-window.test.ts @@ -140,3 +140,12 @@ test('buildStatsWindowLoadFileOptions enables overlay rendering mode', () => { }, }); }); + +test('buildStatsWindowLoadFileOptions includes provided stats API base URL', () => { + assert.deepEqual(buildStatsWindowLoadFileOptions('http://127.0.0.1:6123'), { + query: { + overlay: '1', + apiBase: 'http://127.0.0.1:6123', + }, + }); +}); diff --git a/src/core/services/stats-window.ts b/src/core/services/stats-window.ts index 467b0a7..869cf04 100644 --- a/src/core/services/stats-window.ts +++ b/src/core/services/stats-window.ts @@ -16,6 +16,8 @@ export interface StatsWindowOptions { staticDir: string; /** Absolute path to the compiled preload-stats.js */ preloadPath: string; + /** Resolve the active stats API base URL */ + getApiBaseUrl?: () => string; /** Resolve the active stats toggle key from config */ getToggleKey: () => string; /** Resolve the tracked overlay/mpv bounds */ @@ -46,7 +48,7 @@ export function toggleStatsOverlay(options: StatsWindowOptions): void { ); const indexPath = path.join(options.staticDir, 'index.html'); - statsWindow.loadFile(indexPath, buildStatsWindowLoadFileOptions()); + statsWindow.loadFile(indexPath, buildStatsWindowLoadFileOptions(options.getApiBaseUrl?.())); statsWindow.on('closed', () => { statsWindow = null; diff --git a/src/core/services/tokenizer.test.ts b/src/core/services/tokenizer.test.ts index 1aa86bf..801d7c2 100644 --- a/src/core/services/tokenizer.test.ts +++ b/src/core/services/tokenizer.test.ts @@ -706,6 +706,240 @@ test('tokenizeSubtitle prefers Yomitan frequency from highest-priority dictionar assert.equal(result.tokens?.[0]?.frequencyRank, 100); }); +test('tokenizeSubtitle ignores occurrence-based Yomitan frequencies for inflected terms', async () => { 
+ const result = await tokenizeSubtitle( + '潜み', + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + getYomitanExt: () => ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => + ({ + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + return [ + { + term: '潜む', + reading: 'ひそ', + dictionary: 'CC100', + frequency: 118121, + displayValue: null, + displayValueParsed: false, + }, + ]; + } + + if (script.includes('optionsGetFull')) { + return { + profileCurrent: 0, + profileIndex: 0, + scanLength: 40, + dictionaries: ['CC100'], + dictionaryPriorityByName: { CC100: 0 }, + dictionaryFrequencyModeByName: { CC100: 'occurrence-based' }, + profiles: [ + { + options: { + scanning: { length: 40 }, + dictionaries: [{ name: 'CC100', enabled: true, id: 0 }], + }, + }, + ], + }; + } + + return [ + { + surface: '潜み', + reading: 'ひそ', + headword: '潜む', + startPos: 0, + endPos: 2, + }, + ]; + }, + }, + }) as unknown as Electron.BrowserWindow, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.frequencyRank, undefined); +}); + +test('tokenizeSubtitle falls back to raw term-only Yomitan rank when no scan-derived rank exists', async () => { + const result = await tokenizeSubtitle( + '潜み', + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + getYomitanExt: () => ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => + ({ + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + return [ + { + term: '潜む', + reading: 'ひそ', + hasReading: false, + dictionary: 'CC100', + frequency: 118121, + displayValue: null, + displayValueParsed: false, + }, + ]; + } + + if (script.includes('optionsGetFull')) { + return { + profileCurrent: 0, + profileIndex: 0, + scanLength: 40, + dictionaries: ['CC100'], + dictionaryPriorityByName: { CC100: 0 }, + 
dictionaryFrequencyModeByName: { CC100: 'rank-based' }, + profiles: [ + { + options: { + scanning: { length: 40 }, + dictionaries: [{ name: 'CC100', enabled: true, id: 0 }], + }, + }, + ], + }; + } + + return [ + { + surface: '潜み', + reading: 'ひそ', + headword: '潜む', + startPos: 0, + endPos: 2, + }, + ]; + }, + }, + }) as unknown as Electron.BrowserWindow, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.frequencyRank, 118121); +}); + +test('tokenizeSubtitle keeps parsed display rank for term-only inflected headword fallback', async () => { + const result = await tokenizeSubtitle( + '潜み', + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + getYomitanExt: () => ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => + ({ + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + return [ + { + term: '潜む', + reading: 'ひそ', + hasReading: false, + dictionary: 'CC100', + frequency: 118121, + displayValue: '118,121', + displayValueParsed: false, + }, + ]; + } + + if (script.includes('optionsGetFull')) { + return { + profileCurrent: 0, + profileIndex: 0, + scanLength: 40, + dictionaries: ['CC100'], + dictionaryPriorityByName: { CC100: 0 }, + dictionaryFrequencyModeByName: { CC100: 'rank-based' }, + profiles: [ + { + options: { + scanning: { length: 40 }, + dictionaries: [{ name: 'CC100', enabled: true, id: 0 }], + }, + }, + ], + }; + } + + return [ + { + surface: '潜み', + reading: 'ひそ', + headword: '潜む', + startPos: 0, + endPos: 2, + }, + ]; + }, + }, + }) as unknown as Electron.BrowserWindow, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.frequencyRank, 118); +}); + +test('tokenizeSubtitle preserves scan-derived rank over lower-priority Yomitan fallback', async () => { + const result = await tokenizeSubtitle( + '潜み', + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + getYomitanExt: () 
=> ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => + ({ + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + return [ + { + term: '潜む', + reading: 'ひそ', + hasReading: false, + dictionary: 'CC100', + dictionaryPriority: 2, + frequency: 118121, + displayValue: null, + displayValueParsed: false, + }, + ]; + } + + return [ + { + surface: '潜み', + reading: 'ひそむ', + headword: '潜む', + startPos: 0, + endPos: 2, + frequencyRank: 4073, + }, + ]; + }, + }, + }) as unknown as Electron.BrowserWindow, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.frequencyRank, 4073); +}); + test('tokenizeSubtitle uses only selected Yomitan headword for frequency lookup', async () => { const result = await tokenizeSubtitle( '猫です', @@ -836,6 +1070,69 @@ test('tokenizeSubtitle prefers exact headword frequency over surface/reading whe assert.equal(result.tokens?.[0]?.frequencyRank, 8); }); +test('tokenizeSubtitle falls back to exact surface frequency when merged headword lookup misses', async () => { + const frequencyScripts: string[] = []; + const result = await tokenizeSubtitle( + '陰に', + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + getYomitanExt: () => ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => + ({ + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + frequencyScripts.push(script); + return script.includes('"term":"陰に","reading":"いんに"') + ? 
[ + { + term: '陰に', + reading: 'いんに', + dictionary: 'freq-dict', + frequency: 5702, + displayValue: '5702', + displayValueParsed: true, + }, + ] + : []; + } + + return [ + { + source: 'scanning-parser', + index: 0, + content: [ + [ + { + text: '陰に', + reading: 'いんに', + headwords: [[{ term: '陰' }]], + }, + ], + ], + }, + ]; + }, + }, + }) as unknown as Electron.BrowserWindow, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.surface, '陰に'); + assert.equal(result.tokens?.[0]?.headword, '陰'); + assert.equal(result.tokens?.[0]?.frequencyRank, 5702); + assert.equal( + frequencyScripts.some((script) => script.includes('"term":"陰","reading":"いんに"')), + true, + ); + assert.equal( + frequencyScripts.some((script) => script.includes('"term":"陰に","reading":"いんに"')), + true, + ); +}); + test('tokenizeSubtitle keeps no frequency when only reading matches and headword misses', async () => { const result = await tokenizeSubtitle( '猫です', @@ -2287,6 +2584,131 @@ test('tokenizeSubtitle keeps correct MeCab pos1 enrichment when Yomitan offsets assert.equal(targets[0]?.surface, '仮面'); }); +test('tokenizeSubtitle preserves merged token frequency when MeCab positions cross a newline gap', async () => { + const parserWindow = { + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + return script.includes('"term":"陰に","reading":"いんに"') + ? 
[ + { + term: '陰に', + reading: 'いんに', + dictionary: 'JPDBv2㋕', + frequency: 5702, + displayValue: '5702', + displayValueParsed: false, + }, + ] + : []; + } + + return [ + { + surface: 'X', + reading: 'えっくす', + headword: 'X', + startPos: 0, + endPos: 1, + }, + { + surface: '陰に', + reading: 'いんに', + headword: '陰に', + startPos: 2, + endPos: 4, + }, + { + surface: '潜み', + reading: 'ひそ', + headword: '潜む', + startPos: 4, + endPos: 6, + }, + ]; + }, + }, + } as unknown as Electron.BrowserWindow; + + const deps = createTokenizerDepsRuntime({ + getYomitanExt: () => ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => parserWindow, + setYomitanParserWindow: () => {}, + getYomitanParserReadyPromise: () => null, + setYomitanParserReadyPromise: () => {}, + getYomitanParserInitPromise: () => null, + setYomitanParserInitPromise: () => {}, + isKnownWord: () => false, + getKnownWordMatchMode: () => 'headword', + getJlptLevel: () => null, + getFrequencyDictionaryEnabled: () => true, + getMecabTokenizer: () => ({ + tokenize: async () => [ + { + word: 'X', + partOfSpeech: PartOfSpeech.noun, + pos1: '名詞', + pos2: '一般', + pos3: '', + pos4: '', + inflectionType: '', + inflectionForm: '', + headword: 'X', + katakanaReading: 'エックス', + pronunciation: 'エックス', + }, + { + word: '陰', + partOfSpeech: PartOfSpeech.noun, + pos1: '名詞', + pos2: '一般', + pos3: '', + pos4: '', + inflectionType: '', + inflectionForm: '', + headword: '陰', + katakanaReading: 'カゲ', + pronunciation: 'カゲ', + }, + { + word: 'に', + partOfSpeech: PartOfSpeech.particle, + pos1: '助詞', + pos2: '格助詞', + pos3: '一般', + pos4: '', + inflectionType: '', + inflectionForm: '', + headword: 'に', + katakanaReading: 'ニ', + pronunciation: 'ニ', + }, + { + word: '潜み', + partOfSpeech: PartOfSpeech.verb, + pos1: '動詞', + pos2: '自立', + pos3: '', + pos4: '', + inflectionType: '五段・マ行', + inflectionForm: '連用形', + headword: '潜む', + katakanaReading: 'ヒソミ', + pronunciation: 'ヒソミ', + }, + ], + }), + }); + + const result = await 
tokenizeSubtitle('X\n陰に潜み', deps); + + assert.equal(result.tokens?.[1]?.surface, '陰に'); + assert.equal(result.tokens?.[1]?.pos1, '名詞|助詞'); + assert.equal(result.tokens?.[1]?.pos2, '一般|格助詞'); + assert.equal(result.tokens?.[1]?.frequencyRank, 5702); +}); + test('tokenizeSubtitle does not color 1-2 word sentences by default', async () => { const result = await tokenizeSubtitle( '猫です', diff --git a/src/core/services/tokenizer.ts b/src/core/services/tokenizer.ts index dbeaf32..7b48d7a 100644 --- a/src/core/services/tokenizer.ts +++ b/src/core/services/tokenizer.ts @@ -23,6 +23,7 @@ import { requestYomitanScanTokens, requestYomitanTermFrequencies, } from './tokenizer/yomitan-parser-runtime'; +import type { YomitanTermFrequency } from './tokenizer/yomitan-parser-runtime'; const logger = createLogger('main:tokenizer'); @@ -225,7 +226,13 @@ export function createTokenizerDepsRuntime( return null; } - return mergeTokens(rawTokens, options.isKnownWord, options.getKnownWordMatchMode(), false); + return mergeTokens( + rawTokens, + options.isKnownWord, + options.getKnownWordMatchMode(), + false, + text, + ); }, enrichTokensWithMecab: async (tokens, mecabTokens) => enrichTokensWithMecabAsync(tokens, mecabTokens), @@ -336,56 +343,154 @@ function resolveFrequencyLookupText( return token.surface; } +function resolveYomitanFrequencyLookupTexts( + token: MergedToken, + matchMode: FrequencyDictionaryMatchMode, +): string[] { + const primaryLookupText = resolveFrequencyLookupText(token, matchMode).trim(); + if (!primaryLookupText) { + return []; + } + + if (matchMode !== 'headword') { + return [primaryLookupText]; + } + + const normalizedHeadword = token.headword.trim(); + const normalizedSurface = token.surface.trim(); + if ( + !normalizedHeadword || + !normalizedSurface || + normalizedSurface === normalizedHeadword || + normalizedSurface === primaryLookupText + ) { + return [primaryLookupText]; + } + + return [primaryLookupText, normalizedSurface]; +} + function 
buildYomitanFrequencyTermReadingList( tokens: MergedToken[], matchMode: FrequencyDictionaryMatchMode, ): Array<{ term: string; reading: string | null }> { const termReadingList: Array<{ term: string; reading: string | null }> = []; for (const token of tokens) { - const term = resolveFrequencyLookupText(token, matchMode).trim(); - if (!term) { - continue; - } - const readingRaw = token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null; - termReadingList.push({ term, reading: readingRaw }); + for (const term of resolveYomitanFrequencyLookupTexts(token, matchMode)) { + termReadingList.push({ term, reading: readingRaw }); + } } return termReadingList; } -function buildYomitanFrequencyRankMap( - frequencies: ReadonlyArray<{ term: string; frequency: number; dictionaryPriority?: number }>, -): Map { - const rankByTerm = new Map(); +function makeYomitanFrequencyPairKey(term: string, reading: string | null): string { + return `${term}\u0000${reading ?? ''}`; +} + +interface NormalizedYomitanTermFrequency extends YomitanTermFrequency { + reading: string | null; + frequency: number; +} + +interface YomitanFrequencyIndex { + byPair: Map; + byTerm: Map; +} + +function appendYomitanFrequencyEntry( + map: Map, + key: string, + entry: NormalizedYomitanTermFrequency, +): void { + const existing = map.get(key); + if (existing) { + existing.push(entry); + return; + } + + map.set(key, [entry]); +} + +function buildYomitanFrequencyIndex( + frequencies: ReadonlyArray, +): YomitanFrequencyIndex { + const byPair = new Map(); + const byTerm = new Map(); for (const frequency of frequencies) { - const normalizedTerm = frequency.term.trim(); + const term = frequency.term.trim(); const rank = normalizePositiveFrequencyRank(frequency.frequency); - if (!normalizedTerm || rank === null) { + if (!term || rank === null) { continue; } - const dictionaryPriority = - typeof frequency.dictionaryPriority === 'number' && - Number.isFinite(frequency.dictionaryPriority) - ? 
Math.max(0, Math.floor(frequency.dictionaryPriority)) - : Number.MAX_SAFE_INTEGER; - const current = rankByTerm.get(normalizedTerm); + + const reading = + typeof frequency.reading === 'string' && frequency.reading.trim().length > 0 + ? frequency.reading.trim() + : null; + const normalizedEntry: NormalizedYomitanTermFrequency = { + ...frequency, + term, + reading, + frequency: rank, + }; + appendYomitanFrequencyEntry(byPair, makeYomitanFrequencyPairKey(term, reading), normalizedEntry); + appendYomitanFrequencyEntry(byTerm, term, normalizedEntry); + } + + return { byPair, byTerm }; +} + +function selectBestYomitanFrequencyRank( + entries: ReadonlyArray, +): number | null { + let bestEntry: NormalizedYomitanTermFrequency | null = null; + for (const entry of entries) { if ( - current === undefined || - dictionaryPriority < current.dictionaryPriority || - (dictionaryPriority === current.dictionaryPriority && rank < current.rank) + bestEntry === null || + entry.dictionaryPriority < bestEntry.dictionaryPriority || + (entry.dictionaryPriority === bestEntry.dictionaryPriority && + entry.frequency < bestEntry.frequency) ) { - rankByTerm.set(normalizedTerm, { rank, dictionaryPriority }); + bestEntry = entry; } } - const collapsedRankByTerm = new Map(); - for (const [term, entry] of rankByTerm.entries()) { - collapsedRankByTerm.set(term, entry.rank); + return bestEntry?.frequency ?? null; +} + +function getYomitanFrequencyRank( + token: MergedToken, + candidateText: string, + matchMode: FrequencyDictionaryMatchMode, + frequencyIndex: YomitanFrequencyIndex, +): number | null { + const normalizedCandidateText = candidateText.trim(); + if (!normalizedCandidateText) { + return null; } - return collapsedRankByTerm; + const reading = + typeof token.reading === 'string' && token.reading.trim().length > 0 ? token.reading.trim() : null; + const pairEntries = + frequencyIndex.byPair.get(makeYomitanFrequencyPairKey(normalizedCandidateText, reading)) ?? 
[]; + const candidateEntries = + pairEntries.length > 0 ? pairEntries : (frequencyIndex.byTerm.get(normalizedCandidateText) ?? []); + if (candidateEntries.length === 0) { + return null; + } + + const normalizedHeadword = token.headword.trim(); + const normalizedSurface = token.surface.trim(); + const isInflectedHeadwordFallback = + matchMode === 'headword' && + normalizedCandidateText === normalizedHeadword && + normalizedSurface.length > 0 && + normalizedSurface !== normalizedHeadword; + + return selectBestYomitanFrequencyRank(candidateEntries); } function getLocalFrequencyRank( @@ -416,7 +521,7 @@ function getLocalFrequencyRank( function applyFrequencyRanks( tokens: MergedToken[], matchMode: FrequencyDictionaryMatchMode, - yomitanRankByTerm: Map, + yomitanFrequencyIndex: YomitanFrequencyIndex, getFrequencyRank: FrequencyDictionaryLookup | undefined, ): MergedToken[] { if (tokens.length === 0) { @@ -441,12 +546,19 @@ function applyFrequencyRanks( }; } - const yomitanRank = yomitanRankByTerm.get(lookupText); - if (yomitanRank !== undefined) { - return { - ...token, - frequencyRank: yomitanRank, - }; + for (const candidateText of resolveYomitanFrequencyLookupTexts(token, matchMode)) { + const yomitanRank = getYomitanFrequencyRank( + token, + candidateText, + matchMode, + yomitanFrequencyIndex, + ); + if (yomitanRank !== null) { + return { + ...token, + frequencyRank: yomitanRank, + }; + } } if (!getFrequencyRank) { @@ -501,6 +613,7 @@ async function parseWithYomitanInternalParser( isKnown: false, isNPlusOneTarget: false, isNameMatch: token.isNameMatch ?? false, + frequencyRank: token.frequencyRank, }), ), ); @@ -510,7 +623,7 @@ async function parseWithYomitanInternalParser( } deps.onTokenizationReady?.(text); - const frequencyRankPromise: Promise> = options.frequencyEnabled + const frequencyRankPromise: Promise = options.frequencyEnabled ? 
(async () => { const frequencyMatchMode = options.frequencyMatchMode; const termReadingList = buildYomitanFrequencyTermReadingList( @@ -522,9 +635,9 @@ async function parseWithYomitanInternalParser( deps, logger, ); - return buildYomitanFrequencyRankMap(yomitanFrequencies); + return buildYomitanFrequencyIndex(yomitanFrequencies); })() - : Promise.resolve(new Map()); + : Promise.resolve({ byPair: new Map(), byTerm: new Map() }); const mecabEnrichmentPromise: Promise = needsMecabPosEnrichment(options) ? (async () => { @@ -545,7 +658,7 @@ async function parseWithYomitanInternalParser( })() : Promise.resolve(normalizedSelectedTokens); - const [yomitanRankByTerm, enrichedTokens] = await Promise.all([ + const [yomitanFrequencyIndex, enrichedTokens] = await Promise.all([ frequencyRankPromise, mecabEnrichmentPromise, ]); @@ -554,7 +667,7 @@ async function parseWithYomitanInternalParser( return applyFrequencyRanks( enrichedTokens, options.frequencyMatchMode, - yomitanRankByTerm, + yomitanFrequencyIndex, deps.getFrequencyRank, ); } diff --git a/src/core/services/tokenizer/annotation-stage.test.ts b/src/core/services/tokenizer/annotation-stage.test.ts index c6f4cfd..0b5072b 100644 --- a/src/core/services/tokenizer/annotation-stage.test.ts +++ b/src/core/services/tokenizer/annotation-stage.test.ts @@ -293,6 +293,29 @@ test('annotateTokens excludes default non-independent pos2 from frequency and N+ assert.equal(result[0]?.isNPlusOneTarget, false); }); +test('annotateTokens keeps frequency for kanji noun tokens even when mecab marks them non-independent', () => { + const tokens = [ + makeToken({ + surface: '者', + reading: 'もの', + headword: '者', + partOfSpeech: PartOfSpeech.other, + pos1: '名詞', + pos2: '非自立', + pos3: '一般', + startPos: 0, + endPos: 1, + frequencyRank: 475, + }), + ]; + + const result = annotateTokens(tokens, makeDeps(), { + minSentenceWordsForNPlusOne: 1, + }); + + assert.equal(result[0]?.frequencyRank, 475); +}); + test('annotateTokens excludes likely kana SFX 
tokens from frequency when POS tags are missing', () => { const tokens = [ makeToken({ diff --git a/src/core/services/tokenizer/annotation-stage.ts b/src/core/services/tokenizer/annotation-stage.ts index 3e61386..fcd1449 100644 --- a/src/core/services/tokenizer/annotation-stage.ts +++ b/src/core/services/tokenizer/annotation-stage.ts @@ -89,6 +89,23 @@ function normalizePos2Tag(pos2: string | undefined): string { return typeof pos2 === 'string' ? pos2.trim() : ''; } +function hasKanjiChar(text: string): boolean { + for (const char of text) { + const code = char.codePointAt(0); + if (code === undefined) { + continue; + } + if ( + (code >= 0x3400 && code <= 0x4dbf) || + (code >= 0x4e00 && code <= 0x9fff) || + (code >= 0xf900 && code <= 0xfaff) + ) { + return true; + } + } + return false; +} + function isExcludedComponent( pos1: string | undefined, pos2: string | undefined, @@ -169,6 +186,34 @@ function isFrequencyExcludedByPos( ); } +function shouldKeepFrequencyForNonIndependentKanjiNoun( + token: MergedToken, + pos1Exclusions: ReadonlySet, +): boolean { + if (pos1Exclusions.has('名詞')) { + return false; + } + + const rank = + typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank) + ? 
Math.max(1, Math.floor(token.frequencyRank)) + : null; + if (rank === null) { + return false; + } + + const pos1Parts = splitNormalizedTagParts(normalizePos1Tag(token.pos1)); + const pos2Parts = splitNormalizedTagParts(normalizePos2Tag(token.pos2)); + if (pos1Parts.length !== 1 || pos2Parts.length !== 1) { + return false; + } + if (pos1Parts[0] !== '名詞' || pos2Parts[0] !== '非自立') { + return false; + } + + return hasKanjiChar(token.surface) || hasKanjiChar(token.headword); +} + export function shouldExcludeTokenFromVocabularyPersistence( token: MergedToken, options: Pick = {}, @@ -454,7 +499,10 @@ function filterTokenFrequencyRank( pos1Exclusions: ReadonlySet, pos2Exclusions: ReadonlySet, ): number | undefined { - if (isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions)) { + if ( + isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions) && + !shouldKeepFrequencyForNonIndependentKanjiNoun(token, pos1Exclusions) + ) { return undefined; } diff --git a/src/core/services/tokenizer/yomitan-parser-runtime.test.ts b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts index 8f0ee1a..5ccb443 100644 --- a/src/core/services/tokenizer/yomitan-parser-runtime.test.ts +++ b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts @@ -188,6 +188,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async { term: '猫', reading: 'ねこ', + hasReading: true, dictionary: 'freq-dict', dictionaryPriority: 0, frequency: 77, @@ -197,6 +198,7 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async { term: '鍛える', reading: 'きたえる', + hasReading: false, dictionary: 'freq-dict', dictionaryPriority: 1, frequency: 46961, @@ -217,9 +219,11 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async assert.equal(result.length, 2); assert.equal(result[0]?.term, '猫'); + assert.equal(result[0]?.hasReading, true); assert.equal(result[0]?.frequency, 77); assert.equal(result[0]?.dictionaryPriority, 0); 
assert.equal(result[1]?.term, '鍛える'); + assert.equal(result[1]?.hasReading, false); assert.equal(result[1]?.frequency, 2847); assert.match(scriptValue, /getTermFrequencies/); assert.match(scriptValue, /optionsGetFull/); @@ -247,6 +251,96 @@ test('requestYomitanTermFrequencies prefers primary rank from displayValue array assert.equal(result[0]?.frequency, 7141); }); +test('requestYomitanTermFrequencies prefers primary rank from displayValue string pair when raw frequency matches trailing count', async () => { + const deps = createDeps(async () => [ + { + term: '潜む', + reading: 'ひそむ', + dictionary: 'freq-dict', + dictionaryPriority: 0, + frequency: 121, + displayValue: '118,121', + displayValueParsed: false, + }, + ]); + + const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, { + error: () => undefined, + }); + + assert.equal(result.length, 1); + assert.equal(result[0]?.term, '潜む'); + assert.equal(result[0]?.frequency, 118); +}); + +test('requestYomitanTermFrequencies uses leading display digits for displayValue strings', async () => { + const deps = createDeps(async () => [ + { + term: '例', + reading: 'れい', + dictionary: 'freq-dict', + dictionaryPriority: 0, + frequency: 1234, + displayValue: '1,234', + displayValueParsed: false, + }, + ]); + + const result = await requestYomitanTermFrequencies([{ term: '例', reading: 'れい' }], deps, { + error: () => undefined, + }); + + assert.equal(result.length, 1); + assert.equal(result[0]?.term, '例'); + assert.equal(result[0]?.frequency, 1); +}); + +test('requestYomitanTermFrequencies ignores occurrence-based dictionaries for rank tagging', async () => { + let metadataScript = ''; + const deps = createDeps(async (script) => { + if (script.includes('getTermFrequencies')) { + return [ + { + term: '潜む', + reading: 'ひそむ', + dictionary: 'CC100', + frequency: 118121, + displayValue: null, + displayValueParsed: false, + }, + ]; + } + + if (script.includes('optionsGetFull')) { + metadataScript = 
script; + return { + profileCurrent: 0, + profileIndex: 0, + scanLength: 40, + dictionaries: ['CC100'], + dictionaryPriorityByName: { CC100: 0 }, + dictionaryFrequencyModeByName: { CC100: 'occurrence-based' }, + profiles: [ + { + options: { + scanning: { length: 40 }, + dictionaries: [{ name: 'CC100', enabled: true, id: 0 }], + }, + }, + ], + }; + } + return []; + }); + + const result = await requestYomitanTermFrequencies([{ term: '潜む', reading: 'ひそむ' }], deps, { + error: () => undefined, + }); + + assert.deepEqual(result, []); + assert.match(metadataScript, /getDictionaryInfo/); +}); + test('requestYomitanTermFrequencies requests term-only fallback only after reading miss', async () => { const frequencyScripts: string[] = []; const deps = createDeps(async (script) => { @@ -485,6 +579,317 @@ test('requestYomitanScanTokens uses left-to-right termsFind scanning instead of assert.match(scannerScript ?? '', /deinflect:\s*true/); }); +test('requestYomitanScanTokens extracts best frequency rank from selected termsFind entry', async () => { + let scannerScript = ''; + const deps = createDeps(async (script) => { + if (script.includes('termsFind')) { + scannerScript = script; + return []; + } + if (script.includes('optionsGetFull')) { + return { + profileCurrent: 0, + profileIndex: 0, + scanLength: 40, + dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'], + dictionaryPriorityByName: { + 'JPDBv2㋕': 0, + Jiten: 1, + CC100: 2, + }, + dictionaryFrequencyModeByName: { + 'JPDBv2㋕': 'rank-based', + Jiten: 'rank-based', + CC100: 'rank-based', + }, + profiles: [ + { + options: { + scanning: { length: 40 }, + dictionaries: [ + { name: 'JPDBv2㋕', enabled: true, id: 0 }, + { name: 'Jiten', enabled: true, id: 1 }, + { name: 'CC100', enabled: true, id: 2 }, + ], + }, + }, + ], + }; + } + return null; + }); + + await requestYomitanScanTokens('潜み', deps, { + error: () => undefined, + }); + + const result = await runInjectedYomitanScript(scannerScript, (action, params) => { + if (action !== 
'termsFind') { + throw new Error(`unexpected action: ${action}`); + } + + const text = (params as { text?: string } | undefined)?.text ?? ''; + if (!text.startsWith('潜み')) { + return { originalTextLength: 0, dictionaryEntries: [] }; + } + + return { + originalTextLength: 2, + dictionaryEntries: [ + { + headwords: [ + { + term: '潜む', + reading: 'ひそむ', + sources: [{ originalText: '潜み', isPrimary: true, matchType: 'exact' }], + }, + ], + frequencies: [ + { + headwordIndex: 0, + dictionary: 'JPDBv2㋕', + frequency: 20181, + displayValue: '4073,20181句', + }, + { + headwordIndex: 0, + dictionary: 'Jiten', + frequency: 28594, + displayValue: '4592,28594句', + }, + { + headwordIndex: 0, + dictionary: 'CC100', + frequency: 118121, + displayValue: null, + }, + ], + }, + ], + }; + }); + + assert.deepEqual(result, [ + { + surface: '潜み', + reading: 'ひそ', + headword: '潜む', + startPos: 0, + endPos: 2, + isNameMatch: false, + frequencyRank: 4073, + }, + ]); +}); + +test('requestYomitanScanTokens uses frequency from later exact-match entry when first exact entry has none', async () => { + let scannerScript = ''; + const deps = createDeps(async (script) => { + if (script.includes('termsFind')) { + scannerScript = script; + return []; + } + if (script.includes('optionsGetFull')) { + return { + profileCurrent: 0, + profileIndex: 0, + scanLength: 40, + dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'], + dictionaryPriorityByName: { + 'JPDBv2㋕': 0, + Jiten: 1, + CC100: 2, + }, + dictionaryFrequencyModeByName: { + 'JPDBv2㋕': 'rank-based', + Jiten: 'rank-based', + CC100: 'rank-based', + }, + profiles: [ + { + options: { + scanning: { length: 40 }, + dictionaries: [ + { name: 'JPDBv2㋕', enabled: true, id: 0 }, + { name: 'Jiten', enabled: true, id: 1 }, + { name: 'CC100', enabled: true, id: 2 }, + ], + }, + }, + ], + }; + } + return null; + }); + + await requestYomitanScanTokens('者', deps, { + error: () => undefined, + }); + + const result = await runInjectedYomitanScript(scannerScript, (action, 
params) => { + if (action !== 'termsFind') { + throw new Error(`unexpected action: ${action}`); + } + + const text = (params as { text?: string } | undefined)?.text ?? ''; + if (!text.startsWith('者')) { + return { originalTextLength: 0, dictionaryEntries: [] }; + } + + return { + originalTextLength: 1, + dictionaryEntries: [ + { + headwords: [ + { + term: '者', + reading: 'もの', + sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }], + }, + ], + frequencies: [], + }, + { + headwords: [ + { + term: '者', + reading: 'もの', + sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }], + }, + ], + frequencies: [ + { + headwordIndex: 0, + dictionary: 'JPDBv2㋕', + frequency: 79601, + displayValue: '475,79601句', + }, + { + headwordIndex: 0, + dictionary: 'Jiten', + frequency: 338, + displayValue: '338', + }, + ], + }, + ], + }; + }); + + assert.deepEqual(result, [ + { + surface: '者', + reading: 'もの', + headword: '者', + startPos: 0, + endPos: 1, + isNameMatch: false, + frequencyRank: 475, + }, + ]); +}); + +test('requestYomitanScanTokens can use frequency from later exact secondary-match entry', async () => { + let scannerScript = ''; + const deps = createDeps(async (script) => { + if (script.includes('termsFind')) { + scannerScript = script; + return []; + } + if (script.includes('optionsGetFull')) { + return { + profileCurrent: 0, + profileIndex: 0, + scanLength: 40, + dictionaries: ['JPDBv2㋕', 'Jiten', 'CC100'], + dictionaryPriorityByName: { + 'JPDBv2㋕': 0, + Jiten: 1, + CC100: 2, + }, + dictionaryFrequencyModeByName: { + 'JPDBv2㋕': 'rank-based', + Jiten: 'rank-based', + CC100: 'rank-based', + }, + profiles: [ + { + options: { + scanning: { length: 40 }, + dictionaries: [ + { name: 'JPDBv2㋕', enabled: true, id: 0 }, + { name: 'Jiten', enabled: true, id: 1 }, + { name: 'CC100', enabled: true, id: 2 }, + ], + }, + }, + ], + }; + } + return null; + }); + + await requestYomitanScanTokens('者', deps, { + error: () => undefined, + }); + + const result = 
await runInjectedYomitanScript(scannerScript, (action, params) => { + if (action !== 'termsFind') { + throw new Error(`unexpected action: ${action}`); + } + + const text = (params as { text?: string } | undefined)?.text ?? ''; + if (!text.startsWith('者')) { + return { originalTextLength: 0, dictionaryEntries: [] }; + } + + return { + originalTextLength: 1, + dictionaryEntries: [ + { + headwords: [ + { + term: '者', + reading: 'もの', + sources: [{ originalText: '者', isPrimary: true, matchType: 'exact' }], + }, + ], + frequencies: [], + }, + { + headwords: [ + { + term: '者', + reading: 'もの', + sources: [{ originalText: '者', isPrimary: false, matchType: 'exact' }], + }, + ], + frequencies: [ + { + headwordIndex: 0, + dictionary: 'JPDBv2㋕', + frequency: 79601, + displayValue: '475,79601句', + }, + ], + }, + ], + }; + }); + + assert.deepEqual(result, [ + { + surface: '者', + reading: 'もの', + headword: '者', + startPos: 0, + endPos: 1, + isNameMatch: false, + frequencyRank: 475, + }, + ]); +}); + test('requestYomitanScanTokens marks tokens backed by SubMiner character dictionary entries', async () => { const deps = createDeps(async (script) => { if (script.includes('optionsGetFull')) { diff --git a/src/core/services/tokenizer/yomitan-parser-runtime.ts b/src/core/services/tokenizer/yomitan-parser-runtime.ts index fddda4e..4613d80 100644 --- a/src/core/services/tokenizer/yomitan-parser-runtime.ts +++ b/src/core/services/tokenizer/yomitan-parser-runtime.ts @@ -20,19 +20,24 @@ interface YomitanParserRuntimeDeps { createYomitanExtensionWindow?: (pageName: string) => Promise; } +type YomitanFrequencyMode = 'occurrence-based' | 'rank-based'; + export interface YomitanDictionaryInfo { title: string; revision?: string | number; + frequencyMode?: YomitanFrequencyMode; } export interface YomitanTermFrequency { term: string; reading: string | null; + hasReading: boolean; dictionary: string; dictionaryPriority: number; frequency: number; displayValue: string | null; displayValueParsed: 
boolean; + frequencyDerivedFromDisplayValue: boolean; } export interface YomitanTermReadingPair { @@ -47,6 +52,7 @@ export interface YomitanScanToken { startPos: number; endPos: number; isNameMatch?: boolean; + frequencyRank?: number; } interface YomitanProfileMetadata { @@ -54,6 +60,7 @@ interface YomitanProfileMetadata { scanLength: number; dictionaries: string[]; dictionaryPriorityByName: Record; + dictionaryFrequencyModeByName: Partial>; } const DEFAULT_YOMITAN_SCAN_LENGTH = 40; @@ -78,7 +85,8 @@ function isScanTokenArray(value: unknown): value is YomitanScanToken[] { typeof entry.headword === 'string' && typeof entry.startPos === 'number' && typeof entry.endPos === 'number' && - (entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean'), + (entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean') && + (entry.frequencyRank === undefined || typeof entry.frequencyRank === 'number'), ) ); } @@ -117,24 +125,22 @@ function parsePositiveFrequencyString(value: string): number | null { return null; } - const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0]; - if (!numericPrefix) { + const numericMatch = trimmed.match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0]; + if (!numericMatch) { return null; } - const chunks = numericPrefix.split(','); - const normalizedNumber = - chunks.length <= 1 - ? (chunks[0] ?? '') - : chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk)) - ? chunks.join('') - : (chunks[0] ?? 
''); - const parsed = Number.parseInt(normalizedNumber, 10); + const parsed = Number.parseFloat(numericMatch); if (!Number.isFinite(parsed) || parsed <= 0) { return null; } - return parsed; + const normalized = Math.floor(parsed); + if (!Number.isFinite(normalized) || normalized <= 0) { + return null; + } + + return normalized; } function parsePositiveFrequencyValue(value: unknown): number | null { @@ -159,6 +165,19 @@ function parsePositiveFrequencyValue(value: unknown): number | null { return null; } +function parseDisplayFrequencyValue(value: unknown): number | null { + if (typeof value === 'string') { + const leadingDigits = value.trim().match(/^\d+/)?.[0]; + if (!leadingDigits) { + return null; + } + const parsed = Number.parseInt(leadingDigits, 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : null; + } + + return parsePositiveFrequencyValue(value); +} + function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null { if (!isObject(value)) { return null; @@ -169,9 +188,7 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null { const rawFrequency = parsePositiveFrequencyValue(value.frequency); const displayValueRaw = value.displayValue; const parsedDisplayFrequency = - displayValueRaw !== null && displayValueRaw !== undefined - ? parsePositiveFrequencyValue(displayValueRaw) - : null; + displayValueRaw !== null && displayValueRaw !== undefined ? parseDisplayFrequencyValue(displayValueRaw) : null; const frequency = parsedDisplayFrequency ?? rawFrequency; if (!term || !dictionary || frequency === null) { return null; @@ -184,17 +201,20 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null { const reading = value.reading === null ? null : typeof value.reading === 'string' ? value.reading : null; + const hasReading = value.hasReading === false ? false : reading !== null; const displayValue = typeof displayValueRaw === 'string' ? 
displayValueRaw : null; const displayValueParsed = value.displayValueParsed === true; return { term, reading, + hasReading, dictionary, dictionaryPriority, frequency, displayValue, displayValueParsed, + frequencyDerivedFromDisplayValue: parsedDisplayFrequency !== null, }; } @@ -300,17 +320,34 @@ function toYomitanProfileMetadata(value: unknown): YomitanProfileMetadata | null } } + const dictionaryFrequencyModeByNameRaw = value.dictionaryFrequencyModeByName; + const dictionaryFrequencyModeByName: Partial> = {}; + if (isObject(dictionaryFrequencyModeByNameRaw)) { + for (const [name, frequencyModeRaw] of Object.entries(dictionaryFrequencyModeByNameRaw)) { + const normalizedName = name.trim(); + if (!normalizedName) { + continue; + } + if (frequencyModeRaw !== 'occurrence-based' && frequencyModeRaw !== 'rank-based') { + continue; + } + dictionaryFrequencyModeByName[normalizedName] = frequencyModeRaw; + } + } + return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName, + dictionaryFrequencyModeByName, }; } function normalizeFrequencyEntriesWithPriority( rawResult: unknown[], dictionaryPriorityByName: Record, + dictionaryFrequencyModeByName: Partial>, ): YomitanTermFrequency[] { const normalized: YomitanTermFrequency[] = []; for (const entry of rawResult) { @@ -319,6 +356,10 @@ function normalizeFrequencyEntriesWithPriority( continue; } + if (dictionaryFrequencyModeByName[frequency.dictionary] === 'occurrence-based') { + continue; + } + const dictionaryPriority = dictionaryPriorityByName[frequency.dictionary]; normalized.push({ ...frequency, @@ -425,8 +466,34 @@ async function requestYomitanProfileMetadata( acc[entry.name] = index; return acc; }, {}); + let dictionaryFrequencyModeByName = {}; + try { + const dictionaryInfo = await invoke("getDictionaryInfo", undefined); + dictionaryFrequencyModeByName = Array.isArray(dictionaryInfo) + ? 
dictionaryInfo.reduce((acc, entry) => { + if (!entry || typeof entry !== "object" || typeof entry.title !== "string") { + return acc; + } + if ( + entry.frequencyMode === "occurrence-based" || + entry.frequencyMode === "rank-based" + ) { + acc[entry.title] = entry.frequencyMode; + } + return acc; + }, {}) + : {}; + } catch { + dictionaryFrequencyModeByName = {}; + } - return { profileIndex, scanLength, dictionaries, dictionaryPriorityByName }; + return { + profileIndex, + scanLength, + dictionaries, + dictionaryPriorityByName, + dictionaryFrequencyModeByName + }; })(); `; @@ -774,7 +841,133 @@ const YOMITAN_SCANNING_HELPERS = String.raw` } return segments; } - function getPreferredHeadword(dictionaryEntries, token) { + function parsePositiveFrequencyNumber(value) { + if (typeof value === 'number' && Number.isFinite(value) && value > 0) { + return Math.max(1, Math.floor(value)); + } + if (typeof value === 'string') { + const numericMatch = value.trim().match(/[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?/)?.[0]; + if (!numericMatch) { return null; } + const parsed = Number.parseFloat(numericMatch); + if (!Number.isFinite(parsed) || parsed <= 0) { return null; } + return Math.max(1, Math.floor(parsed)); + } + if (Array.isArray(value)) { + for (const item of value) { + const parsed = parsePositiveFrequencyNumber(item); + if (parsed !== null) { return parsed; } + } + } + return null; + } + function parseDisplayFrequencyNumber(value) { + if (typeof value === 'string') { + const leadingDigits = value.trim().match(/^\d+/)?.[0]; + if (!leadingDigits) { return null; } + const parsed = Number.parseInt(leadingDigits, 10); + return Number.isFinite(parsed) && parsed > 0 ? 
parsed : null; + } + return parsePositiveFrequencyNumber(value); + } + function getFrequencyDictionaryName(frequency) { + const candidates = [ + frequency?.dictionary, + frequency?.dictionaryName, + frequency?.name, + frequency?.title, + frequency?.dictionaryTitle, + frequency?.dictionaryAlias + ]; + for (const candidate of candidates) { + if (typeof candidate === 'string' && candidate.trim().length > 0) { + return candidate.trim(); + } + } + return null; + } + function getBestFrequencyRank(dictionaryEntry, headwordIndex, dictionaryPriorityByName, dictionaryFrequencyModeByName) { + let best = null; + const headwordCount = Array.isArray(dictionaryEntry?.headwords) ? dictionaryEntry.headwords.length : 0; + for (const frequency of dictionaryEntry?.frequencies || []) { + if (!frequency || typeof frequency !== 'object') { continue; } + const frequencyHeadwordIndex = frequency.headwordIndex; + if (typeof frequencyHeadwordIndex === 'number') { + if (frequencyHeadwordIndex !== headwordIndex) { continue; } + } else if (headwordCount > 1) { + continue; + } + const dictionary = getFrequencyDictionaryName(frequency); + if (!dictionary) { continue; } + if (dictionaryFrequencyModeByName[dictionary] === 'occurrence-based') { continue; } + const rank = + parseDisplayFrequencyNumber(frequency.displayValue) ?? + parsePositiveFrequencyNumber(frequency.frequency); + if (rank === null) { continue; } + const priorityRaw = dictionaryPriorityByName[dictionary]; + const fallbackPriority = + typeof frequency.dictionaryIndex === 'number' && Number.isFinite(frequency.dictionaryIndex) + ? Math.max(0, Math.floor(frequency.dictionaryIndex)) + : Number.MAX_SAFE_INTEGER; + const priority = + typeof priorityRaw === 'number' && Number.isFinite(priorityRaw) + ? Math.max(0, Math.floor(priorityRaw)) + : fallbackPriority; + if (best === null || priority < best.priority || (priority === best.priority && rank < best.rank)) { + best = { priority, rank }; + } + } + return best?.rank ?? 
null; + } + function hasExactSource(headword, token, requirePrimary) { + for (const src of headword.sources || []) { + if (src.originalText !== token) { continue; } + if (requirePrimary && !src.isPrimary) { continue; } + if (src.matchType !== 'exact') { continue; } + return true; + } + return false; + } + function collectExactHeadwordMatches(dictionaryEntries, token, requirePrimary) { + const matches = []; + for (const dictionaryEntry of dictionaryEntries || []) { + const headwords = Array.isArray(dictionaryEntry?.headwords) ? dictionaryEntry.headwords : []; + for (let headwordIndex = 0; headwordIndex < headwords.length; headwordIndex += 1) { + const headword = headwords[headwordIndex]; + if (!hasExactSource(headword, token, requirePrimary)) { continue; } + matches.push({ dictionaryEntry, headword, headwordIndex }); + } + } + return matches; + } + function sameHeadword(match, preferredMatch) { + if (!match || !preferredMatch) { + return false; + } + if (match.headword?.term !== preferredMatch.headword?.term) { + return false; + } + const matchReading = typeof match.headword?.reading === 'string' ? match.headword.reading : ''; + const preferredReading = + typeof preferredMatch.headword?.reading === 'string' ? 
preferredMatch.headword.reading : ''; + return matchReading === preferredReading; + } + function getBestFrequencyRankForMatches(matches, dictionaryPriorityByName, dictionaryFrequencyModeByName) { + let best = null; + for (const match of matches) { + const rank = getBestFrequencyRank( + match.dictionaryEntry, + match.headwordIndex, + dictionaryPriorityByName, + dictionaryFrequencyModeByName + ); + if (rank === null) { continue; } + if (best === null || rank < best) { + best = rank; + } + } + return best; + } + function getPreferredHeadword(dictionaryEntries, token, dictionaryPriorityByName, dictionaryFrequencyModeByName) { function appendDictionaryNames(target, value) { if (!value || typeof value !== 'object') { return; @@ -813,36 +1006,33 @@ const YOMITAN_SCANNING_HELPERS = String.raw` } return getDictionaryEntryNames(entry).some((name) => name.startsWith("SubMiner Character Dictionary")); } - function hasExactPrimarySource(headword, token) { - for (const src of headword.sources || []) { - if (src.originalText !== token) { continue; } - if (!src.isPrimary) { continue; } - if (src.matchType !== 'exact') { continue; } - return true; - } - return false; - } + const exactPrimaryMatches = collectExactHeadwordMatches(dictionaryEntries, token, true); let matchedNameDictionary = false; if (includeNameMatchMetadata) { for (const dictionaryEntry of dictionaryEntries || []) { if (!isNameDictionaryEntry(dictionaryEntry)) { continue; } - for (const headword of dictionaryEntry.headwords || []) { - if (!hasExactPrimarySource(headword, token)) { continue; } + for (const match of exactPrimaryMatches) { + if (match.dictionaryEntry !== dictionaryEntry) { continue; } matchedNameDictionary = true; break; } if (matchedNameDictionary) { break; } } } - for (const dictionaryEntry of dictionaryEntries || []) { - for (const headword of dictionaryEntry.headwords || []) { - if (!hasExactPrimarySource(headword, token)) { continue; } - return { - term: headword.term, - reading: headword.reading, 
- isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntry) - }; - } + const preferredMatch = exactPrimaryMatches[0]; + if (preferredMatch) { + const exactFrequencyMatches = collectExactHeadwordMatches(dictionaryEntries, token, false) + .filter((match) => sameHeadword(match, preferredMatch)); + return { + term: preferredMatch.headword.term, + reading: preferredMatch.headword.reading, + isNameMatch: matchedNameDictionary || isNameDictionaryEntry(preferredMatch.dictionaryEntry), + frequencyRank: getBestFrequencyRankForMatches( + exactFrequencyMatches.length > 0 ? exactFrequencyMatches : exactPrimaryMatches, + dictionaryPriorityByName, + dictionaryFrequencyModeByName + ) + }; } return null; } @@ -853,6 +1043,8 @@ function buildYomitanScanningScript( profileIndex: number, scanLength: number, includeNameMatchMetadata: boolean, + dictionaryPriorityByName: Record, + dictionaryFrequencyModeByName: Partial>, ): string { return ` (async () => { @@ -876,6 +1068,8 @@ function buildYomitanScanningScript( }); ${YOMITAN_SCANNING_HELPERS} const includeNameMatchMetadata = ${includeNameMatchMetadata ? 'true' : 'false'}; + const dictionaryPriorityByName = ${JSON.stringify(dictionaryPriorityByName)}; + const dictionaryFrequencyModeByName = ${JSON.stringify(dictionaryFrequencyModeByName)}; const text = ${JSON.stringify(text)}; const details = {matchType: "exact", deinflect: true}; const tokens = []; @@ -889,7 +1083,12 @@ ${YOMITAN_SCANNING_HELPERS} const originalTextLength = typeof result?.originalTextLength === "number" ? 
result.originalTextLength : 0; if (dictionaryEntries.length > 0 && originalTextLength > 0 && (originalTextLength !== character.length || isCodePointJapanese(codePoint))) { const source = substring.substring(0, originalTextLength); - const preferredHeadword = getPreferredHeadword(dictionaryEntries, source); + const preferredHeadword = getPreferredHeadword( + dictionaryEntries, + source, + dictionaryPriorityByName, + dictionaryFrequencyModeByName + ); if (preferredHeadword && typeof preferredHeadword.term === "string") { const reading = typeof preferredHeadword.reading === "string" ? preferredHeadword.reading : ""; const segments = distributeFuriganaInflected(preferredHeadword.term, reading, source); @@ -900,6 +1099,10 @@ ${YOMITAN_SCANNING_HELPERS} startPos: i, endPos: i + originalTextLength, isNameMatch: includeNameMatchMetadata && preferredHeadword.isNameMatch === true, + frequencyRank: + typeof preferredHeadword.frequencyRank === "number" && Number.isFinite(preferredHeadword.frequencyRank) + ? Math.max(1, Math.floor(preferredHeadword.frequencyRank)) + : undefined, }); i += originalTextLength; continue; @@ -1036,6 +1239,8 @@ export async function requestYomitanScanTokens( profileIndex, scanLength, options?.includeNameMatchMetadata === true, + metadata?.dictionaryPriorityByName ?? {}, + metadata?.dictionaryFrequencyModeByName ?? {}, ), true, ); @@ -1099,7 +1304,11 @@ async function fetchYomitanTermFrequencies( try { const rawResult = await parserWindow.webContents.executeJavaScript(script, true); return Array.isArray(rawResult) - ? normalizeFrequencyEntriesWithPriority(rawResult, metadata.dictionaryPriorityByName) + ? 
normalizeFrequencyEntriesWithPriority( + rawResult, + metadata.dictionaryPriorityByName, + metadata.dictionaryFrequencyModeByName, + ) : []; } catch (err) { logger.error('Yomitan term frequency request failed:', (err as Error).message); @@ -1541,10 +1750,15 @@ export async function getYomitanDictionaryInfo( .map((entry) => { const title = typeof entry.title === 'string' ? entry.title.trim() : ''; const revision = entry.revision; + const frequencyMode: YomitanFrequencyMode | undefined = + entry.frequencyMode === 'occurrence-based' || entry.frequencyMode === 'rank-based' + ? entry.frequencyMode + : undefined; return { title, revision: typeof revision === 'string' || typeof revision === 'number' ? revision : undefined, + frequencyMode, }; }) .filter((entry) => entry.title.length > 0); diff --git a/src/main.ts b/src/main.ts index 86869e4..947a11d 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1672,7 +1672,7 @@ function shouldInitializeMecabForAnnotations(): boolean { const config = getResolvedConfig(); const nPlusOneEnabled = getRuntimeBooleanOption( 'subtitle.annotation.nPlusOne', - config.ankiConnect.nPlusOne.highlightEnabled, + config.ankiConnect.knownWords.highlightEnabled, ); const jlptEnabled = getRuntimeBooleanOption( 'subtitle.annotation.jlpt', @@ -2511,6 +2511,7 @@ const ensureStatsServerStarted = (): string => { port: getResolvedConfig().stats.serverPort, staticDir: statsDistPath, tracker, + knownWordCachePath: path.join(USER_DATA_PATH, 'known-words-cache.json'), }); appState.statsServer = statsServer; } @@ -2576,6 +2577,7 @@ const immersionTrackerStartupMainDeps: Parameters< registerStatsOverlayToggle({ staticDir: statsDistPath, preloadPath: statsPreloadPath, + getApiBaseUrl: () => ensureStatsServerStarted(), getToggleKey: () => getResolvedConfig().stats.toggleKey, resolveBounds: () => getCurrentOverlayGeometry(), }); @@ -3058,11 +3060,11 @@ const { }, getKnownWordMatchMode: () => appState.ankiIntegration?.getKnownWordMatchMode() ?? 
- getResolvedConfig().ankiConnect.nPlusOne.matchMode, + getResolvedConfig().ankiConnect.knownWords.matchMode, getNPlusOneEnabled: () => getRuntimeBooleanOption( 'subtitle.annotation.nPlusOne', - getResolvedConfig().ankiConnect.nPlusOne.highlightEnabled, + getResolvedConfig().ankiConnect.knownWords.highlightEnabled, ), getMinSentenceWordsForNPlusOne: () => getResolvedConfig().ankiConnect.nPlusOne.minSentenceWords, diff --git a/src/main/runtime/config-hot-reload-handlers.ts b/src/main/runtime/config-hot-reload-handlers.ts index 9458b9a..602be3c 100644 --- a/src/main/runtime/config-hot-reload-handlers.ts +++ b/src/main/runtime/config-hot-reload-handlers.ts @@ -25,7 +25,7 @@ export function resolveSubtitleStyleForRenderer(config: ResolvedConfig) { return { ...config.subtitleStyle, nPlusOneColor: config.ankiConnect.nPlusOne.nPlusOne, - knownWordColor: config.ankiConnect.nPlusOne.knownWord, + knownWordColor: config.ankiConnect.knownWords.color, nameMatchColor: config.subtitleStyle.nameMatchColor, enableJlpt: config.subtitleStyle.enableJlpt, frequencyDictionary: config.subtitleStyle.frequencyDictionary, diff --git a/src/main/runtime/stats-cli-command.test.ts b/src/main/runtime/stats-cli-command.test.ts index d382f1e..81036e2 100644 --- a/src/main/runtime/stats-cli-command.test.ts +++ b/src/main/runtime/stats-cli-command.test.ts @@ -9,7 +9,7 @@ function makeHandler(overrides: Partial ({ immersionTracking: { enabled: true }, - stats: { serverPort: 5175 }, + stats: { serverPort: 6969 }, }), ensureImmersionTrackerStarted: () => { calls.push('ensureImmersionTrackerStarted'); @@ -17,7 +17,7 @@ function makeHandler(overrides: Partial ({ cleanupVocabularyStats: undefined }), ensureStatsServerStarted: () => { calls.push('ensureStatsServerStarted'); - return 'http://127.0.0.1:5175'; + return 'http://127.0.0.1:6969'; }, openExternal: async (url) => { calls.push(`openExternal:${url}`); @@ -51,13 +51,13 @@ test('stats cli command starts tracker, server, browser, and writes success 
resp assert.deepEqual(calls, [ 'ensureImmersionTrackerStarted', 'ensureStatsServerStarted', - 'openExternal:http://127.0.0.1:5175', - 'info:Stats dashboard available at http://127.0.0.1:5175', + 'openExternal:http://127.0.0.1:6969', + 'info:Stats dashboard available at http://127.0.0.1:6969', ]); assert.deepEqual(responses, [ { responsePath: '/tmp/subminer-stats-response.json', - payload: { ok: true, url: 'http://127.0.0.1:5175' }, + payload: { ok: true, url: 'http://127.0.0.1:6969' }, }, ]); }); @@ -66,7 +66,7 @@ test('stats cli command fails when immersion tracking is disabled', async () => const { handler, calls, responses } = makeHandler({ getResolvedConfig: () => ({ immersionTracking: { enabled: false }, - stats: { serverPort: 5175 }, + stats: { serverPort: 6969 }, }), }); diff --git a/src/token-merger.ts b/src/token-merger.ts index f26470d..fbec420 100644 --- a/src/token-merger.ts +++ b/src/token-merger.ts @@ -169,13 +169,17 @@ export function mergeTokens( isKnownWord: (text: string) => boolean = () => false, knownWordMatchMode: 'headword' | 'surface' = 'headword', shouldLookupKnownWords = true, + sourceText?: string, ): MergedToken[] { if (!tokens || tokens.length === 0) { return []; } const result: MergedToken[] = []; + const normalizedSourceText = + typeof sourceText === 'string' ? sourceText.replace(/\r?\n/g, ' ').trim() : null; let charOffset = 0; + let sourceCursor = 0; let lastStandaloneToken: Token | null = null; const resolveKnownMatch = (text: string | undefined): boolean => { if (!shouldLookupKnownWords || !text) { @@ -185,9 +189,12 @@ export function mergeTokens( }; for (const token of tokens) { - const start = charOffset; - const end = charOffset + token.word.length; + const matchedStart = + normalizedSourceText !== null ? normalizedSourceText.indexOf(token.word, sourceCursor) : -1; + const start = matchedStart >= sourceCursor ? 
matchedStart : charOffset; + const end = start + token.word.length; charOffset = end; + sourceCursor = end; let shouldMergeToken = false; diff --git a/src/types.ts b/src/types.ts index 25c99bf..ec94b5d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -244,13 +244,15 @@ export interface AnkiConnectConfig { fallbackDuration?: number; maxMediaDuration?: number; }; - nPlusOne?: { + knownWords?: { highlightEnabled?: boolean; refreshMinutes?: number; matchMode?: NPlusOneMatchMode; decks?: string[]; + color?: string; + }; + nPlusOne?: { nPlusOne?: string; - knownWord?: string; minSentenceWords?: number; }; behavior?: { @@ -733,13 +735,15 @@ export interface ResolvedConfig { fallbackDuration: number; maxMediaDuration: number; }; - nPlusOne: { + knownWords: { highlightEnabled: boolean; refreshMinutes: number; matchMode: NPlusOneMatchMode; decks: string[]; + color: string; + }; + nPlusOne: { nPlusOne: string; - knownWord: string; minSentenceWords: number; }; behavior: { diff --git a/stats/bun.lock b/stats/bun.lock index 6997aba..91148e8 100644 --- a/stats/bun.lock +++ b/stats/bun.lock @@ -5,6 +5,8 @@ "": { "name": "@subminer/stats-ui", "dependencies": { + "@fontsource-variable/geist": "^5.2.8", + "@fontsource-variable/geist-mono": "^5.2.7", "react": "^19.0.0", "react-dom": "^19.0.0", "recharts": "^2.15.0", @@ -113,6 +115,10 @@ "@esbuild/win32-x64": ["@esbuild/win32-x64@0.25.12", "", { "os": "win32", "cpu": "x64" }, "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA=="], + "@fontsource-variable/geist": ["@fontsource-variable/geist@5.2.8", "", {}, "sha512-cJ6m9e+8MQ5dCYJsLylfZrgBh6KkG4bOLckB35Tr9J/EqdkEM6QllH5PxqP1dhTvFup+HtMRPuz9xOjxXJggxw=="], + + "@fontsource-variable/geist-mono": ["@fontsource-variable/geist-mono@5.2.7", "", {}, "sha512-ZKlZ5sjtalb2TwXKs400mAGDlt/+2ENLNySPx0wTz3bP3mWARCsUW+rpxzZc7e05d2qGch70pItt3K4qttbIYA=="], + "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.3.13", "", { "dependencies": { 
"@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA=="], "@jridgewell/remapping": ["@jridgewell/remapping@2.3.5", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ=="], diff --git a/stats/package.json b/stats/package.json index fd1c580..cbe3d71 100644 --- a/stats/package.json +++ b/stats/package.json @@ -8,6 +8,8 @@ "preview": "vite preview" }, "dependencies": { + "@fontsource-variable/geist": "^5.2.8", + "@fontsource-variable/geist-mono": "^5.2.7", "react": "^19.0.0", "react-dom": "^19.0.0", "recharts": "^2.15.0" diff --git a/stats/src/App.tsx b/stats/src/App.tsx index c272488..57b82bf 100644 --- a/stats/src/App.tsx +++ b/stats/src/App.tsx @@ -42,12 +42,12 @@ export function App() {
{activeTab === 'overview' ? ( -
+
) : null} {activeTab === 'anime' ? ( -
+
setSelectedAnimeId(null)} @@ -56,12 +56,12 @@ export function App() {
) : null} {activeTab === 'trends' ? ( -