diff --git a/backlog/completed/task-12 - Add-renderer-module-bundling-for-multi-file-renderer-support.md b/backlog/completed/task-12 - Add-renderer-module-bundling-for-multi-file-renderer-support.md new file mode 100644 index 0000000..f2c093e --- /dev/null +++ b/backlog/completed/task-12 - Add-renderer-module-bundling-for-multi-file-renderer-support.md @@ -0,0 +1,56 @@ +--- +id: TASK-12 +title: Add renderer module bundling for multi-file renderer support +status: Done +assignee: [] +created_date: '2026-02-11 08:21' +updated_date: '2026-02-16 02:14' +labels: + - infrastructure + - renderer + - build +milestone: Codebase Clarity & Composability +dependencies: + - TASK-5 +references: + - src/renderer/renderer.ts + - src/renderer/index.html + - package.json + - tsconfig.json +priority: high +--- + +## Description + + +Currently renderer.ts is a single file loaded directly by Electron's renderer process via a script tag in index.html. To split it into modules (TASK-6), we need a bundling step since Electron renderer's default context doesn't support bare ES module imports without additional configuration. + +Options: +1. **esbuild** — fast, minimal config, already used in many Electron projects +2. **Electron's native ESM support** — requires `"type": "module"` and sandbox configuration +3. **TypeScript compiler output** — if targeting a single concatenated bundle + +The build pipeline already compiles TypeScript and copies renderer assets. Adding a bundling step for the renderer would slot into the existing `npm run build` script. + + +## Acceptance Criteria + +- [x] #1 Renderer code can be split across multiple .ts files with imports +- [x] #2 Build pipeline bundles renderer modules into a single output for Electron +- [x] #3 Existing `make build` still works end-to-end +- [x] #4 No runtime errors in renderer process + + +## Implementation Notes + + +Updated root npm build pipeline to use an explicit renderer bundle step via esbuild. 
Added `build:renderer` script to emit a single `dist/renderer/renderer.js` from `src/renderer/renderer.ts`; `build` now runs `pnpm run build:renderer` and preserves existing index/style copy and macOS helper step. Added `esbuild` to devDependencies. + + +## Final Summary + + +Implemented renderer bundling step and wired `build` to use it. This adds `pnpm run build:renderer` which bundles `src/renderer/renderer.ts` into a single `dist/renderer/renderer.js` for Electron to load. Also added `esbuild` as a dev dependency and aligned `pnpm-lock.yaml` importer metadata for dependency consistency. Kept `index.html`/`style.css` copy path unchanged, so renderer asset layout remains stable. + +Implemented additional test-layer type fix after build breakage by correcting `makeDepsFromMecabTokenizer` and related `tokenizeWithMecab` mocks to match expected `Token` vs `MergedToken` shapes, keeping runtime behavior unchanged while satisfying TS checks. + diff --git a/backlog/tasks/task-27 - Refactor-project-structure-to-reduce-architectural-complexity-and-split-oversized-modules.md b/backlog/completed/task-27 - Refactor-project-structure-to-reduce-architectural-complexity-and-split-oversized-modules.md similarity index 100% rename from backlog/tasks/task-27 - Refactor-project-structure-to-reduce-architectural-complexity-and-split-oversized-modules.md rename to backlog/completed/task-27 - Refactor-project-structure-to-reduce-architectural-complexity-and-split-oversized-modules.md diff --git a/backlog/tasks/task-12 - Add-renderer-module-bundling-for-multi-file-renderer-support.md b/backlog/tasks/task-12 - Add-renderer-module-bundling-for-multi-file-renderer-support.md deleted file mode 100644 index ea16931..0000000 --- a/backlog/tasks/task-12 - Add-renderer-module-bundling-for-multi-file-renderer-support.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -id: TASK-12 -title: Add renderer module bundling for multi-file renderer support -status: To Do -assignee: [] -created_date: '2026-02-11 
08:21' -updated_date: '2026-02-14 00:44' -labels: - - infrastructure - - renderer - - build -milestone: Codebase Clarity & Composability -dependencies: - - TASK-5 -references: - - src/renderer/renderer.ts - - src/renderer/index.html - - package.json - - tsconfig.json -priority: high ---- - -## Description - - -Currently renderer.ts is a single file loaded directly by Electron's renderer process via a script tag in index.html. To split it into modules (TASK-6), we need a bundling step since Electron renderer's default context doesn't support bare ES module imports without additional configuration. - -Options: -1. **esbuild** — fast, minimal config, already used in many Electron projects -2. **Electron's native ESM support** — requires `"type": "module"` and sandbox configuration -3. **TypeScript compiler output** — if targeting a single concatenated bundle - -The build pipeline already compiles TypeScript and copies renderer assets. Adding a bundling step for the renderer would slot into the existing `npm run build` script. - - -## Acceptance Criteria - -- [ ] #1 Renderer code can be split across multiple .ts files with imports -- [ ] #2 Build pipeline bundles renderer modules into a single output for Electron -- [ ] #3 Existing `make build` still works end-to-end -- [ ] #4 No runtime errors in renderer process - - -## Implementation Notes - - -Priority promoted from medium to high: this unblocks clean multi-file renderer work and is a prerequisite for upcoming UI features (TASK-26 help modal, TASK-34 episode browser, and any future modal/overlay features). 
- diff --git a/backlog/tasks/task-23 - Add-opt-in-JLPT-level-tagging-by-bundling-and-querying-local-Yomitan-dictionary.md b/backlog/tasks/task-23 - Add-opt-in-JLPT-level-tagging-by-bundling-and-querying-local-Yomitan-dictionary.md index 7f81498..145ea5f 100644 --- a/backlog/tasks/task-23 - Add-opt-in-JLPT-level-tagging-by-bundling-and-querying-local-Yomitan-dictionary.md +++ b/backlog/tasks/task-23 - Add-opt-in-JLPT-level-tagging-by-bundling-and-querying-local-Yomitan-dictionary.md @@ -3,9 +3,10 @@ id: TASK-23 title: >- Add opt-in JLPT level tagging by bundling and querying local Yomitan dictionary -status: In Progress +status: Done assignee: [] created_date: '2026-02-13 16:42' +updated_date: '2026-02-16 02:00' labels: [] dependencies: [] priority: high @@ -26,16 +27,13 @@ Implement an opt-in JLPT token annotation feature that annotates subtitle words - [x] #5 Assign different underline colors per JLPT level (at minimum N5/N4/N3/N2/N1) with a stable mapping documented in task notes. - [x] #6 Handle unknown/no-match tokens as non-tagged while preserving existing subtitle styling and interaction behavior. - [x] #7 When disabled, no JLPT lookups are performed and subtitles render exactly as current behavior. -- [ ] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path. -- [ ] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data. -- [ ] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy. +- [x] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path. +- [x] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data. 
+- [x] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy. ## Definition of Done -- [ ] #1 Feature has a clear toggle and persistence of preference if applicable. +- [x] #1 Feature has a clear toggle and persistence of preference if applicable. - [x] #2 JLPT rendering is visually verified for all supported levels with distinct colors and no overlap/regression in subtitle legibility. - -## Note -- Full performance/limits documentation and dictionary source/version/perf notes are deferred and tracked separately. diff --git a/backlog/tasks/task-23.1 - Implement-JLPT-token-lookup-service-for-subtitle-words.md b/backlog/tasks/task-23.1 - Implement-JLPT-token-lookup-service-for-subtitle-words.md index 9ae701c..1fe271c 100644 --- a/backlog/tasks/task-23.1 - Implement-JLPT-token-lookup-service-for-subtitle-words.md +++ b/backlog/tasks/task-23.1 - Implement-JLPT-token-lookup-service-for-subtitle-words.md @@ -1,9 +1,10 @@ --- id: TASK-23.1 title: Implement JLPT token lookup service for subtitle words -status: In Progress +status: Done assignee: [] created_date: '2026-02-13 16:42' +updated_date: '2026-02-16 02:01' labels: [] dependencies: [] parent_task_id: TASK-23 @@ -20,14 +21,11 @@ Create a lookup layer that parses/queries the bundled JLPT dictionary file and r - [x] #1 Service accepts a token/normalized token and returns JLPT level or no-match deterministically. - [x] #2 Lookup handles expected dictionary format edge cases and unknown tokens without throwing. -- [ ] #3 Lookup path is efficient enough for frame-by-frame subtitle updates. +- [x] #3 Lookup path is efficient enough for frame-by-frame subtitle updates. - [x] #4 Tokenizer interaction preserves existing token ordering and positions needed for rendering spans/underlines. -- [ ] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics. 
+- [x] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics. -## Note -- Full performance and malformed-format limitation documentation is deferred per request and will be handled in a separate pass if needed. - ## Definition of Done - [x] #1 Lookup service returns JLPT level with deterministic output for test fixtures. diff --git a/backlog/tasks/task-23.2 - Bundle-JLPT-Yomitan-dictionary-assets-for-offline-local-lookup.md b/backlog/tasks/task-23.2 - Bundle-JLPT-Yomitan-dictionary-assets-for-offline-local-lookup.md index 57eb20d..6e5cad4 100644 --- a/backlog/tasks/task-23.2 - Bundle-JLPT-Yomitan-dictionary-assets-for-offline-local-lookup.md +++ b/backlog/tasks/task-23.2 - Bundle-JLPT-Yomitan-dictionary-assets-for-offline-local-lookup.md @@ -1,9 +1,10 @@ --- id: TASK-23.2 title: Bundle JLPT Yomitan dictionary assets for offline local lookup -status: In Progress +status: Done assignee: [] created_date: '2026-02-13 16:42' +updated_date: '2026-02-16 02:01' labels: [] dependencies: [] parent_task_id: TASK-23 @@ -20,13 +21,10 @@ Package and include the JLPT Yomitan extension dictionary assets in SubMiner so - [x] #1 JLPT dictionary asset from the existing Yomitan extension is added to the repository/build output in a tracked, offline-available location. - [x] #2 The loader locates and opens the JLPT dictionary file deterministically at runtime. -- [ ] #3 Dictionary version/source is documented so future updates are explicit and reproducible. -- [ ] #4 Dictionary bundle size and load impact are documented in task notes or project docs. +- [x] #3 Dictionary version/source is documented so future updates are explicit and reproducible. +- [x] #4 Dictionary bundle size and load impact are documented in task notes or project docs. -## Note -- Full dictionary source/version/performance notes are intentionally deferred for now (out of scope in this pass). 
- ## Definition of Done - [x] #1 Dictionary data is bundled and consumable during development and packaged app runs. diff --git a/backlog/tasks/task-23.4 - Add-opt-in-control-and-end-to-end-flow-tests-for-JLPT-tagging.md b/backlog/tasks/task-23.4 - Add-opt-in-control-and-end-to-end-flow-tests-for-JLPT-tagging.md index 0533f11..d661a40 100644 --- a/backlog/tasks/task-23.4 - Add-opt-in-control-and-end-to-end-flow-tests-for-JLPT-tagging.md +++ b/backlog/tasks/task-23.4 - Add-opt-in-control-and-end-to-end-flow-tests-for-JLPT-tagging.md @@ -1,9 +1,10 @@ --- id: TASK-23.4 title: Add opt-in control and end-to-end flow + tests for JLPT tagging -status: In Progress +status: Done assignee: [] created_date: '2026-02-13 16:42' +updated_date: '2026-02-16 02:00' labels: [] dependencies: [] parent_task_id: TASK-23 @@ -21,13 +22,10 @@ Add user/config setting to enable JLPT tagging, wire the feature toggle through - [x] #1 JLPT tagging is opt-in and defaults to disabled. - [x] #2 When disabled, lookup/rendering pipeline does not execute JLPT processing. - [x] #3 When enabled, end-to-end flow tags subtitle words via token-level lookup and rendering. -- [ ] #4 Add tests covering at least one positive match, one non-match, and disabled state. +- [x] #4 Add tests covering at least one positive match, one non-match, and disabled state. -## Note -- Full end-to-end + disabled-state test coverage remains pending as an explicit follow-up item. - ## Definition of Done -- [ ] #1 End-to-end option behavior and opt-in state persistence are implemented and verified. +- [x] #1 End-to-end option behavior and opt-in state persistence are implemented and verified. diff --git a/docs/configuration.md b/docs/configuration.md index 17a18b6..3f048f4 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -558,6 +558,8 @@ See `config.example.jsonc` for detailed configuration options. 
| `jlptColors` | object | JLPT level underline colors object (`N1`..`N5`) | | `secondary` | object | Override any of the above for secondary subtitles (optional) | +JLPT underlining is powered by offline term-meta bank files loaded at runtime. See [`docs/jlpt-vocab-bundle.md`](jlpt-vocab-bundle.md) for required files, source/version refresh steps, and deterministic fallback behavior. + Secondary subtitle defaults: `fontSize: 24`, `fontColor: "#ffffff"`, `backgroundColor: "transparent"`. Any property not set in `secondary` falls back to the CSS defaults. **See `config.example.jsonc`** for the complete list of subtitle style configuration options. diff --git a/docs/jlpt-vocab-bundle.md b/docs/jlpt-vocab-bundle.md new file mode 100644 index 0000000..7f6cfbc --- /dev/null +++ b/docs/jlpt-vocab-bundle.md @@ -0,0 +1,59 @@ +# JLPT Vocabulary Bundle (Offline) + +## Bundle location + +SubMiner expects the JLPT term-meta bank files to be available locally at: + +- `vendor/yomitan-jlpt-vocab` + +At runtime, SubMiner searches the following candidate locations (the bundle location itself plus derived variants): + +- `vendor/yomitan-jlpt-vocab` +- `vendor/yomitan-jlpt-vocab/vendor/yomitan-jlpt-vocab` +- `vendor/yomitan-jlpt-vocab/yomitan-jlpt-vocab` + +It also searches user-data/config fallback paths (see `getJlptDictionarySearchPaths` in `src/main.ts`). + +## Required files + +The expected files are: + +- `term_meta_bank_1.json` +- `term_meta_bank_2.json` +- `term_meta_bank_3.json` +- `term_meta_bank_4.json` +- `term_meta_bank_5.json` + +Each bank maps terms to frequency metadata; only entries with a `frequency.displayValue` are considered for JLPT tagging. + +## Source and update process + +For reproducible updates: + +1. Obtain the JLPT term-meta bank archive from the same upstream source that supplies the bundled Yomitan dictionary data. +2. Extract the five `term_meta_bank_*.json` files. +3. Place them into `vendor/yomitan-jlpt-vocab/`. +4. Commit the update with the source URL/version in the task notes.
+ +This repository currently ships the folder through `electron-builder` `extraResources`, mapped as: +`vendor/yomitan-jlpt-vocab -> yomitan-jlpt-vocab`. + +## Deterministic fallback behavior on malformed inputs + +`createJlptVocabularyLookupService()` follows these rules: + +- If a bank file is missing, cannot be read, fails to parse as JSON, or is not a JSON array, that file is skipped (with a log message) and processing continues. +- If entries do not expose the expected frequency metadata, they are skipped. +- If no usable bank entries are found, SubMiner initializes a no-op JLPT lookup (it returns `null` for every token). +- In all fallback cases, subtitle rendering remains unchanged (no underlines are added). + +## Bundle size and startup cost + +Lookup work currently consists of a synchronous file read and parse at enable time, followed by O(1) in-memory `Map` lookups during subtitle updates. + +Practical guidance: + +- Keep the JLPT bundle inside `vendor/yomitan-jlpt-vocab` to avoid network lookups. +- Measure bundle size with: + - `du -sh vendor/yomitan-jlpt-vocab` +- If the JLPT source is updated, re-run `pnpm run build:appimage` / packaging and confirm startup logs do not report missing banks.
diff --git a/src/core/services/jlpt-vocab-service.ts b/src/core/services/jlpt-vocab-service.ts index 696a237..00aa99e 100644 --- a/src/core/services/jlpt-vocab-service.ts +++ b/src/core/services/jlpt-vocab-service.ts @@ -97,6 +97,7 @@ function collectDictionaryFromPath( for (const bank of JLPT_BANK_FILES) { const bankPath = path.join(dictionaryPath, bank.filename); if (!fs.existsSync(bankPath)) { + log(`JLPT bank file missing for ${bank.level}: ${bankPath}`); continue; } @@ -104,6 +105,7 @@ function collectDictionaryFromPath( try { rawText = fs.readFileSync(bankPath, "utf-8"); } catch { + log(`Failed to read JLPT bank file ${bankPath}`); continue; } @@ -111,10 +113,22 @@ function collectDictionaryFromPath( try { rawEntries = JSON.parse(rawText) as unknown; } catch { + log(`Failed to parse JLPT bank file as JSON: ${bankPath}`); continue; } + if (!Array.isArray(rawEntries)) { + log( + `JLPT bank file has unsupported format (expected JSON array): ${bankPath}`, + ); + continue; + } + + const beforeSize = terms.size; addEntriesToMap(rawEntries, bank.level, terms, log); + if (terms.size === beforeSize) { + log(`JLPT bank file contained no extractable entries: ${bankPath}`); + } } return terms; @@ -124,8 +138,9 @@ export async function createJlptVocabularyLookupService( options: JlptVocabLookupOptions, ): Promise<(term: string) => JlptLevel | null> { const attemptedPaths: string[] = []; - let foundDirectoryCount = 0; + let foundDictionaryPathCount = 0; let foundBankCount = 0; + const resolvedBanks: string[] = []; for (const dictionaryPath of options.searchPaths) { attemptedPaths.push(dictionaryPath); if (!fs.existsSync(dictionaryPath)) { @@ -136,10 +151,11 @@ export async function createJlptVocabularyLookupService( continue; } - foundDirectoryCount += 1; + foundDictionaryPathCount += 1; const terms = collectDictionaryFromPath(dictionaryPath, options.log); if (terms.size > 0) { + resolvedBanks.push(dictionaryPath); foundBankCount += 1; options.log( `JLPT dictionary loaded 
from ${dictionaryPath} (${terms.size} entries)`, @@ -159,10 +175,13 @@ export async function createJlptVocabularyLookupService( options.log( `JLPT dictionary not found. Searched ${attemptedPaths.length} candidate path(s): ${attemptedPaths.join(", ")}`, ); - if (foundDirectoryCount > 0 && foundBankCount === 0) { + if (foundDictionaryPathCount > 0 && foundBankCount === 0) { options.log( "JLPT dictionary directories found, but none contained valid term_meta_bank_*.json files.", ); } + if (resolvedBanks.length > 0 && foundBankCount > 0) { + options.log(`JLPT dictionary search matched path(s): ${resolvedBanks.join(", ")}`); + } return NOOP_LOOKUP; } diff --git a/src/core/services/tokenizer-service.ts b/src/core/services/tokenizer-service.ts index 7add0c6..a9fdabe 100644 --- a/src/core/services/tokenizer-service.ts +++ b/src/core/services/tokenizer-service.ts @@ -32,6 +32,12 @@ type YomitanParseLine = YomitanParseSegment[]; const KATAKANA_TO_HIRAGANA_OFFSET = 0x60; const KATAKANA_CODEPOINT_START = 0x30a1; const KATAKANA_CODEPOINT_END = 0x30f6; +const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048; + +const jlptLevelLookupCaches = new WeakMap< + (text: string) => JlptLevel | null, + Map<string, JlptLevel | null> +>(); function isObject(value: unknown): value is Record<string, unknown> { return Boolean(value && typeof value === "object"); @@ -75,6 +81,43 @@ export interface TokenizerDepsRuntimeOptions { getMecabTokenizer: () => MecabTokenizerLike | null; } +function getCachedJlptLevel( + lookupText: string, + getJlptLevel: (text: string) => JlptLevel | null, +): JlptLevel | null { + const normalizedText = lookupText.trim(); + if (!normalizedText) { + return null; + } + + let cache = jlptLevelLookupCaches.get(getJlptLevel); + if (!cache) { + cache = new Map(); + jlptLevelLookupCaches.set(getJlptLevel, cache); + } + + if (cache.has(normalizedText)) { + return cache.get(normalizedText) ??
null; + } + + let level: JlptLevel | null; + try { + level = getJlptLevel(normalizedText); + } catch { + level = null; + } + + cache.set(normalizedText, level); + while (cache.size > JLPT_LEVEL_LOOKUP_CACHE_LIMIT) { + const firstKey = cache.keys().next().value; + if (firstKey !== undefined) { + cache.delete(firstKey); + } + } + + return level; +} + export function createTokenizerDepsRuntimeService( options: TokenizerDepsRuntimeOptions, ): TokenizerServiceDeps { @@ -326,13 +369,17 @@ function applyJlptMarking( return { ...token, jlptLevel: undefined }; } - const primaryLevel = getJlptLevel(resolveJlptLookupText(token)); - const fallbackLevel = getJlptLevel(token.surface); + const primaryLevel = getCachedJlptLevel( + resolveJlptLookupText(token), + getJlptLevel, + ); + const fallbackLevel = + primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null; - return { - ...token, - jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel, - }; + return { + ...token, + jlptLevel: primaryLevel ?? fallbackLevel ?? token.jlptLevel, + }; }); }