mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-27 18:22:41 -08:00
Update task metadata/docs and JLPT tokenizer work
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
---
|
||||
id: TASK-12
|
||||
title: Add renderer module bundling for multi-file renderer support
|
||||
status: Done
|
||||
assignee: []
|
||||
created_date: '2026-02-11 08:21'
|
||||
updated_date: '2026-02-16 02:14'
|
||||
labels:
|
||||
- infrastructure
|
||||
- renderer
|
||||
- build
|
||||
milestone: Codebase Clarity & Composability
|
||||
dependencies:
|
||||
- TASK-5
|
||||
references:
|
||||
- src/renderer/renderer.ts
|
||||
- src/renderer/index.html
|
||||
- package.json
|
||||
- tsconfig.json
|
||||
priority: high
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
Currently renderer.ts is a single file loaded directly by Electron's renderer process via a script tag in index.html. To split it into modules (TASK-6), we need a bundling step since Electron renderer's default context doesn't support bare ES module imports without additional configuration.
|
||||
|
||||
Options:
|
||||
1. **esbuild** — fast, minimal config, already used in many Electron projects
|
||||
2. **Electron's native ESM support** — requires `"type": "module"` and sandbox configuration
|
||||
3. **TypeScript compiler output** — if targeting a single concatenated bundle
|
||||
|
||||
The build pipeline already compiles TypeScript and copies renderer assets. Adding a bundling step for the renderer would slot into the existing `npm run build` script.
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
<!-- AC:BEGIN -->
|
||||
- [x] #1 Renderer code can be split across multiple .ts files with imports
|
||||
- [x] #2 Build pipeline bundles renderer modules into a single output for Electron
|
||||
- [x] #3 Existing `make build` still works end-to-end
|
||||
- [x] #4 No runtime errors in renderer process
|
||||
<!-- AC:END -->
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
<!-- SECTION:NOTES:BEGIN -->
|
||||
Updated the root npm build pipeline to use an explicit renderer bundle step via esbuild. Added a `build:renderer` script that emits a single `dist/renderer/renderer.js` from `src/renderer/renderer.ts`; `build` now runs `pnpm run build:renderer` and preserves the existing `index.html`/`style.css` copy and macOS helper steps. Added `esbuild` to `devDependencies`.
|
||||
<!-- SECTION:NOTES:END -->
|
||||
|
||||
## Final Summary
|
||||
|
||||
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||
Implemented renderer bundling step and wired `build` to use it. This adds `pnpm run build:renderer` which bundles `src/renderer/renderer.ts` into a single `dist/renderer/renderer.js` for Electron to load. Also added `esbuild` as a dev dependency and aligned `pnpm-lock.yaml` importer metadata for dependency consistency. Kept `index.html`/`style.css` copy path unchanged, so renderer asset layout remains stable.
|
||||
|
||||
Implemented an additional test-layer type fix after the build broke: corrected `makeDepsFromMecabTokenizer` and the related `tokenizeWithMecab` mocks to match the expected `Token` vs. `MergedToken` shapes. Runtime behavior is unchanged; the change only satisfies the TypeScript checks.
|
||||
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||
@@ -1,48 +0,0 @@
|
||||
---
|
||||
id: TASK-12
|
||||
title: Add renderer module bundling for multi-file renderer support
|
||||
status: To Do
|
||||
assignee: []
|
||||
created_date: '2026-02-11 08:21'
|
||||
updated_date: '2026-02-14 00:44'
|
||||
labels:
|
||||
- infrastructure
|
||||
- renderer
|
||||
- build
|
||||
milestone: Codebase Clarity & Composability
|
||||
dependencies:
|
||||
- TASK-5
|
||||
references:
|
||||
- src/renderer/renderer.ts
|
||||
- src/renderer/index.html
|
||||
- package.json
|
||||
- tsconfig.json
|
||||
priority: high
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
Currently renderer.ts is a single file loaded directly by Electron's renderer process via a script tag in index.html. To split it into modules (TASK-6), we need a bundling step since Electron renderer's default context doesn't support bare ES module imports without additional configuration.
|
||||
|
||||
Options:
|
||||
1. **esbuild** — fast, minimal config, already used in many Electron projects
|
||||
2. **Electron's native ESM support** — requires `"type": "module"` and sandbox configuration
|
||||
3. **TypeScript compiler output** — if targeting a single concatenated bundle
|
||||
|
||||
The build pipeline already compiles TypeScript and copies renderer assets. Adding a bundling step for the renderer would slot into the existing `npm run build` script.
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
<!-- AC:BEGIN -->
|
||||
- [ ] #1 Renderer code can be split across multiple .ts files with imports
|
||||
- [ ] #2 Build pipeline bundles renderer modules into a single output for Electron
|
||||
- [ ] #3 Existing `make build` still works end-to-end
|
||||
- [ ] #4 No runtime errors in renderer process
|
||||
<!-- AC:END -->
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
<!-- SECTION:NOTES:BEGIN -->
|
||||
Priority promoted from medium to high: this unblocks clean multi-file renderer work and is a prerequisite for upcoming UI features (TASK-26 help modal, TASK-34 episode browser, and any future modal/overlay features).
|
||||
<!-- SECTION:NOTES:END -->
|
||||
@@ -3,9 +3,10 @@ id: TASK-23
|
||||
title: >-
|
||||
Add opt-in JLPT level tagging by bundling and querying local Yomitan
|
||||
dictionary
|
||||
status: In Progress
|
||||
status: Done
|
||||
assignee: []
|
||||
created_date: '2026-02-13 16:42'
|
||||
updated_date: '2026-02-16 02:00'
|
||||
labels: []
|
||||
dependencies: []
|
||||
priority: high
|
||||
@@ -26,16 +27,13 @@ Implement an opt-in JLPT token annotation feature that annotates subtitle words
|
||||
- [x] #5 Assign different underline colors per JLPT level (at minimum N5/N4/N3/N2/N1) with a stable mapping documented in task notes.
|
||||
- [x] #6 Handle unknown/no-match tokens as non-tagged while preserving existing subtitle styling and interaction behavior.
|
||||
- [x] #7 When disabled, no JLPT lookups are performed and subtitles render exactly as current behavior.
|
||||
- [ ] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path.
|
||||
- [ ] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data.
|
||||
- [ ] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy.
|
||||
- [x] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path.
|
||||
- [x] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data.
|
||||
- [x] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Definition of Done
|
||||
<!-- DOD:BEGIN -->
|
||||
- [ ] #1 Feature has a clear toggle and persistence of preference if applicable.
|
||||
- [x] #1 Feature has a clear toggle and persistence of preference if applicable.
|
||||
- [x] #2 JLPT rendering is visually verified for all supported levels with distinct colors and no overlap/regression in subtitle legibility.
|
||||
<!-- DOD:END -->
|
||||
|
||||
## Note
|
||||
- Full performance/limits documentation and dictionary source/version/perf notes are deferred and tracked separately.
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
---
|
||||
id: TASK-23.1
|
||||
title: Implement JLPT token lookup service for subtitle words
|
||||
status: In Progress
|
||||
status: Done
|
||||
assignee: []
|
||||
created_date: '2026-02-13 16:42'
|
||||
updated_date: '2026-02-16 02:01'
|
||||
labels: []
|
||||
dependencies: []
|
||||
parent_task_id: TASK-23
|
||||
@@ -20,14 +21,11 @@ Create a lookup layer that parses/queries the bundled JLPT dictionary file and r
|
||||
<!-- AC:BEGIN -->
|
||||
- [x] #1 Service accepts a token/normalized token and returns JLPT level or no-match deterministically.
|
||||
- [x] #2 Lookup handles expected dictionary format edge cases and unknown tokens without throwing.
|
||||
- [ ] #3 Lookup path is efficient enough for frame-by-frame subtitle updates.
|
||||
- [x] #3 Lookup path is efficient enough for frame-by-frame subtitle updates.
|
||||
- [x] #4 Tokenizer interaction preserves existing token ordering and positions needed for rendering spans/underlines.
|
||||
- [ ] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics.
|
||||
- [x] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Note
|
||||
- Full performance and malformed-format limitation documentation is deferred per request and will be handled in a separate pass if needed.
|
||||
|
||||
## Definition of Done
|
||||
<!-- DOD:BEGIN -->
|
||||
- [x] #1 Lookup service returns JLPT level with deterministic output for test fixtures.
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
---
|
||||
id: TASK-23.2
|
||||
title: Bundle JLPT Yomitan dictionary assets for offline local lookup
|
||||
status: In Progress
|
||||
status: Done
|
||||
assignee: []
|
||||
created_date: '2026-02-13 16:42'
|
||||
updated_date: '2026-02-16 02:01'
|
||||
labels: []
|
||||
dependencies: []
|
||||
parent_task_id: TASK-23
|
||||
@@ -20,13 +21,10 @@ Package and include the JLPT Yomitan extension dictionary assets in SubMiner so
|
||||
<!-- AC:BEGIN -->
|
||||
- [x] #1 JLPT dictionary asset from the existing Yomitan extension is added to the repository/build output in a tracked, offline-available location.
|
||||
- [x] #2 The loader locates and opens the JLPT dictionary file deterministically at runtime.
|
||||
- [ ] #3 Dictionary version/source is documented so future updates are explicit and reproducible.
|
||||
- [ ] #4 Dictionary bundle size and load impact are documented in task notes or project docs.
|
||||
- [x] #3 Dictionary version/source is documented so future updates are explicit and reproducible.
|
||||
- [x] #4 Dictionary bundle size and load impact are documented in task notes or project docs.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Note
|
||||
- Full dictionary source/version/performance notes are intentionally deferred for now (out of scope in this pass).
|
||||
|
||||
## Definition of Done
|
||||
<!-- DOD:BEGIN -->
|
||||
- [x] #1 Dictionary data is bundled and consumable during development and packaged app runs.
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
---
|
||||
id: TASK-23.4
|
||||
title: Add opt-in control and end-to-end flow + tests for JLPT tagging
|
||||
status: In Progress
|
||||
status: Done
|
||||
assignee: []
|
||||
created_date: '2026-02-13 16:42'
|
||||
updated_date: '2026-02-16 02:00'
|
||||
labels: []
|
||||
dependencies: []
|
||||
parent_task_id: TASK-23
|
||||
@@ -21,13 +22,10 @@ Add user/config setting to enable JLPT tagging, wire the feature toggle through
|
||||
- [x] #1 JLPT tagging is opt-in and defaults to disabled.
|
||||
- [x] #2 When disabled, lookup/rendering pipeline does not execute JLPT processing.
|
||||
- [x] #3 When enabled, end-to-end flow tags subtitle words via token-level lookup and rendering.
|
||||
- [ ] #4 Add tests covering at least one positive match, one non-match, and disabled state.
|
||||
- [x] #4 Add tests covering at least one positive match, one non-match, and disabled state.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Note
|
||||
- Full end-to-end + disabled-state test coverage remains pending as an explicit follow-up item.
|
||||
|
||||
## Definition of Done
|
||||
<!-- DOD:BEGIN -->
|
||||
- [ ] #1 End-to-end option behavior and opt-in state persistence are implemented and verified.
|
||||
- [x] #1 End-to-end option behavior and opt-in state persistence are implemented and verified.
|
||||
<!-- DOD:END -->
|
||||
|
||||
@@ -558,6 +558,8 @@ See `config.example.jsonc` for detailed configuration options.
|
||||
| `jlptColors` | object | JLPT level underline colors object (`N1`..`N5`) |
|
||||
| `secondary` | object | Override any of the above for secondary subtitles (optional) |
|
||||
|
||||
JLPT underlining is powered by offline term-meta bank files at runtime. See [`docs/jlpt-vocab-bundle.md`](jlpt-vocab-bundle.md) for required files, source/version refresh steps, and deterministic fallback behavior.
|
||||
|
||||
Secondary subtitle defaults: `fontSize: 24`, `fontColor: "#ffffff"`, `backgroundColor: "transparent"`. Any property not set in `secondary` falls back to the CSS defaults.
|
||||
|
||||
**See `config.example.jsonc`** for the complete list of subtitle style configuration options.
|
||||
|
||||
59
docs/jlpt-vocab-bundle.md
Normal file
59
docs/jlpt-vocab-bundle.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# JLPT Vocabulary Bundle (Offline)
|
||||
|
||||
## Bundle location
|
||||
|
||||
SubMiner expects the JLPT term-meta bank files to be available locally at:
|
||||
|
||||
- `vendor/yomitan-jlpt-vocab`
|
||||
|
||||
At runtime, SubMiner also searches these derived locations:
|
||||
|
||||
- `vendor/yomitan-jlpt-vocab`
|
||||
- `vendor/yomitan-jlpt-vocab/vendor/yomitan-jlpt-vocab`
|
||||
- `vendor/yomitan-jlpt-vocab/yomitan-jlpt-vocab`
|
||||
|
||||
and user-data/config fallback paths (see `getJlptDictionarySearchPaths` in `src/main.ts`).
|
||||
|
||||
## Required files
|
||||
|
||||
The expected files are:
|
||||
|
||||
- `term_meta_bank_1.json`
|
||||
- `term_meta_bank_2.json`
|
||||
- `term_meta_bank_3.json`
|
||||
- `term_meta_bank_4.json`
|
||||
- `term_meta_bank_5.json`
|
||||
|
||||
Each bank maps terms to frequency metadata; only entries with a `frequency.displayValue` are considered for JLPT tagging.
|
||||
|
||||
## Source and update process
|
||||
|
||||
For reproducible updates:
|
||||
|
||||
1. Obtain the JLPT term-meta bank archive from the same upstream source that supplies the bundled Yomitan dictionary data.
|
||||
2. Extract the five `term_meta_bank_*.json` files.
|
||||
3. Place them into `vendor/yomitan-jlpt-vocab/`.
|
||||
4. Commit the update with the source URL/version in the task notes.
|
||||
|
||||
This repository currently ships the folder through the `electron-builder` `extraResources` setting, mapped as:
|
||||
`vendor/yomitan-jlpt-vocab -> yomitan-jlpt-vocab`.
|
||||
|
||||
## Deterministic fallback behavior on malformed inputs
|
||||
|
||||
`createJlptVocabularyLookupService()` follows these rules:
|
||||
|
||||
- If a bank file is missing, cannot be read or parsed as JSON, or does not contain the expected JSON array, that file is skipped and processing continues with the remaining banks.
|
||||
- If entries do not expose expected frequency metadata, they are skipped.
|
||||
- If no usable bank entries are found, SubMiner initializes a no-op JLPT lookup (`null` for every token).
|
||||
- In all fallback cases, subtitle rendering remains unchanged (no underlines are added).
|
||||
|
||||
## Bundle size and startup cost
|
||||
|
||||
Loading is currently a synchronous file read and JSON parse performed when the feature is enabled; after that, each lookup during subtitle updates is an O(1) in-memory `Map` access.
|
||||
|
||||
Practical guidance:
|
||||
|
||||
- Keep the JLPT bundle inside `vendor/yomitan-jlpt-vocab` to avoid network lookups.
|
||||
- Measure bundle size with:
|
||||
- `du -sh vendor/yomitan-jlpt-vocab`
|
||||
- If the JLPT source is updated, re-run `pnpm run build:appimage` / packaging and confirm startup logs do not report missing banks.
|
||||
@@ -97,6 +97,7 @@ function collectDictionaryFromPath(
|
||||
for (const bank of JLPT_BANK_FILES) {
|
||||
const bankPath = path.join(dictionaryPath, bank.filename);
|
||||
if (!fs.existsSync(bankPath)) {
|
||||
log(`JLPT bank file missing for ${bank.level}: ${bankPath}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -104,6 +105,7 @@ function collectDictionaryFromPath(
|
||||
try {
|
||||
rawText = fs.readFileSync(bankPath, "utf-8");
|
||||
} catch {
|
||||
log(`Failed to read JLPT bank file ${bankPath}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -111,10 +113,22 @@ function collectDictionaryFromPath(
|
||||
try {
|
||||
rawEntries = JSON.parse(rawText) as unknown;
|
||||
} catch {
|
||||
log(`Failed to parse JLPT bank file as JSON: ${bankPath}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!Array.isArray(rawEntries)) {
|
||||
log(
|
||||
`JLPT bank file has unsupported format (expected JSON array): ${bankPath}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
const beforeSize = terms.size;
|
||||
addEntriesToMap(rawEntries, bank.level, terms, log);
|
||||
if (terms.size === beforeSize) {
|
||||
log(`JLPT bank file contained no extractable entries: ${bankPath}`);
|
||||
}
|
||||
}
|
||||
|
||||
return terms;
|
||||
@@ -124,8 +138,9 @@ export async function createJlptVocabularyLookupService(
|
||||
options: JlptVocabLookupOptions,
|
||||
): Promise<(term: string) => JlptLevel | null> {
|
||||
const attemptedPaths: string[] = [];
|
||||
let foundDirectoryCount = 0;
|
||||
let foundDictionaryPathCount = 0;
|
||||
let foundBankCount = 0;
|
||||
const resolvedBanks: string[] = [];
|
||||
for (const dictionaryPath of options.searchPaths) {
|
||||
attemptedPaths.push(dictionaryPath);
|
||||
if (!fs.existsSync(dictionaryPath)) {
|
||||
@@ -136,10 +151,11 @@ export async function createJlptVocabularyLookupService(
|
||||
continue;
|
||||
}
|
||||
|
||||
foundDirectoryCount += 1;
|
||||
foundDictionaryPathCount += 1;
|
||||
|
||||
const terms = collectDictionaryFromPath(dictionaryPath, options.log);
|
||||
if (terms.size > 0) {
|
||||
resolvedBanks.push(dictionaryPath);
|
||||
foundBankCount += 1;
|
||||
options.log(
|
||||
`JLPT dictionary loaded from ${dictionaryPath} (${terms.size} entries)`,
|
||||
@@ -159,10 +175,13 @@ export async function createJlptVocabularyLookupService(
|
||||
options.log(
|
||||
`JLPT dictionary not found. Searched ${attemptedPaths.length} candidate path(s): ${attemptedPaths.join(", ")}`,
|
||||
);
|
||||
if (foundDirectoryCount > 0 && foundBankCount === 0) {
|
||||
if (foundDictionaryPathCount > 0 && foundBankCount === 0) {
|
||||
options.log(
|
||||
"JLPT dictionary directories found, but none contained valid term_meta_bank_*.json files.",
|
||||
);
|
||||
}
|
||||
if (resolvedBanks.length > 0 && foundBankCount > 0) {
|
||||
options.log(`JLPT dictionary search matched path(s): ${resolvedBanks.join(", ")}`);
|
||||
}
|
||||
return NOOP_LOOKUP;
|
||||
}
|
||||
|
||||
@@ -32,6 +32,12 @@ type YomitanParseLine = YomitanParseSegment[];
|
||||
// Code-point distance between a katakana character and its hiragana
// counterpart (e.g. U+30A1 - U+3041 = 0x60).
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
|
||||
// U+30A1 (small katakana A). NOTE(review): presumably the lower bound of the
// katakana range eligible for hiragana conversion; the usage is outside this view.
const KATAKANA_CODEPOINT_START = 0x30a1;
|
||||
// U+30F6 (small katakana KE). NOTE(review): presumably the matching upper bound; confirm at the use site.
const KATAKANA_CODEPOINT_END = 0x30f6;
|
||||
// Maximum number of entries kept in each per-lookup-function JLPT level cache.
const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048;
|
||||
|
||||
// One memoization Map per JLPT lookup function, keyed weakly by the function
// object itself. Each Map goes from lookup text to the resolved level, with
// `null` recorded for "no level".
const jlptLevelLookupCaches = new WeakMap<
|
||||
(text: string) => JlptLevel | null,
|
||||
Map<string, JlptLevel | null>
|
||||
>();
|
||||
|
||||
function isObject(value: unknown): value is Record<string, unknown> {
|
||||
return Boolean(value && typeof value === "object");
|
||||
@@ -75,6 +81,43 @@ export interface TokenizerDepsRuntimeOptions {
|
||||
getMecabTokenizer: () => MecabTokenizerLike | null;
|
||||
}
|
||||
|
||||
/**
 * Resolves the JLPT level for a piece of text, memoizing results per lookup
 * function.
 *
 * The text is trimmed before use; text that is empty after trimming yields
 * `null` without touching the cache or calling `getJlptLevel`. A lookup that
 * throws is treated as "no level" and that `null` result is cached like any
 * other. Once a cache grows past `JLPT_LEVEL_LOOKUP_CACHE_LIMIT` entries, the
 * earliest-inserted entries are dropped first (cache hits do not refresh an
 * entry's position).
 *
 * @param lookupText - Raw text to look up; surrounding whitespace is ignored.
 * @param getJlptLevel - Lookup function; also used as the identity that
 *   selects which cache is consulted.
 * @returns The cached or freshly resolved level, or `null` when there is none.
 */
function getCachedJlptLevel(
  lookupText: string,
  getJlptLevel: (text: string) => JlptLevel | null,
): JlptLevel | null {
  const key = lookupText.trim();
  if (key.length === 0) {
    return null;
  }

  // Fetch the cache that belongs to this lookup function, creating it on first use.
  const existingCache = jlptLevelLookupCaches.get(getJlptLevel);
  const levelsByText = existingCache ?? new Map<string, JlptLevel | null>();
  if (existingCache === undefined) {
    jlptLevelLookupCaches.set(getJlptLevel, levelsByText);
  }

  // `has` distinguishes a cached `null` (known miss) from an absent entry.
  if (levelsByText.has(key)) {
    return levelsByText.get(key) ?? null;
  }

  let resolved: JlptLevel | null = null;
  try {
    resolved = getJlptLevel(key);
  } catch {
    // A failing lookup counts as "no level"; `resolved` stays null.
  }
  levelsByText.set(key, resolved);

  // Map iteration follows insertion order, so the first key is the oldest entry.
  while (levelsByText.size > JLPT_LEVEL_LOOKUP_CACHE_LIMIT) {
    const oldest = levelsByText.keys().next();
    if (!oldest.done) {
      levelsByText.delete(oldest.value);
    }
  }

  return resolved;
}
|
||||
|
||||
export function createTokenizerDepsRuntimeService(
|
||||
options: TokenizerDepsRuntimeOptions,
|
||||
): TokenizerServiceDeps {
|
||||
@@ -326,8 +369,12 @@ function applyJlptMarking(
|
||||
return { ...token, jlptLevel: undefined };
|
||||
}
|
||||
|
||||
const primaryLevel = getJlptLevel(resolveJlptLookupText(token));
|
||||
const fallbackLevel = getJlptLevel(token.surface);
|
||||
const primaryLevel = getCachedJlptLevel(
|
||||
resolveJlptLookupText(token),
|
||||
getJlptLevel,
|
||||
);
|
||||
const fallbackLevel =
|
||||
primaryLevel === null ? getCachedJlptLevel(token.surface, getJlptLevel) : null;
|
||||
|
||||
return {
|
||||
...token,
|
||||
|
||||
Reference in New Issue
Block a user