mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-27 18:22:41 -08:00
Merge pull request #2 from ksyasuda/add-jlpt-tagging
Add opt-in JLPT tagging flow
This commit is contained in:
63
.github/workflows/docs.yml
vendored
63
.github/workflows/docs.yml
vendored
@@ -1,63 +0,0 @@
|
|||||||
name: Docs
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'docs/**'
|
|
||||||
- '.github/workflows/docs.yml'
|
|
||||||
- 'package.json'
|
|
||||||
- 'pnpm-lock.yaml'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pages: write
|
|
||||||
id-token: write
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: pages
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Setup pnpm
|
|
||||||
uses: pnpm/action-setup@v4
|
|
||||||
with:
|
|
||||||
version: 9
|
|
||||||
|
|
||||||
- name: Setup Node.js
|
|
||||||
uses: actions/setup-node@v4
|
|
||||||
with:
|
|
||||||
node-version: 20
|
|
||||||
cache: pnpm
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: pnpm install --frozen-lockfile
|
|
||||||
|
|
||||||
- name: Build docs
|
|
||||||
run: pnpm run docs:build
|
|
||||||
|
|
||||||
- name: Setup Pages
|
|
||||||
uses: actions/configure-pages@v5
|
|
||||||
|
|
||||||
- name: Upload artifact
|
|
||||||
uses: actions/upload-pages-artifact@v3
|
|
||||||
with:
|
|
||||||
path: docs/.vitepress/dist
|
|
||||||
|
|
||||||
deploy:
|
|
||||||
environment:
|
|
||||||
name: github-pages
|
|
||||||
url: ${{ steps.deployment.outputs.page_url }}
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
steps:
|
|
||||||
- name: Deploy to GitHub Pages
|
|
||||||
id: deployment
|
|
||||||
uses: actions/deploy-pages@v4
|
|
||||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -2,3 +2,6 @@
|
|||||||
path = vendor/texthooker-ui
|
path = vendor/texthooker-ui
|
||||||
url = https://github.com/ksyasuda/texthooker-ui.git
|
url = https://github.com/ksyasuda/texthooker-ui.git
|
||||||
branch = subminer
|
branch = subminer
|
||||||
|
[submodule "vendor/yomitan-jlpt-vocab"]
|
||||||
|
path = vendor/yomitan-jlpt-vocab
|
||||||
|
url = https://github.com/stephenmk/yomitan-jlpt-vocab
|
||||||
|
|||||||
@@ -46,12 +46,19 @@ The `subminer` wrapper uses a [Bun](https://bun.sh) shebang, so `bun` must be on
|
|||||||
### From Source
|
### From Source
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/ksyasuda/SubMiner.git
|
git clone --recurse-submodules https://github.com/ksyasuda/SubMiner.git
|
||||||
cd SubMiner
|
cd SubMiner
|
||||||
make build
|
make build
|
||||||
make install
|
make install
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you already cloned without submodules:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd SubMiner
|
||||||
|
git submodule update --init --recursive
|
||||||
|
```
|
||||||
|
|
||||||
For macOS builds, signing, and platform-specific details, see [docs/installation.md](docs/installation.md).
|
For macOS builds, signing, and platform-specific details, see [docs/installation.md](docs/installation.md).
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ id: TASK-23
|
|||||||
title: >-
|
title: >-
|
||||||
Add opt-in JLPT level tagging by bundling and querying local Yomitan
|
Add opt-in JLPT level tagging by bundling and querying local Yomitan
|
||||||
dictionary
|
dictionary
|
||||||
status: To Do
|
status: In Progress
|
||||||
assignee: []
|
assignee: []
|
||||||
created_date: '2026-02-13 16:42'
|
created_date: '2026-02-13 16:42'
|
||||||
labels: []
|
labels: []
|
||||||
@@ -19,13 +19,13 @@ Implement an opt-in JLPT token annotation feature that annotates subtitle words
|
|||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
<!-- AC:BEGIN -->
|
<!-- AC:BEGIN -->
|
||||||
- [ ] #1 Add an opt-in setting/feature flag so JLPT tagging is disabled by default and can be enabled per user/session as requested.
|
- [x] #1 Add an opt-in setting/feature flag so JLPT tagging is disabled by default and can be enabled per user/session as requested.
|
||||||
- [ ] #2 Bundle the existing JLPT Yomitan extension package/data into the project so lookups can be performed offline from local files.
|
- [x] #2 Bundle the existing JLPT Yomitan extension package/data into the project so lookups can be performed offline from local files.
|
||||||
- [ ] #3 Implement token-level dictionary lookup against the bundled JLPT dictionary file to determine presence and JLPT level for words in subtitle lines.
|
- [x] #3 Implement token-level dictionary lookup against the bundled JLPT dictionary file to determine presence and JLPT level for words in subtitle lines.
|
||||||
- [ ] #4 Render a colored underline under each token determined to have a JLPT level; the underline must match token width/length and not affect layout or disrupt line rendering.
|
- [x] #4 Render a colored underline under each token determined to have a JLPT level; the underline must match token width/length and not affect layout or disrupt line rendering.
|
||||||
- [ ] #5 Assign different underline colors per JLPT level (at minimum N5/N4/N3/N2/N1) with a stable mapping documented in task notes.
|
- [x] #5 Assign different underline colors per JLPT level (at minimum N5/N4/N3/N2/N1) with a stable mapping documented in task notes.
|
||||||
- [ ] #6 Handle unknown/no-match tokens as non-tagged while preserving existing subtitle styling and interaction behavior.
|
- [x] #6 Handle unknown/no-match tokens as non-tagged while preserving existing subtitle styling and interaction behavior.
|
||||||
- [ ] #7 When disabled, no JLPT lookups are performed and subtitles render exactly as current behavior.
|
- [x] #7 When disabled, no JLPT lookups are performed and subtitles render exactly as current behavior.
|
||||||
- [ ] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path.
|
- [ ] #8 Add tests or deterministic checks covering at least one positive match, one non-match, and one unknown/unsupported-level fallback path.
|
||||||
- [ ] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data.
|
- [ ] #9 Document expected dictionary source and any size/performance impact of bundling the JLPT extension data.
|
||||||
- [ ] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy.
|
- [ ] #10 If dictionary format/version constraints block exact level extraction, the task includes explicit limitation notes and a deterministic fallback strategy.
|
||||||
@@ -34,5 +34,8 @@ Implement an opt-in JLPT token annotation feature that annotates subtitle words
|
|||||||
## Definition of Done
|
## Definition of Done
|
||||||
<!-- DOD:BEGIN -->
|
<!-- DOD:BEGIN -->
|
||||||
- [ ] #1 Feature has a clear toggle and persistence of preference if applicable.
|
- [ ] #1 Feature has a clear toggle and persistence of preference if applicable.
|
||||||
- [ ] #2 JLPT rendering is visually verified for all supported levels with distinct colors and no overlap/regression in subtitle legibility.
|
- [x] #2 JLPT rendering is visually verified for all supported levels with distinct colors and no overlap/regression in subtitle legibility.
|
||||||
<!-- DOD:END -->
|
<!-- DOD:END -->
|
||||||
|
|
||||||
|
## Note
|
||||||
|
- Full performance/limits documentation and dictionary source/version/perf notes are deferred and tracked separately.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: TASK-23.1
|
id: TASK-23.1
|
||||||
title: Implement JLPT token lookup service for subtitle words
|
title: Implement JLPT token lookup service for subtitle words
|
||||||
status: To Do
|
status: In Progress
|
||||||
assignee: []
|
assignee: []
|
||||||
created_date: '2026-02-13 16:42'
|
created_date: '2026-02-13 16:42'
|
||||||
labels: []
|
labels: []
|
||||||
@@ -18,14 +18,17 @@ Create a lookup layer that parses/queries the bundled JLPT dictionary file and r
|
|||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
<!-- AC:BEGIN -->
|
<!-- AC:BEGIN -->
|
||||||
- [ ] #1 Service accepts a token/normalized token and returns JLPT level or no-match deterministically.
|
- [x] #1 Service accepts a token/normalized token and returns JLPT level or no-match deterministically.
|
||||||
- [ ] #2 Lookup handles expected dictionary format edge cases and unknown tokens without throwing.
|
- [x] #2 Lookup handles expected dictionary format edge cases and unknown tokens without throwing.
|
||||||
- [ ] #3 Lookup path is efficient enough for frame-by-frame subtitle updates.
|
- [ ] #3 Lookup path is efficient enough for frame-by-frame subtitle updates.
|
||||||
- [ ] #4 Tokenizer interaction preserves existing token ordering and positions needed for rendering spans/underlines.
|
- [x] #4 Tokenizer interaction preserves existing token ordering and positions needed for rendering spans/underlines.
|
||||||
- [ ] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics.
|
- [ ] #5 Behavior on malformed/unsupported dictionary format is documented with fallback semantics.
|
||||||
<!-- AC:END -->
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Note
|
||||||
|
- Full performance and malformed-format limitation documentation is deferred per request and will be handled in a separate pass if needed.
|
||||||
|
|
||||||
## Definition of Done
|
## Definition of Done
|
||||||
<!-- DOD:BEGIN -->
|
<!-- DOD:BEGIN -->
|
||||||
- [ ] #1 Lookup service returns JLPT level with deterministic output for test fixtures.
|
- [x] #1 Lookup service returns JLPT level with deterministic output for test fixtures.
|
||||||
<!-- DOD:END -->
|
<!-- DOD:END -->
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: TASK-23.2
|
id: TASK-23.2
|
||||||
title: Bundle JLPT Yomitan dictionary assets for offline local lookup
|
title: Bundle JLPT Yomitan dictionary assets for offline local lookup
|
||||||
status: To Do
|
status: In Progress
|
||||||
assignee: []
|
assignee: []
|
||||||
created_date: '2026-02-13 16:42'
|
created_date: '2026-02-13 16:42'
|
||||||
labels: []
|
labels: []
|
||||||
@@ -18,13 +18,16 @@ Package and include the JLPT Yomitan extension dictionary assets in SubMiner so
|
|||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
<!-- AC:BEGIN -->
|
<!-- AC:BEGIN -->
|
||||||
- [ ] #1 JLPT dictionary asset from the existing Yomitan extension is added to the repository/build output in a tracked, offline-available location.
|
- [x] #1 JLPT dictionary asset from the existing Yomitan extension is added to the repository/build output in a tracked, offline-available location.
|
||||||
- [ ] #2 The loader locates and opens the JLPT dictionary file deterministically at runtime.
|
- [x] #2 The loader locates and opens the JLPT dictionary file deterministically at runtime.
|
||||||
- [ ] #3 Dictionary version/source is documented so future updates are explicit and reproducible.
|
- [ ] #3 Dictionary version/source is documented so future updates are explicit and reproducible.
|
||||||
- [ ] #4 Dictionary bundle size and load impact are documented in task notes or project docs.
|
- [ ] #4 Dictionary bundle size and load impact are documented in task notes or project docs.
|
||||||
<!-- AC:END -->
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Note
|
||||||
|
- Full dictionary source/version/performance notes are intentionally deferred for now (out of scope in this pass).
|
||||||
|
|
||||||
## Definition of Done
|
## Definition of Done
|
||||||
<!-- DOD:BEGIN -->
|
<!-- DOD:BEGIN -->
|
||||||
- [ ] #1 Dictionary data is bundled and consumable during development and packaged app runs.
|
- [x] #1 Dictionary data is bundled and consumable during development and packaged app runs.
|
||||||
<!-- DOD:END -->
|
<!-- DOD:END -->
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: TASK-23.3
|
id: TASK-23.3
|
||||||
title: Render JLPT token underlines with level-based colors in subtitle lines
|
title: Render JLPT token underlines with level-based colors in subtitle lines
|
||||||
status: To Do
|
status: Done
|
||||||
assignee: []
|
assignee: []
|
||||||
created_date: '2026-02-13 16:42'
|
created_date: '2026-02-13 16:42'
|
||||||
labels: []
|
labels: []
|
||||||
@@ -18,14 +18,14 @@ Render JLPT-aware token annotations as token-length colored underlines in the su
|
|||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
<!-- AC:BEGIN -->
|
<!-- AC:BEGIN -->
|
||||||
- [ ] #1 For each token with JLPT level, renderer draws an underline matching token width/length.
|
- [x] #1 For each token with JLPT level, renderer draws an underline matching token width/length.
|
||||||
- [ ] #2 Underlines use distinct colors by JLPT level (e.g., N5/N4/N3/N2/N1) and mapping is consistent/documented.
|
- [x] #2 Underlines use distinct colors by JLPT level (e.g., N5/N4/N3/N2/N1) and mapping is consistent/documented.
|
||||||
- [ ] #3 Non-tagged tokens remain visually unchanged.
|
- [x] #3 Non-tagged tokens remain visually unchanged.
|
||||||
- [ ] #4 Rendering does not alter line height/selection behavior or break wrapping behavior.
|
- [x] #4 Rendering does not alter line height/selection behavior or break wrapping behavior.
|
||||||
- [ ] #5 Feature degrades gracefully when level data is missing or lookup is unavailable.
|
- [x] #5 Feature degrades gracefully when level data is missing or lookup is unavailable.
|
||||||
<!-- AC:END -->
|
<!-- AC:END -->
|
||||||
|
|
||||||
## Definition of Done
|
## Definition of Done
|
||||||
<!-- DOD:BEGIN -->
|
<!-- DOD:BEGIN -->
|
||||||
- [ ] #1 Visual output validated for all mapped JLPT levels with no legibility/layout regressions.
|
- [x] #1 Visual output validated for all mapped JLPT levels with no legibility/layout regressions.
|
||||||
<!-- DOD:END -->
|
<!-- DOD:END -->
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
id: TASK-23.4
|
id: TASK-23.4
|
||||||
title: Add opt-in control and end-to-end flow + tests for JLPT tagging
|
title: Add opt-in control and end-to-end flow + tests for JLPT tagging
|
||||||
status: To Do
|
status: In Progress
|
||||||
assignee: []
|
assignee: []
|
||||||
created_date: '2026-02-13 16:42'
|
created_date: '2026-02-13 16:42'
|
||||||
labels: []
|
labels: []
|
||||||
@@ -18,12 +18,15 @@ Add user/config setting to enable JLPT tagging, wire the feature toggle through
|
|||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
<!-- AC:BEGIN -->
|
<!-- AC:BEGIN -->
|
||||||
- [ ] #1 JLPT tagging is opt-in and defaults to disabled.
|
- [x] #1 JLPT tagging is opt-in and defaults to disabled.
|
||||||
- [ ] #2 When disabled, lookup/rendering pipeline does not execute JLPT processing.
|
- [x] #2 When disabled, lookup/rendering pipeline does not execute JLPT processing.
|
||||||
- [ ] #3 When enabled, end-to-end flow tags subtitle words via token-level lookup and rendering.
|
- [x] #3 When enabled, end-to-end flow tags subtitle words via token-level lookup and rendering.
|
||||||
- [ ] #4 Add tests covering at least one positive match, one non-match, and disabled state.
|
- [ ] #4 Add tests covering at least one positive match, one non-match, and disabled state.
|
||||||
<!-- AC:END -->
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Note
|
||||||
|
- Full end-to-end + disabled-state test coverage remains pending as an explicit follow-up item.
|
||||||
|
|
||||||
## Definition of Done
|
## Definition of Done
|
||||||
<!-- DOD:BEGIN -->
|
<!-- DOD:BEGIN -->
|
||||||
- [ ] #1 End-to-end option behavior and opt-in state persistence are implemented and verified.
|
- [ ] #1 End-to-end option behavior and opt-in state persistence are implemented and verified.
|
||||||
|
|||||||
@@ -149,6 +149,7 @@
|
|||||||
// Primary and secondary subtitle styling.
|
// Primary and secondary subtitle styling.
|
||||||
// ==========================================
|
// ==========================================
|
||||||
"subtitleStyle": {
|
"subtitleStyle": {
|
||||||
|
"enableJlpt": false,
|
||||||
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
|
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
|
||||||
"fontSize": 35,
|
"fontSize": 35,
|
||||||
"fontColor": "#cad3f5",
|
"fontColor": "#cad3f5",
|
||||||
@@ -157,6 +158,13 @@
|
|||||||
"backgroundColor": "rgba(54, 58, 79, 0.5)",
|
"backgroundColor": "rgba(54, 58, 79, 0.5)",
|
||||||
"nPlusOneColor": "#c6a0f6",
|
"nPlusOneColor": "#c6a0f6",
|
||||||
"knownWordColor": "#a6da95",
|
"knownWordColor": "#a6da95",
|
||||||
|
"jlptColors": {
|
||||||
|
"N1": "#ed8796",
|
||||||
|
"N2": "#f5a97f",
|
||||||
|
"N3": "#f9e2af",
|
||||||
|
"N4": "#a6e3a1",
|
||||||
|
"N5": "#8aadf4"
|
||||||
|
},
|
||||||
"secondary": {
|
"secondary": {
|
||||||
"fontSize": 24,
|
"fontSize": 24,
|
||||||
"fontColor": "#ffffff",
|
"fontColor": "#ffffff",
|
||||||
|
|||||||
@@ -552,12 +552,26 @@ See `config.example.jsonc` for detailed configuration options.
|
|||||||
| `fontWeight` | string | CSS font-weight, e.g. `"bold"`, `"normal"`, `"600"` (default: `"normal"`) |
|
| `fontWeight` | string | CSS font-weight, e.g. `"bold"`, `"normal"`, `"600"` (default: `"normal"`) |
|
||||||
| `fontStyle` | string | `"normal"` or `"italic"` (default: `"normal"`) |
|
| `fontStyle` | string | `"normal"` or `"italic"` (default: `"normal"`) |
|
||||||
| `backgroundColor` | string | Any CSS color, including `"transparent"` (default: `"rgba(54, 58, 79, 0.5)"`) |
|
| `backgroundColor` | string | Any CSS color, including `"transparent"` (default: `"rgba(54, 58, 79, 0.5)"`) |
|
||||||
|
| `enableJlpt` | boolean | Enable JLPT level underline styling (`false` by default) |
|
||||||
|
| `nPlusOneColor` | string | Existing n+1 highlight color (default: `#c6a0f6`) |
|
||||||
|
| `knownWordColor` | string | Existing known-word highlight color (default: `#a6da95`) |
|
||||||
|
| `jlptColors` | object | JLPT level underline colors object (`N1`..`N5`) |
|
||||||
| `secondary` | object | Override any of the above for secondary subtitles (optional) |
|
| `secondary` | object | Override any of the above for secondary subtitles (optional) |
|
||||||
|
|
||||||
Secondary subtitle defaults: `fontSize: 24`, `fontColor: "#ffffff"`, `backgroundColor: "transparent"`. Any property not set in `secondary` falls back to the CSS defaults.
|
Secondary subtitle defaults: `fontSize: 24`, `fontColor: "#ffffff"`, `backgroundColor: "transparent"`. Any property not set in `secondary` falls back to the CSS defaults.
|
||||||
|
|
||||||
**See `config.example.jsonc`** for the complete list of subtitle style configuration options.
|
**See `config.example.jsonc`** for the complete list of subtitle style configuration options.
|
||||||
|
|
||||||
|
`jlptColors` keys are:
|
||||||
|
|
||||||
|
| Key | Default | Description |
|
||||||
|
| ---- | --------- | ---------------------------------------- |
|
||||||
|
| `N1` | `#ed8796` | JLPT N1 underline color |
|
||||||
|
| `N2` | `#f5a97f` | JLPT N2 underline color |
|
||||||
|
| `N3` | `#f9e2af` | JLPT N3 underline color |
|
||||||
|
| `N4` | `#a6e3a1` | JLPT N4 underline color |
|
||||||
|
| `N5` | `#8aadf4` | JLPT N5 underline color |
|
||||||
|
|
||||||
### Texthooker
|
### Texthooker
|
||||||
|
|
||||||
Control whether the browser opens automatically when texthooker starts:
|
Control whether the browser opens automatically when texthooker starts:
|
||||||
|
|||||||
@@ -149,6 +149,7 @@
|
|||||||
// Primary and secondary subtitle styling.
|
// Primary and secondary subtitle styling.
|
||||||
// ==========================================
|
// ==========================================
|
||||||
"subtitleStyle": {
|
"subtitleStyle": {
|
||||||
|
"enableJlpt": false,
|
||||||
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
|
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
|
||||||
"fontSize": 35,
|
"fontSize": 35,
|
||||||
"fontColor": "#cad3f5",
|
"fontColor": "#cad3f5",
|
||||||
@@ -157,6 +158,13 @@
|
|||||||
"backgroundColor": "rgba(54, 58, 79, 0.5)",
|
"backgroundColor": "rgba(54, 58, 79, 0.5)",
|
||||||
"nPlusOneColor": "#c6a0f6",
|
"nPlusOneColor": "#c6a0f6",
|
||||||
"knownWordColor": "#a6da95",
|
"knownWordColor": "#a6da95",
|
||||||
|
"jlptColors": {
|
||||||
|
"N1": "#ed8796",
|
||||||
|
"N2": "#f5a97f",
|
||||||
|
"N3": "#f9e2af",
|
||||||
|
"N4": "#a6e3a1",
|
||||||
|
"N5": "#8aadf4"
|
||||||
|
},
|
||||||
"secondary": {
|
"secondary": {
|
||||||
"fontSize": 24,
|
"fontSize": 24,
|
||||||
"fontColor": "#ffffff",
|
"fontColor": "#ffffff",
|
||||||
|
|||||||
@@ -97,6 +97,10 @@
|
|||||||
"from": "vendor/yomitan",
|
"from": "vendor/yomitan",
|
||||||
"to": "yomitan"
|
"to": "yomitan"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"from": "vendor/yomitan-jlpt-vocab",
|
||||||
|
"to": "yomitan-jlpt-vocab"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"from": "assets",
|
"from": "assets",
|
||||||
"to": "assets"
|
"to": "assets"
|
||||||
|
|||||||
@@ -174,6 +174,7 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
|
|||||||
ffmpeg_path: "",
|
ffmpeg_path: "",
|
||||||
},
|
},
|
||||||
subtitleStyle: {
|
subtitleStyle: {
|
||||||
|
enableJlpt: false,
|
||||||
fontFamily:
|
fontFamily:
|
||||||
"Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
|
"Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif",
|
||||||
fontSize: 35,
|
fontSize: 35,
|
||||||
@@ -183,6 +184,13 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
|
|||||||
backgroundColor: "rgba(54, 58, 79, 0.5)",
|
backgroundColor: "rgba(54, 58, 79, 0.5)",
|
||||||
nPlusOneColor: "#c6a0f6",
|
nPlusOneColor: "#c6a0f6",
|
||||||
knownWordColor: "#a6da95",
|
knownWordColor: "#a6da95",
|
||||||
|
jlptColors: {
|
||||||
|
N1: "#ed8796",
|
||||||
|
N2: "#f5a97f",
|
||||||
|
N3: "#f9e2af",
|
||||||
|
N4: "#a6e3a1",
|
||||||
|
N5: "#8aadf4",
|
||||||
|
},
|
||||||
secondary: {
|
secondary: {
|
||||||
fontSize: 24,
|
fontSize: 24,
|
||||||
fontColor: "#ffffff",
|
fontColor: "#ffffff",
|
||||||
@@ -280,6 +288,13 @@ export const CONFIG_OPTION_REGISTRY: ConfigOptionRegistryEntry[] = [
|
|||||||
defaultValue: DEFAULT_CONFIG.websocket.port,
|
defaultValue: DEFAULT_CONFIG.websocket.port,
|
||||||
description: "Built-in subtitle websocket server port.",
|
description: "Built-in subtitle websocket server port.",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
path: "subtitleStyle.enableJlpt",
|
||||||
|
kind: "boolean",
|
||||||
|
defaultValue: DEFAULT_CONFIG.subtitleStyle.enableJlpt,
|
||||||
|
description: "Enable JLPT vocabulary level underlines. "
|
||||||
|
+ "When disabled, JLPT tagging lookup and underlines are skipped.",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
path: "ankiConnect.enabled",
|
path: "ankiConnect.enabled",
|
||||||
kind: "boolean",
|
kind: "boolean",
|
||||||
|
|||||||
@@ -442,6 +442,18 @@ export class ConfigService {
|
|||||||
: {}),
|
: {}),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const enableJlpt = asBoolean((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt);
|
||||||
|
if (enableJlpt !== undefined) {
|
||||||
|
resolved.subtitleStyle.enableJlpt = enableJlpt;
|
||||||
|
} else if ((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt !== undefined) {
|
||||||
|
warn(
|
||||||
|
"subtitleStyle.enableJlpt",
|
||||||
|
(src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt,
|
||||||
|
resolved.subtitleStyle.enableJlpt,
|
||||||
|
"Expected boolean.",
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isObject(src.ankiConnect)) {
|
if (isObject(src.ankiConnect)) {
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ export {
|
|||||||
} from "./runtime-config-service";
|
} from "./runtime-config-service";
|
||||||
export { openYomitanSettingsWindow } from "./yomitan-settings-service";
|
export { openYomitanSettingsWindow } from "./yomitan-settings-service";
|
||||||
export { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "./tokenizer-service";
|
export { createTokenizerDepsRuntimeService, tokenizeSubtitleService } from "./tokenizer-service";
|
||||||
|
export { createJlptVocabularyLookupService } from "./jlpt-vocab-service";
|
||||||
export { loadYomitanExtensionService } from "./yomitan-extension-loader-service";
|
export { loadYomitanExtensionService } from "./yomitan-extension-loader-service";
|
||||||
export {
|
export {
|
||||||
getJimakuLanguagePreferenceService,
|
getJimakuLanguagePreferenceService,
|
||||||
|
|||||||
29
src/core/services/jlpt-excluded-terms.ts
Normal file
29
src/core/services/jlpt-excluded-terms.ts
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
/**
 * Token-level lexical terms excluded from JLPT highlighting.
 *
 * The list is independent of part-of-speech data and acts as a safety layer
 * for tokens that would otherwise be tagged. Membership is by exact string.
 */
export const JLPT_EXCLUDED_TERMS = new Set([
  // ko-/so-/a-/do- series used before a noun.
  "この", "その", "あの", "どの",
  // ko-/so-/a-/do- series used as pronouns.
  "これ", "それ", "あれ", "どれ",
  // ko-/so-/a-/do- series for places.
  "ここ", "そこ", "あそこ", "どこ",
  "こと",
  // Short two-kana sequences.
  "ああ", "ええ", "うう", "おお", "はは", "へえ", "ふう", "ほう",
]);
|
||||||
|
|
||||||
|
/**
 * Reports whether a term is on the JLPT exclusion list.
 *
 * The comparison is an exact match against `JLPT_EXCLUDED_TERMS`; the term is
 * not trimmed or otherwise normalized before the lookup.
 *
 * @param term - Term to test.
 * @returns `true` when the term must not receive a JLPT tag.
 */
export function shouldIgnoreJlptByTerm(term: string): boolean {
  const isExcluded = JLPT_EXCLUDED_TERMS.has(term);
  return isExcluded;
}
|
||||||
45
src/core/services/jlpt-ignored-mecab-pos1.ts
Normal file
45
src/core/services/jlpt-ignored-mecab-pos1.ts
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
// MeCab POS1 categories that should be excluded from JLPT-level token tagging.
// These are filtered out because they are typically functional or non-lexical words.

/** A single ignored MeCab pos1 category together with the rationale for ignoring it. */
export type JlptIgnoredPos1Entry = {
  /** The MeCab pos1 label, as the literal string compared against token data. */
  pos1: string;
  /** Human-readable explanation of why the category is skipped. */
  reason: string;
};
|
||||||
|
|
||||||
|
/**
 * Ignored MeCab pos1 categories, each paired with its rationale.
 *
 * `as const` keeps the literal `pos1` strings in the inferred type, while
 * `satisfies` checks every element against `JlptIgnoredPos1Entry` without
 * widening it.
 */
export const JLPT_IGNORED_MECAB_POS1_ENTRIES = [
  {
    reason: "Particles (ko/kara/nagara etc.): mostly grammatical glue, not independent vocabulary.",
    pos1: "助詞",
  },
  {
    reason: "Auxiliary verbs (past tense, politeness, modality): grammar helpers.",
    pos1: "助動詞",
  },
  { reason: "Symbols/punctuation and symbols-like tokens.", pos1: "記号" },
  { reason: "Auxiliary symbols (e.g. bracket-like or markup tokens).", pos1: "補助記号" },
  { reason: "Adnominal forms (e.g. demonstratives like \"この\").", pos1: "連体詞" },
  { reason: "Interjections/onomatopoeia-style exclamations.", pos1: "感動詞" },
  {
    reason: "Conjunctions that connect clauses, usually not target vocab items.",
    pos1: "接続詞",
  },
  { reason: "Prefixes/prefix-like grammatical elements.", pos1: "接頭詞" },
] as const satisfies readonly JlptIgnoredPos1Entry[];
|
||||||
|
|
||||||
|
/** The `pos1` labels of every ignored entry, in the same order as the entries table. */
export const JLPT_IGNORED_MECAB_POS1 = JLPT_IGNORED_MECAB_POS1_ENTRIES.map(({ pos1 }) => pos1);
|
||||||
23
src/core/services/jlpt-token-filter-config.ts
Normal file
23
src/core/services/jlpt-token-filter-config.ts
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
import {
  JLPT_IGNORED_MECAB_POS1,
  JLPT_IGNORED_MECAB_POS1_ENTRIES,
} from "./jlpt-ignored-mecab-pos1";
// `JlptIgnoredPos1Entry` is a type alias with no runtime value. Importing and
// re-exporting it with type-only syntax keeps the module valid under
// `isolatedModules` and under single-file transpilers, which cannot tell that
// a plain `export { Name }` refers to a type and would emit a broken export.
import type { JlptIgnoredPos1Entry } from "./jlpt-ignored-mecab-pos1";

export { JLPT_IGNORED_MECAB_POS1_ENTRIES };
export type { JlptIgnoredPos1Entry };
|
||||||
|
|
||||||
|
/** Data-driven MeCab POS names (pos1) used for JLPT filtering, exposed as a read-only string list. */
export const JLPT_IGNORED_MECAB_POS1_LIST: readonly string[] = JLPT_IGNORED_MECAB_POS1;

// Set built once from the list; `shouldIgnoreJlptForMecabPos1` uses it for membership checks.
const JLPT_IGNORED_MECAB_POS1_SET = new Set<string>(JLPT_IGNORED_MECAB_POS1_LIST);
|
||||||
|
|
||||||
|
/**
 * Returns the table of ignored MeCab pos1 categories with their reasons.
 *
 * @returns The shared `JLPT_IGNORED_MECAB_POS1_ENTRIES` array itself (not a copy).
 */
export function getIgnoredPos1Entries(): readonly JlptIgnoredPos1Entry[] {
  const entries: readonly JlptIgnoredPos1Entry[] = JLPT_IGNORED_MECAB_POS1_ENTRIES;
  return entries;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a MeCab pos1 label belongs to the ignored categories.
 *
 * @param pos1 - The pos1 label to test; compared by exact string match.
 * @returns `true` when tokens with this pos1 should not be JLPT-tagged.
 */
export function shouldIgnoreJlptForMecabPos1(pos1: string): boolean {
  const isIgnored = JLPT_IGNORED_MECAB_POS1_SET.has(pos1);
  return isIgnored;
}
|
||||||
168
src/core/services/jlpt-vocab-service.ts
Normal file
168
src/core/services/jlpt-vocab-service.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
import * as fs from "fs";
|
||||||
|
import * as path from "path";
|
||||||
|
|
||||||
|
import type { JlptLevel } from "../../types";
|
||||||
|
|
||||||
|
/** Options accepted by `createJlptVocabularyLookupService`. */
export interface JlptVocabLookupOptions {
  /**
   * Candidate dictionary directories, tried in order. The factory skips paths
   * that do not exist or are not directories.
   */
  searchPaths: string[];
  /** Sink for the diagnostic messages produced while loading the dictionary. */
  log: (message: string) => void;
}
|
||||||
|
|
||||||
|
// Bank file expected for each JLPT level; banks are read in this order.
const JLPT_BANK_FILES: { level: JlptLevel; filename: string }[] = [
  { filename: "term_meta_bank_1.json", level: "N1" },
  { filename: "term_meta_bank_2.json", level: "N2" },
  { filename: "term_meta_bank_3.json", level: "N3" },
  { filename: "term_meta_bank_4.json", level: "N4" },
  { filename: "term_meta_bank_5.json", level: "N5" },
];

// Ranking used when a term appears under more than one level: the level with
// the larger number replaces the one already stored, so N1 outranks N5.
const JLPT_LEVEL_PRECEDENCE: Record<JlptLevel, number> = { N1: 5, N2: 4, N3: 3, N4: 2, N5: 1 };

// Lookup stand-in that ignores its input and always reports "no level".
const NOOP_LOOKUP = (): null => {
  return null;
};
|
||||||
|
|
||||||
|
/**
 * Canonicalizes a term for use as a dictionary key.
 *
 * Only leading and trailing whitespace is removed; the text is otherwise
 * left untouched.
 *
 * @param value - Raw term text.
 * @returns The trimmed term, which may be the empty string.
 */
function normalizeJlptTerm(value: string): string {
  const trimmed = value.trim();
  return trimmed;
}
|
||||||
|
|
||||||
|
/**
 * Checks whether a bank entry's metadata carries `frequency.displayValue`.
 *
 * @param meta - Untrusted metadata value taken from a parsed bank entry.
 * @returns `true` only when `meta` is an object whose `frequency` is an object
 *   with its own `displayValue` property. The property's value is not
 *   inspected, and inherited properties do not count.
 */
function hasFrequencyDisplayValue(meta: unknown): boolean {
  if (typeof meta !== "object" || meta === null) {
    return false;
  }

  const { frequency } = meta as { frequency?: unknown };
  if (typeof frequency !== "object" || frequency === null) {
    return false;
  }

  const ownsDisplayValue = Object.prototype.hasOwnProperty.call(
    frequency as Record<string, unknown>,
    "displayValue",
  );
  return ownsDisplayValue;
}
|
||||||
|
|
||||||
|
/**
 * Merges the entries of one parsed bank file into the term-to-level map.
 *
 * An entry is used only when it is an array whose first element is a string
 * that is non-empty after normalization and whose third element passes
 * `hasFrequencyDisplayValue`. Anything else is skipped without throwing.
 *
 * When a term is already present, the level with the higher
 * `JLPT_LEVEL_PRECEDENCE` value is kept. A conflict is logged only when the
 * two levels differ; a repeat of a term at the same level is ignored
 * silently, so the log never reports "keeping N1 instead of N1".
 *
 * @param rawEntries - Parsed JSON content of a bank file; ignored unless it is an array.
 * @param level - JLPT level assigned to every entry in this bank.
 * @param terms - Map to update in place.
 * @param log - Receives a message for each cross-level conflict that is not applied.
 */
function addEntriesToMap(
  rawEntries: unknown,
  level: JlptLevel,
  terms: Map<string, JlptLevel>,
  log: (message: string) => void,
): void {
  if (!Array.isArray(rawEntries)) {
    return;
  }

  for (const rawEntry of rawEntries) {
    if (!Array.isArray(rawEntry)) {
      continue;
    }

    // Entries are positional; the second element is not needed here.
    const [term, , meta] = rawEntry as [unknown, unknown, unknown];
    if (typeof term !== "string") {
      continue;
    }

    const normalizedTerm = normalizeJlptTerm(term);
    if (!normalizedTerm || !hasFrequencyDisplayValue(meta)) {
      continue;
    }

    const existingLevel = terms.get(normalizedTerm);
    if (
      existingLevel === undefined ||
      JLPT_LEVEL_PRECEDENCE[level] > JLPT_LEVEL_PRECEDENCE[existingLevel]
    ) {
      terms.set(normalizedTerm, level);
      continue;
    }

    // Same term listed again at the level already stored: nothing to report.
    if (existingLevel === level) {
      continue;
    }

    log(
      `JLPT dictionary already has ${normalizedTerm} as ${existingLevel}; keeping that level instead of ${level}`,
    );
  }
}
|
||||||
|
|
||||||
|
/**
 * Builds the term-to-level map from the bank files found in one directory.
 *
 * Every file named in `JLPT_BANK_FILES` is looked up inside `dictionaryPath`.
 * Missing files are skipped silently. A file that exists but cannot be read
 * or parsed is also skipped, and the failure is reported through `log` so a
 * corrupt bank does not make a whole JLPT level disappear without a trace.
 *
 * @param dictionaryPath - Directory expected to contain the bank files.
 * @param log - Receives load failures and the conflict messages from `addEntriesToMap`.
 * @returns The merged map; empty when no bank yielded any entry.
 */
function collectDictionaryFromPath(
  dictionaryPath: string,
  log: (message: string) => void,
): Map<string, JlptLevel> {
  const terms = new Map<string, JlptLevel>();

  for (const bank of JLPT_BANK_FILES) {
    const bankPath = path.join(dictionaryPath, bank.filename);
    if (!fs.existsSync(bankPath)) {
      continue;
    }

    let rawEntries: unknown;
    try {
      // Reading and parsing share one handler: either failure means the bank
      // is unusable and loading moves on to the next one.
      rawEntries = JSON.parse(fs.readFileSync(bankPath, "utf-8")) as unknown;
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      log(`JLPT dictionary bank could not be loaded and was skipped: ${bankPath} (${reason})`);
      continue;
    }

    addEntriesToMap(rawEntries, bank.level, terms, log);
  }

  return terms;
}
|
||||||
|
|
||||||
|
/**
 * Builds a JLPT level lookup function from the first search path that
 * yields at least one dictionary entry.
 *
 * Search paths are tried in order. A path is skipped when it does not exist,
 * is not a directory, or cannot be inspected. When no path produces entries,
 * the reason is logged and `NOOP_LOOKUP` is returned.
 *
 * @param options.searchPaths Candidate dictionary directories, in priority order.
 * @param options.log Sink for progress and diagnostic messages.
 * @returns A function mapping a term to its JLPT level, or `null` when the term is unknown.
 */
export async function createJlptVocabularyLookupService(
  options: JlptVocabLookupOptions,
): Promise<(term: string) => JlptLevel | null> {
  const attemptedPaths: string[] = [];
  let foundDirectoryCount = 0;

  for (const dictionaryPath of options.searchPaths) {
    attemptedPaths.push(dictionaryPath);

    let isDirectory: boolean;
    try {
      isDirectory = fs.statSync(dictionaryPath).isDirectory();
    } catch {
      // Missing path, permission problem, or the path vanished between
      // checks: treat it as "not a candidate" instead of failing the load.
      continue;
    }
    if (!isDirectory) {
      continue;
    }

    foundDirectoryCount += 1;

    const terms = collectDictionaryFromPath(dictionaryPath, options.log);
    if (terms.size > 0) {
      options.log(
        `JLPT dictionary loaded from ${dictionaryPath} (${terms.size} entries)`,
      );
      return (term: string): JlptLevel | null => {
        if (!term) return null;
        const normalized = normalizeJlptTerm(term);
        return normalized ? terms.get(normalized) ?? null : null;
      };
    }

    options.log(
      `JLPT dictionary directory exists but contains no readable term_meta_bank_*.json files: ${dictionaryPath}`,
    );
  }

  options.log(
    `JLPT dictionary not found. Searched ${attemptedPaths.length} candidate path(s): ${attemptedPaths.join(", ")}`,
  );
  // Reaching this point means no directory produced entries, so any
  // directory that was found had no usable bank files.
  if (foundDirectoryCount > 0) {
    options.log(
      "JLPT dictionary directories found, but none contained valid term_meta_bank_*.json files.",
    );
  }
  return NOOP_LOOKUP;
}
|
||||||
@@ -92,6 +92,7 @@ export async function runAppReadyRuntimeService(
|
|||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
deps.loadSubtitlePosition();
|
deps.loadSubtitlePosition();
|
||||||
deps.resolveKeybindings();
|
deps.resolveKeybindings();
|
||||||
|
await deps.createMecabTokenizerAndCheck();
|
||||||
deps.createMpvClient();
|
deps.createMpvClient();
|
||||||
|
|
||||||
deps.reloadConfig();
|
deps.reloadConfig();
|
||||||
@@ -117,7 +118,6 @@ export async function runAppReadyRuntimeService(
|
|||||||
deps.log("mpv_websocket detected, skipping built-in WebSocket server");
|
deps.log("mpv_websocket detected, skipping built-in WebSocket server");
|
||||||
}
|
}
|
||||||
|
|
||||||
await deps.createMecabTokenizerAndCheck();
|
|
||||||
deps.createSubtitleTimingTracker();
|
deps.createSubtitleTimingTracker();
|
||||||
await deps.loadYomitanExtension();
|
await deps.loadYomitanExtension();
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ function makeDeps(
|
|||||||
setYomitanParserInitPromise: () => {},
|
setYomitanParserInitPromise: () => {},
|
||||||
isKnownWord: () => false,
|
isKnownWord: () => false,
|
||||||
getKnownWordMatchMode: () => "headword",
|
getKnownWordMatchMode: () => "headword",
|
||||||
|
getJlptLevel: () => null,
|
||||||
tokenizeWithMecab: async () => null,
|
tokenizeWithMecab: async () => null,
|
||||||
...overrides,
|
...overrides,
|
||||||
};
|
};
|
||||||
@@ -43,10 +44,171 @@ function makeDepsFromMecabTokenizer(
|
|||||||
getMecabTokenizer: () => ({
|
getMecabTokenizer: () => ({
|
||||||
tokenize,
|
tokenize,
|
||||||
}),
|
}),
|
||||||
|
getJlptLevel: () => null,
|
||||||
...overrides,
|
...overrides,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("tokenizeSubtitleService assigns JLPT level to parsed Yomitan tokens", async () => {
  // One scanning-parser line made of two segments; the assertions below
  // expect the tokenizer to produce a single token for that line.
  const buildParseResults = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [
          { text: "猫", reading: "ねこ", headwords: [[{ term: "猫" }]] },
          { text: "です", reading: "です", headwords: [[{ term: "です" }]] },
        ],
      ],
    },
  ];

  const deps = makeDeps({
    getYomitanExt: () => ({ id: "dummy-ext" } as any),
    getYomitanParserWindow: () => ({
      isDestroyed: () => false,
      webContents: {
        executeJavaScript: async () => buildParseResults(),
      },
    } as unknown as Electron.BrowserWindow),
    tokenizeWithMecab: async () => null,
    getJlptLevel: (text) => (text === "猫" ? "N5" : null),
  });

  const result = await tokenizeSubtitleService("猫です", deps);

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, "N5");
});
|
||||||
|
|
||||||
|
test("tokenizeSubtitleService skips JLPT level for excluded demonstratives", async () => {
  // The lookup stub would report N5 for "この"; the token must still come
  // back without a JLPT level.
  const buildParseResults = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [{ text: "この", reading: "この", headwords: [[{ term: "この" }]] }],
      ],
    },
  ];

  const deps = makeDeps({
    getYomitanExt: () => ({ id: "dummy-ext" } as any),
    getYomitanParserWindow: () => ({
      isDestroyed: () => false,
      webContents: {
        executeJavaScript: async () => buildParseResults(),
      },
    } as unknown as Electron.BrowserWindow),
    tokenizeWithMecab: async () => null,
    getJlptLevel: (text) => (text === "この" ? "N5" : null),
  });

  const result = await tokenizeSubtitleService("この", deps);

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
|
||||||
|
|
||||||
|
test("tokenizeSubtitleService skips JLPT level for repeated kana SFX", async () => {
  // The lookup stub would report N5 for "ああ"; the repeated-kana token must
  // still come back without a JLPT level.
  const buildParseResults = () => [
    {
      source: "scanning-parser",
      index: 0,
      content: [
        [{ text: "ああ", reading: "ああ", headwords: [[{ term: "ああ" }]] }],
      ],
    },
  ];

  const deps = makeDeps({
    getYomitanExt: () => ({ id: "dummy-ext" } as any),
    getYomitanParserWindow: () => ({
      isDestroyed: () => false,
      webContents: {
        executeJavaScript: async () => buildParseResults(),
      },
    } as unknown as Electron.BrowserWindow),
    tokenizeWithMecab: async () => null,
    getJlptLevel: (text) => (text === "ああ" ? "N5" : null),
  });

  const result = await tokenizeSubtitleService("ああ", deps);

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
|
||||||
|
|
||||||
|
test("tokenizeSubtitleService assigns JLPT level to mecab tokens", async () => {
  // MeCab-backed deps: a single noun token whose headword the lookup stub maps to N4.
  const deps = makeDepsFromMecabTokenizer(
    async () => [
      {
        word: "猫",
        partOfSpeech: PartOfSpeech.noun,
        pos1: "",
        pos2: "",
        pos3: "",
        pos4: "",
        inflectionType: "",
        inflectionForm: "",
        headword: "猫",
        katakanaReading: "ネコ",
        pronunciation: "ネコ",
      },
    ],
    {
      getJlptLevel: (text) => (text === "猫" ? "N4" : null),
    },
  );

  const result = await tokenizeSubtitleService("猫です", deps);

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.jlptLevel, "N4");
});
|
||||||
|
|
||||||
|
test("tokenizeSubtitleService skips JLPT level for mecab tokens marked as ineligible", async () => {
  // A particle token (pos1 "助詞"): the lookup stub would report N5, but the
  // token must keep its pos1 and come back without a JLPT level.
  const deps = makeDepsFromMecabTokenizer(
    async () => [
      {
        word: "は",
        partOfSpeech: PartOfSpeech.particle,
        pos1: "助詞",
        pos2: "",
        pos3: "",
        pos4: "",
        inflectionType: "",
        inflectionForm: "",
        headword: "は",
        katakanaReading: "ハ",
        pronunciation: "ハ",
      },
    ],
    {
      getJlptLevel: (text) => (text === "は" ? "N5" : null),
    },
  );

  const result = await tokenizeSubtitleService("は", deps);

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.pos1, "助詞");
  assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
|
||||||
|
|
||||||
test("tokenizeSubtitleService returns null tokens for empty normalized text", async () => {
|
test("tokenizeSubtitleService returns null tokens for empty normalized text", async () => {
|
||||||
const result = await tokenizeSubtitleService(" \\n ", makeDeps());
|
const result = await tokenizeSubtitleService(" \\n ", makeDeps());
|
||||||
assert.deepEqual(result, { text: " \\n ", tokens: null });
|
assert.deepEqual(result, { text: " \\n ", tokens: null });
|
||||||
|
|||||||
@@ -1,20 +1,23 @@
|
|||||||
import { BrowserWindow, Extension, session } from "electron";
|
import { BrowserWindow, Extension, session } from "electron";
|
||||||
import { markNPlusOneTargets, mergeTokens } from "../../token-merger";
|
import { markNPlusOneTargets, mergeTokens } from "../../token-merger";
|
||||||
import {
|
import {
|
||||||
|
JlptLevel,
|
||||||
MergedToken,
|
MergedToken,
|
||||||
NPlusOneMatchMode,
|
NPlusOneMatchMode,
|
||||||
PartOfSpeech,
|
PartOfSpeech,
|
||||||
SubtitleData,
|
SubtitleData,
|
||||||
Token,
|
Token,
|
||||||
} from "../../types";
|
} from "../../types";
|
||||||
|
import { shouldIgnoreJlptForMecabPos1 } from "./jlpt-token-filter-config";
|
||||||
|
import { shouldIgnoreJlptByTerm } from "./jlpt-excluded-terms";
|
||||||
|
|
||||||
interface YomitanParseHeadword {
|
interface YomitanParseHeadword {
|
||||||
term?: unknown;
|
term?: unknown;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface YomitanParseSegment {
|
interface YomitanParseSegment {
|
||||||
text?: unknown;
|
text?: string;
|
||||||
reading?: unknown;
|
reading?: string;
|
||||||
headwords?: unknown;
|
headwords?: unknown;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,6 +27,20 @@ interface YomitanParseResultItem {
|
|||||||
content?: unknown;
|
content?: unknown;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type YomitanParseLine = YomitanParseSegment[];
|
||||||
|
|
||||||
|
// Katakana code point range that is folded to hiragana (U+30A1 ァ … U+30F6 ヶ).
const KATAKANA_CODEPOINT_START = 0x30a1;
const KATAKANA_CODEPOINT_END = 0x30f6;
// Subtracting this from a katakana code point in the range above gives the
// matching hiragana code point (U+30A1 ァ → U+3041 ぁ).
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
|
||||||
|
|
||||||
|
/**
 * Type guard for any non-null value whose `typeof` is "object".
 * Arrays pass this check; functions and primitives do not.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}
|
||||||
|
|
||||||
|
/** Type guard for primitive strings (boxed `String` objects do not pass). */
function isString(value: unknown): value is string {
  const kind = typeof value;
  return kind === "string";
}
|
||||||
|
|
||||||
export interface TokenizerServiceDeps {
|
export interface TokenizerServiceDeps {
|
||||||
getYomitanExt: () => Extension | null;
|
getYomitanExt: () => Extension | null;
|
||||||
getYomitanParserWindow: () => BrowserWindow | null;
|
getYomitanParserWindow: () => BrowserWindow | null;
|
||||||
@@ -34,6 +51,8 @@ export interface TokenizerServiceDeps {
|
|||||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||||
isKnownWord: (text: string) => boolean;
|
isKnownWord: (text: string) => boolean;
|
||||||
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
||||||
|
getJlptLevel: (text: string) => JlptLevel | null;
|
||||||
|
getJlptEnabled?: () => boolean;
|
||||||
tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
|
tokenizeWithMecab: (text: string) => Promise<MergedToken[] | null>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -51,6 +70,8 @@ export interface TokenizerDepsRuntimeOptions {
|
|||||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||||
isKnownWord: (text: string) => boolean;
|
isKnownWord: (text: string) => boolean;
|
||||||
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
getKnownWordMatchMode: () => NPlusOneMatchMode;
|
||||||
|
getJlptLevel: (text: string) => JlptLevel | null;
|
||||||
|
getJlptEnabled?: () => boolean;
|
||||||
getMecabTokenizer: () => MecabTokenizerLike | null;
|
getMecabTokenizer: () => MecabTokenizerLike | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,6 +88,8 @@ export function createTokenizerDepsRuntimeService(
|
|||||||
setYomitanParserInitPromise: options.setYomitanParserInitPromise,
|
setYomitanParserInitPromise: options.setYomitanParserInitPromise,
|
||||||
isKnownWord: options.isKnownWord,
|
isKnownWord: options.isKnownWord,
|
||||||
getKnownWordMatchMode: options.getKnownWordMatchMode,
|
getKnownWordMatchMode: options.getKnownWordMatchMode,
|
||||||
|
getJlptLevel: options.getJlptLevel,
|
||||||
|
getJlptEnabled: options.getJlptEnabled,
|
||||||
tokenizeWithMecab: async (text) => {
|
tokenizeWithMecab: async (text) => {
|
||||||
const mecabTokenizer = options.getMecabTokenizer();
|
const mecabTokenizer = options.getMecabTokenizer();
|
||||||
if (!mecabTokenizer) {
|
if (!mecabTokenizer) {
|
||||||
@@ -112,19 +135,205 @@ function applyKnownWordMarking(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Picks the text used for the JLPT lookup of a token: the headword when it
 * is non-empty, otherwise the reading when it is non-empty, otherwise the
 * surface form.
 */
function resolveJlptLookupText(token: MergedToken): string {
  // `||` falls through on both undefined and the empty string.
  return token.headword || token.reading || token.surface;
}
|
||||||
|
|
||||||
|
/**
 * Normalizes text for the JLPT exclusion checks: trims surrounding
 * whitespace and folds katakana in the
 * KATAKANA_CODEPOINT_START..KATAKANA_CODEPOINT_END range to hiragana.
 * All other characters are kept unchanged.
 *
 * @returns The normalized text, or "" when the input is empty after trimming.
 */
function normalizeJlptTextForExclusion(text: string): string {
  const trimmed = text.trim();
  if (trimmed.length === 0) {
    return "";
  }

  // Array.from walks the string by code point, not by UTF-16 unit.
  return Array.from(trimmed, (char) => {
    const codePoint = char.codePointAt(0);
    if (codePoint === undefined) {
      return "";
    }

    const isFoldableKatakana =
      codePoint >= KATAKANA_CODEPOINT_START &&
      codePoint <= KATAKANA_CODEPOINT_END;
    return isFoldableKatakana
      ? String.fromCodePoint(codePoint - KATAKANA_TO_HIRAGANA_OFFSET)
      : char;
  }).join("");
}
|
||||||
|
|
||||||
|
// Inclusive code point ranges accepted by isKanaChar. U+30FB (・) and
// U+30FC (ー) fall between the last two ranges and are therefore rejected.
const KANA_CODEPOINT_RANGES: ReadonlyArray<readonly [number, number]> = [
  [0x3041, 0x3096],
  [0x309b, 0x309f],
  [0x30a0, 0x30fa],
  [0x30fd, 0x30ff],
];

/**
 * Reports whether the first code point of `char` lies in one of the
 * hiragana/katakana ranges listed in KANA_CODEPOINT_RANGES.
 * Returns false for the empty string.
 */
function isKanaChar(char: string): boolean {
  const codePoint = char.codePointAt(0);
  if (codePoint === undefined) {
    return false;
  }

  return KANA_CODEPOINT_RANGES.some(
    ([first, last]) => codePoint >= first && codePoint <= last,
  );
}
|
||||||
|
|
||||||
|
/**
 * Heuristic for expressive repeated-kana tokens (e.g. 「ああ」, 「ははは」) that
 * should not receive a JLPT label.
 *
 * The trimmed text is flagged when every character passes `isKanaChar` and
 * either two identical characters are adjacent, or the most frequent
 * character occurs at least twice (texts of up to two characters) or in at
 * least half of the characters, rounded up (longer texts).
 * Text containing any character rejected by `isKanaChar` is never flagged.
 */
function isRepeatedKanaSfx(text: string): boolean {
  const chars = Array.from(text.trim());
  if (chars.length === 0) {
    return false;
  }
  if (!chars.every((char) => isKanaChar(char))) {
    return false;
  }

  const hasAdjacentRepeat = chars.some(
    (char, index) => index > 0 && char === chars[index - 1],
  );
  if (hasAdjacentRepeat) {
    return true;
  }

  const occurrences = new Map<string, number>();
  for (const char of chars) {
    occurrences.set(char, (occurrences.get(char) ?? 0) + 1);
  }
  const topCount = Math.max(...occurrences.values());

  const threshold = chars.length <= 2 ? 2 : Math.ceil(chars.length / 2);
  return topCount >= threshold;
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a token may receive a JLPT level.
 *
 * A token is ineligible when its MeCab `pos1` is on the ignore list, or when
 * any of its textual forms (headword, reading, surface — both trimmed and
 * kana-normalized) is an excluded term or looks like repeated-kana SFX.
 *
 * The forms are collected into a Set first so identical strings (for
 * example a headword equal to the surface, or a form that normalization
 * does not change) are checked only once.
 */
function isJlptEligibleToken(token: MergedToken): boolean {
  if (token.pos1 && shouldIgnoreJlptForMecabPos1(token.pos1)) {
    return false;
  }

  const forms = new Set<string>();
  for (const candidate of [token.headword, token.reading, token.surface]) {
    if (typeof candidate !== "string" || candidate.length === 0) {
      continue;
    }

    const normalizedCandidate = normalizeJlptTextForExclusion(candidate);
    if (!normalizedCandidate) {
      // Whitespace-only candidate: nothing to check.
      continue;
    }

    forms.add(candidate.trim());
    forms.add(normalizedCandidate);
  }

  for (const form of forms) {
    if (shouldIgnoreJlptByTerm(form) || isRepeatedKanaSfx(form)) {
      return false;
    }
  }

  return true;
}
|
||||||
|
|
||||||
|
/**
 * Type guard for a parse result entry produced by the "scanning-parser"
 * source whose `content` is an array.
 */
function isYomitanParseResultItem(
  value: unknown,
): value is YomitanParseResultItem {
  if (!isObject(value)) {
    return false;
  }

  const item = value as YomitanParseResultItem;
  return item.source === "scanning-parser" && Array.isArray(item.content);
}
|
||||||
|
|
||||||
|
/**
 * Type guard for one parsed line: an array in which every segment is an
 * object carrying a string `text`.
 */
function isYomitanParseLine(value: unknown): value is YomitanParseLine {
  return (
    Array.isArray(value) &&
    value.every(
      (segment) =>
        isObject(segment) && isString((segment as YomitanParseSegment).text),
    )
  );
}
|
||||||
|
|
||||||
|
/**
 * Type guard for the `headwords` payload of a segment: an array of groups,
 * each group being an array of objects with a string `term`.
 */
function isYomitanHeadwordRows(value: unknown): value is YomitanParseHeadword[][] {
  const isHeadword = (item: unknown): boolean =>
    isObject(item) && isString((item as YomitanParseHeadword).term);
  const isHeadwordGroup = (group: unknown): boolean =>
    Array.isArray(group) && group.every((item) => isHeadword(item));

  if (!Array.isArray(value)) {
    return false;
  }
  return value.every((group) => isHeadwordGroup(group));
}
|
||||||
|
|
||||||
/**
 * Returns the term of the first headword found in a segment: the leading
 * entry of the first non-empty headword group.
 *
 * @returns The headword term, or "" when `headwords` is not a well-formed
 * list of headword groups or every group is empty.
 */
function extractYomitanHeadword(segment: YomitanParseSegment): string {
  const rows = segment.headwords;
  if (!isYomitanHeadwordRows(rows)) {
    return "";
  }

  for (const group of rows) {
    // Only the leading entry of each group is considered; empty groups
    // yield undefined here and are skipped.
    const term = group[0]?.term;
    if (isString(term)) {
      return term;
    }
  }

  return "";
}
|
||||||
|
|
||||||
|
/**
 * Returns a copy of `tokens` with `jlptLevel` filled in.
 *
 * Ineligible tokens (see `isJlptEligibleToken`) get `jlptLevel: undefined`.
 * Eligible tokens are looked up by their preferred lookup text first and by
 * their surface form as a fallback; when neither lookup hits, the token's
 * existing `jlptLevel` is kept.
 *
 * The fallback lookup runs only when the primary lookup misses and the
 * surface differs from the text already looked up.
 *
 * @param tokens Tokens to annotate; not mutated.
 * @param getJlptLevel Dictionary lookup returning a level, or null for unknown terms.
 */
function applyJlptMarking(
  tokens: MergedToken[],
  getJlptLevel: (text: string) => JlptLevel | null,
): MergedToken[] {
  return tokens.map((token) => {
    if (!isJlptEligibleToken(token)) {
      return { ...token, jlptLevel: undefined };
    }

    const lookupText = resolveJlptLookupText(token);
    const resolvedLevel =
      getJlptLevel(lookupText) ??
      (lookupText === token.surface ? null : getJlptLevel(token.surface));

    return {
      ...token,
      jlptLevel: resolvedLevel ?? token.jlptLevel,
    };
  });
}
|
||||||
|
|
||||||
function mapYomitanParseResultsToMergedTokens(
|
function mapYomitanParseResultsToMergedTokens(
|
||||||
@@ -136,14 +345,9 @@ function mapYomitanParseResultsToMergedTokens(
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const scanningItems = parseResults.filter((item) => {
|
const scanningItems = parseResults.filter(
|
||||||
const resultItem = item as YomitanParseResultItem;
|
(item): item is YomitanParseResultItem => isYomitanParseResultItem(item),
|
||||||
return (
|
);
|
||||||
resultItem &&
|
|
||||||
resultItem.source === "scanning-parser" &&
|
|
||||||
Array.isArray(resultItem.content)
|
|
||||||
);
|
|
||||||
}) as YomitanParseResultItem[];
|
|
||||||
|
|
||||||
if (scanningItems.length === 0) {
|
if (scanningItems.length === 0) {
|
||||||
return null;
|
return null;
|
||||||
@@ -158,24 +362,21 @@ function mapYomitanParseResultsToMergedTokens(
|
|||||||
|
|
||||||
const tokens: MergedToken[] = [];
|
const tokens: MergedToken[] = [];
|
||||||
let charOffset = 0;
|
let charOffset = 0;
|
||||||
|
let validLineCount = 0;
|
||||||
|
|
||||||
for (const line of content) {
|
for (const line of content) {
|
||||||
if (!Array.isArray(line)) {
|
if (!isYomitanParseLine(line)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
validLineCount += 1;
|
||||||
|
|
||||||
let surface = "";
|
let surface = "";
|
||||||
let reading = "";
|
let reading = "";
|
||||||
let headword = "";
|
let headword = "";
|
||||||
|
|
||||||
for (const rawSegment of line) {
|
for (const segment of line) {
|
||||||
const segment = rawSegment as YomitanParseSegment;
|
|
||||||
if (!segment || typeof segment !== "object") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const segmentText = segment.text;
|
const segmentText = segment.text;
|
||||||
if (typeof segmentText !== "string" || segmentText.length === 0) {
|
if (!segmentText || segmentText.length === 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -205,6 +406,7 @@ function mapYomitanParseResultsToMergedTokens(
|
|||||||
startPos: start,
|
startPos: start,
|
||||||
endPos: end,
|
endPos: end,
|
||||||
partOfSpeech: PartOfSpeech.other,
|
partOfSpeech: PartOfSpeech.other,
|
||||||
|
pos1: "",
|
||||||
isMerged: true,
|
isMerged: true,
|
||||||
isNPlusOneTarget: false,
|
isNPlusOneTarget: false,
|
||||||
isKnown: (() => {
|
isKnown: (() => {
|
||||||
@@ -218,9 +420,108 @@ function mapYomitanParseResultsToMergedTokens(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (validLineCount === 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return tokens.length > 0 ? tokens : null;
|
return tokens.length > 0 ? tokens : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Finds the `pos1` of the MeCab token that best covers `token`.
 *
 * Only MeCab tokens with a non-empty `pos1` and a positive character
 * overlap with `token` are considered. The winner has the largest overlap;
 * ties go to the longer MeCab token, then to the one starting earlier.
 * A missing `startPos` is treated as 0 and a missing `endPos` as start plus
 * surface length.
 *
 * @returns The chosen `pos1`, or undefined when no MeCab token overlaps.
 */
function pickClosestMecabPos1(
  token: MergedToken,
  mecabTokens: MergedToken[],
): string | undefined {
  const tokenStart = token.startPos ?? 0;
  const tokenEnd = token.endPos ?? tokenStart + token.surface.length;

  let best:
    | { pos1: string; overlap: number; span: number; start: number }
    | undefined;

  for (const candidate of mecabTokens) {
    const candidatePos1 = candidate.pos1;
    if (!candidatePos1) {
      continue;
    }

    const start = candidate.startPos ?? 0;
    const end = candidate.endPos ?? start + candidate.surface.length;
    const overlap = Math.min(tokenEnd, end) - Math.max(tokenStart, start);
    // Written as a negated `>` so a NaN overlap is skipped as well.
    if (!(overlap > 0)) {
      continue;
    }

    const span = end - start;
    const beatsCurrentBest =
      best === undefined ||
      overlap > best.overlap ||
      (overlap === best.overlap &&
        (span > best.span || (span === best.span && start < best.start)));

    if (beatsCurrentBest) {
      best = { pos1: candidatePos1, overlap, span, start };
    }
  }

  return best?.pos1;
}
|
||||||
|
|
||||||
|
/**
 * Fills in `pos1` on Yomitan tokens using MeCab's analysis of the same text.
 *
 * Tokens that already have a non-empty `pos1` are returned untouched; the
 * others receive the `pos1` of the best-overlapping MeCab token, when one
 * exists. Enrichment is best effort: if MeCab throws or returns nothing,
 * a warning is logged and the input tokens are returned unchanged.
 *
 * @param tokens Tokens produced by the Yomitan parser.
 * @param deps Provides `tokenizeWithMecab`.
 * @param text The text that was tokenized.
 */
async function enrichYomitanPos1(
  tokens: MergedToken[],
  deps: TokenizerServiceDeps,
  text: string,
): Promise<MergedToken[]> {
  if (!tokens || tokens.length === 0) {
    return tokens;
  }

  let mecabTokens: MergedToken[] | null;
  try {
    mecabTokens = await deps.tokenizeWithMecab(text);
  } catch (err: unknown) {
    // A rejection is not guaranteed to be an Error instance.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      "Failed to enrich Yomitan tokens with MeCab POS:",
      reason,
      `tokenCount=${tokens.length}`,
      `textLength=${text.length}`,
    );
    return tokens;
  }

  if (!mecabTokens || mecabTokens.length === 0) {
    console.warn(
      "MeCab enrichment returned no tokens; preserving Yomitan token output.",
      `tokenCount=${tokens.length}`,
      `textLength=${text.length}`,
    );
    return tokens;
  }

  // Const alias so the non-null narrowing holds inside the callback.
  const posSource = mecabTokens;
  return tokens.map((token) => {
    if (token.pos1) {
      return token;
    }

    const pos1 = pickClosestMecabPos1(token, posSource);
    return pos1 ? { ...token, pos1 } : token;
  });
}
|
||||||
|
|
||||||
async function ensureYomitanParserWindow(
|
async function ensureYomitanParserWindow(
|
||||||
deps: TokenizerServiceDeps,
|
deps: TokenizerServiceDeps,
|
||||||
): Promise<boolean> {
|
): Promise<boolean> {
|
||||||
@@ -356,11 +657,16 @@ async function parseWithYomitanInternalParser(
|
|||||||
script,
|
script,
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
return mapYomitanParseResultsToMergedTokens(
|
const yomitanTokens = mapYomitanParseResultsToMergedTokens(
|
||||||
parseResults,
|
parseResults,
|
||||||
deps.isKnownWord,
|
deps.isKnownWord,
|
||||||
deps.getKnownWordMatchMode(),
|
deps.getKnownWordMatchMode(),
|
||||||
);
|
);
|
||||||
|
if (!yomitanTokens || yomitanTokens.length === 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return enrichYomitanPos1(yomitanTokens, deps, text);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Yomitan parser request failed:", (err as Error).message);
|
console.error("Yomitan parser request failed:", (err as Error).message);
|
||||||
return null;
|
return null;
|
||||||
@@ -385,6 +691,7 @@ export async function tokenizeSubtitleService(
|
|||||||
.replace(/\n/g, " ")
|
.replace(/\n/g, " ")
|
||||||
.replace(/\s+/g, " ")
|
.replace(/\s+/g, " ")
|
||||||
.trim();
|
.trim();
|
||||||
|
const jlptEnabled = deps.getJlptEnabled?.() !== false;
|
||||||
|
|
||||||
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
|
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps);
|
||||||
if (yomitanTokens && yomitanTokens.length > 0) {
|
if (yomitanTokens && yomitanTokens.length > 0) {
|
||||||
@@ -393,7 +700,10 @@ export async function tokenizeSubtitleService(
|
|||||||
deps.isKnownWord,
|
deps.isKnownWord,
|
||||||
deps.getKnownWordMatchMode(),
|
deps.getKnownWordMatchMode(),
|
||||||
);
|
);
|
||||||
return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) };
|
const jlptMarkedTokens = jlptEnabled
|
||||||
|
? applyJlptMarking(knownMarkedTokens, deps.getJlptLevel)
|
||||||
|
: knownMarkedTokens.map((token) => ({ ...token, jlptLevel: undefined }));
|
||||||
|
return { text: displayText, tokens: markNPlusOneTargets(jlptMarkedTokens) };
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -404,7 +714,10 @@ export async function tokenizeSubtitleService(
|
|||||||
deps.isKnownWord,
|
deps.isKnownWord,
|
||||||
deps.getKnownWordMatchMode(),
|
deps.getKnownWordMatchMode(),
|
||||||
);
|
);
|
||||||
return { text: displayText, tokens: markNPlusOneTargets(knownMarkedTokens) };
|
const jlptMarkedTokens = jlptEnabled
|
||||||
|
? applyJlptMarking(knownMarkedTokens, deps.getJlptLevel)
|
||||||
|
: knownMarkedTokens.map((token) => ({ ...token, jlptLevel: undefined }));
|
||||||
|
return { text: displayText, tokens: markNPlusOneTargets(jlptMarkedTokens) };
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Tokenization error:", (err as Error).message);
|
console.error("Tokenization error:", (err as Error).message);
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ export async function loadYomitanExtensionService(
|
|||||||
deps: YomitanExtensionLoaderDeps,
|
deps: YomitanExtensionLoaderDeps,
|
||||||
): Promise<Extension | null> {
|
): Promise<Extension | null> {
|
||||||
const searchPaths = [
|
const searchPaths = [
|
||||||
|
path.join(__dirname, "..", "..", "vendor", "yomitan"),
|
||||||
path.join(__dirname, "..", "..", "..", "vendor", "yomitan"),
|
path.join(__dirname, "..", "..", "..", "vendor", "yomitan"),
|
||||||
path.join(process.resourcesPath, "yomitan"),
|
path.join(process.resourcesPath, "yomitan"),
|
||||||
"/usr/share/SubMiner/yomitan",
|
"/usr/share/SubMiner/yomitan",
|
||||||
|
|||||||
75
src/main.ts
75
src/main.ts
@@ -95,6 +95,7 @@ import {
|
|||||||
createOverlayContentMeasurementStoreService,
|
createOverlayContentMeasurementStoreService,
|
||||||
createOverlayWindowService,
|
createOverlayWindowService,
|
||||||
createTokenizerDepsRuntimeService,
|
createTokenizerDepsRuntimeService,
|
||||||
|
createJlptVocabularyLookupService,
|
||||||
cycleSecondarySubModeService,
|
cycleSecondarySubModeService,
|
||||||
enforceOverlayLayerOrderService,
|
enforceOverlayLayerOrderService,
|
||||||
ensureOverlayWindowLevelService,
|
ensureOverlayWindowLevelService,
|
||||||
@@ -227,6 +228,8 @@ const isDev =
|
|||||||
process.argv.includes("--dev") || process.argv.includes("--debug");
|
process.argv.includes("--dev") || process.argv.includes("--debug");
|
||||||
const texthookerService = new TexthookerService();
|
const texthookerService = new TexthookerService();
|
||||||
const subtitleWsService = new SubtitleWebSocketService();
|
const subtitleWsService = new SubtitleWebSocketService();
|
||||||
|
let jlptDictionaryLookupInitialized = false;
|
||||||
|
let jlptDictionaryLookupInitialization: Promise<void> | null = null;
|
||||||
const appLogger = {
|
const appLogger = {
|
||||||
logInfo: (message: string) => {
|
logInfo: (message: string) => {
|
||||||
console.log(message);
|
console.log(message);
|
||||||
@@ -464,6 +467,73 @@ function loadSubtitlePosition(): SubtitlePosition | null {
|
|||||||
return appState.subtitlePosition;
|
return appState.subtitlePosition;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the ordered, de-duplicated list of directories probed for the JLPT
 * vocabulary dictionary.
 *
 * Each root contributes three candidates, in this order: the root itself,
 * `<root>/vendor/yomitan-jlpt-vocab`, and `<root>/yomitan-jlpt-vocab`.
 * When the same path appears more than once, its first position is kept.
 */
function getJlptDictionarySearchPaths(): string[] {
  const homeDir = os.homedir();
  const dictionaryRoots = [
    // Source checkouts (development / running from the repo).
    path.join(__dirname, "..", "..", "vendor", "yomitan-jlpt-vocab"),
    path.join(app.getAppPath(), "vendor", "yomitan-jlpt-vocab"),

    // Resources of a packaged Electron build.
    path.join(process.resourcesPath, "yomitan-jlpt-vocab"),
    path.join(process.resourcesPath, "app.asar", "vendor", "yomitan-jlpt-vocab"),

    // Per-user locations for manually installed dictionaries.
    USER_DATA_PATH,
    app.getPath("userData"),
    path.join(homeDir, ".config", "SubMiner"),
    path.join(homeDir, ".config", "subminer"),
    path.join(homeDir, "Library", "Application Support", "SubMiner"),
    path.join(homeDir, "Library", "Application Support", "subminer"),

    // Final fallback: the current working directory.
    process.cwd(),
  ];

  // A Set iterates in insertion order, so adding candidates directly both
  // removes duplicates and keeps the first-seen priority.
  const uniquePaths = new Set<string>();
  for (const root of dictionaryRoots) {
    uniquePaths.add(root);
    uniquePaths.add(path.join(root, "vendor", "yomitan-jlpt-vocab"));
    uniquePaths.add(path.join(root, "yomitan-jlpt-vocab"));
  }

  return Array.from(uniquePaths);
}
|
||||||
|
|
||||||
|
/**
 * Creates the JLPT vocabulary lookup from the dictionary search paths and
 * stores it on `appState.jlptLevelLookup`.
 *
 * Messages reported by the lookup service are written to the console with a
 * `[JLPT]` prefix. Any rejection from the service propagates to the caller.
 */
async function initializeJlptDictionaryLookup(): Promise<void> {
  const searchPaths = getJlptDictionarySearchPaths();
  const lookup = await createJlptVocabularyLookupService({
    searchPaths,
    log: (line) => {
      console.log(`[JLPT] ${line}`);
    },
  });
  appState.jlptLevelLookup = lookup;
}
|
||||||
|
|
||||||
|
/**
 * Lazily initializes the JLPT dictionary lookup the first time it is needed.
 *
 * Does nothing when `subtitleStyle.enableJlpt` is off or when a previous
 * initialization already succeeded. Calls made while an initialization is in
 * flight await the same promise. If initialization fails, the cached promise
 * is cleared so a later call retries, and the error is rethrown to the caller.
 */
async function ensureJlptDictionaryLookup(): Promise<void> {
  const jlptEnabled = getResolvedConfig().subtitleStyle.enableJlpt;
  if (!jlptEnabled || jlptDictionaryLookupInitialized) {
    return;
  }

  let pending = jlptDictionaryLookupInitialization;
  if (!pending) {
    pending = (async (): Promise<void> => {
      try {
        await initializeJlptDictionaryLookup();
        jlptDictionaryLookupInitialized = true;
      } catch (error) {
        // Forget the failed attempt so the next caller starts a fresh one.
        jlptDictionaryLookupInitialization = null;
        throw error;
      }
    })();
    jlptDictionaryLookupInitialization = pending;
  }

  await pending;
}
|
||||||
|
|
||||||
function saveSubtitlePosition(position: SubtitlePosition): void {
|
function saveSubtitlePosition(position: SubtitlePosition): void {
|
||||||
appState.subtitlePosition = position;
|
appState.subtitlePosition = position;
|
||||||
saveSubtitlePositionService({
|
saveSubtitlePositionService({
|
||||||
@@ -804,6 +874,7 @@ function updateMpvSubtitleRenderMetrics(
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
|
async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
|
||||||
|
await ensureJlptDictionaryLookup();
|
||||||
return tokenizeSubtitleService(
|
return tokenizeSubtitleService(
|
||||||
text,
|
text,
|
||||||
createTokenizerDepsRuntimeService({
|
createTokenizerDepsRuntimeService({
|
||||||
@@ -825,6 +896,9 @@ async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
|
|||||||
getKnownWordMatchMode: () =>
|
getKnownWordMatchMode: () =>
|
||||||
appState.ankiIntegration?.getKnownWordMatchMode() ??
|
appState.ankiIntegration?.getKnownWordMatchMode() ??
|
||||||
getResolvedConfig().ankiConnect.nPlusOne.matchMode,
|
getResolvedConfig().ankiConnect.nPlusOne.matchMode,
|
||||||
|
getJlptLevel: (text) => appState.jlptLevelLookup(text),
|
||||||
|
getJlptEnabled: () =>
|
||||||
|
getResolvedConfig().subtitleStyle.enableJlpt,
|
||||||
getMecabTokenizer: () => appState.mecabTokenizer,
|
getMecabTokenizer: () => appState.mecabTokenizer,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
@@ -1345,6 +1419,7 @@ registerIpcRuntimeServices({
|
|||||||
...resolvedConfig.subtitleStyle,
|
...resolvedConfig.subtitleStyle,
|
||||||
nPlusOneColor: resolvedConfig.ankiConnect.nPlusOne.nPlusOne,
|
nPlusOneColor: resolvedConfig.ankiConnect.nPlusOne.nPlusOne,
|
||||||
knownWordColor: resolvedConfig.ankiConnect.nPlusOne.knownWord,
|
knownWordColor: resolvedConfig.ankiConnect.nPlusOne.knownWord,
|
||||||
|
enableJlpt: resolvedConfig.subtitleStyle.enableJlpt,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
saveSubtitlePosition: (position: unknown) =>
|
saveSubtitlePosition: (position: unknown) =>
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import type {
|
|||||||
SecondarySubMode,
|
SecondarySubMode,
|
||||||
SubtitlePosition,
|
SubtitlePosition,
|
||||||
KikuFieldGroupingChoice,
|
KikuFieldGroupingChoice,
|
||||||
|
JlptLevel,
|
||||||
} from "../types";
|
} from "../types";
|
||||||
import type { CliArgs } from "../cli/args";
|
import type { CliArgs } from "../cli/args";
|
||||||
import type { SubtitleTimingTracker } from "../subtitle-timing-tracker";
|
import type { SubtitleTimingTracker } from "../subtitle-timing-tracker";
|
||||||
@@ -53,6 +54,7 @@ export interface AppState {
|
|||||||
backendOverride: string | null;
|
backendOverride: string | null;
|
||||||
autoStartOverlay: boolean;
|
autoStartOverlay: boolean;
|
||||||
texthookerOnlyMode: boolean;
|
texthookerOnlyMode: boolean;
|
||||||
|
jlptLevelLookup: (term: string) => JlptLevel | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface AppStateInitialValues {
|
export interface AppStateInitialValues {
|
||||||
@@ -112,6 +114,7 @@ export function createAppState(values: AppStateInitialValues): AppState {
|
|||||||
backendOverride: values.backendOverride ?? null,
|
backendOverride: values.backendOverride ?? null,
|
||||||
autoStartOverlay: values.autoStartOverlay ?? false,
|
autoStartOverlay: values.autoStartOverlay ?? false,
|
||||||
texthookerOnlyMode: values.texthookerOnlyMode ?? false,
|
texthookerOnlyMode: values.texthookerOnlyMode ?? false,
|
||||||
|
jlptLevelLookup: () => null,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -71,6 +71,11 @@ export type RendererState = {
|
|||||||
|
|
||||||
knownWordColor: string;
|
knownWordColor: string;
|
||||||
nPlusOneColor: string;
|
nPlusOneColor: string;
|
||||||
|
jlptN1Color: string;
|
||||||
|
jlptN2Color: string;
|
||||||
|
jlptN3Color: string;
|
||||||
|
jlptN4Color: string;
|
||||||
|
jlptN5Color: string;
|
||||||
|
|
||||||
keybindingsMap: Map<string, (string | number)[]>;
|
keybindingsMap: Map<string, (string | number)[]>;
|
||||||
chordPending: boolean;
|
chordPending: boolean;
|
||||||
@@ -130,6 +135,11 @@ export function createRendererState(): RendererState {
|
|||||||
|
|
||||||
knownWordColor: "#a6da95",
|
knownWordColor: "#a6da95",
|
||||||
nPlusOneColor: "#c6a0f6",
|
nPlusOneColor: "#c6a0f6",
|
||||||
|
jlptN1Color: "#ed8796",
|
||||||
|
jlptN2Color: "#f5a97f",
|
||||||
|
jlptN3Color: "#f9e2af",
|
||||||
|
jlptN4Color: "#a6e3a1",
|
||||||
|
jlptN5Color: "#8aadf4",
|
||||||
|
|
||||||
keybindingsMap: new Map(),
|
keybindingsMap: new Map(),
|
||||||
chordPending: false,
|
chordPending: false,
|
||||||
|
|||||||
@@ -250,6 +250,11 @@ body {
|
|||||||
color: #cad3f5;
|
color: #cad3f5;
|
||||||
--subtitle-known-word-color: #a6da95;
|
--subtitle-known-word-color: #a6da95;
|
||||||
--subtitle-n-plus-one-color: #c6a0f6;
|
--subtitle-n-plus-one-color: #c6a0f6;
|
||||||
|
--subtitle-jlpt-n1-color: #ed8796;
|
||||||
|
--subtitle-jlpt-n2-color: #f5a97f;
|
||||||
|
--subtitle-jlpt-n3-color: #f9e2af;
|
||||||
|
--subtitle-jlpt-n4-color: #a6e3a1;
|
||||||
|
--subtitle-jlpt-n5-color: #8aadf4;
|
||||||
text-shadow:
|
text-shadow:
|
||||||
2px 2px 4px rgba(0, 0, 0, 0.8),
|
2px 2px 4px rgba(0, 0, 0, 0.8),
|
||||||
-1px -1px 2px rgba(0, 0, 0, 0.5);
|
-1px -1px 2px rgba(0, 0, 0, 0.5);
|
||||||
@@ -296,6 +301,51 @@ body.settings-modal-open #subtitleContainer {
|
|||||||
text-shadow: 0 0 6px rgba(198, 160, 246, 0.35);
|
text-shadow: 0 0 6px rgba(198, 160, 246, 0.35);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * JLPT level markers: the word keeps its inherited text color and gets a
 * solid 2px underline tinted with the per-level custom property. The hex
 * value after each var() is the fallback used when the property is unset.
 */
#subtitleRoot .word.word-jlpt-n1 {
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n1-color, #ed8796);
  text-underline-offset: 4px;
  color: inherit;
}

#subtitleRoot .word.word-jlpt-n2 {
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n2-color, #f5a97f);
  text-underline-offset: 4px;
  color: inherit;
}

#subtitleRoot .word.word-jlpt-n3 {
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n3-color, #f9e2af);
  text-underline-offset: 4px;
  color: inherit;
}

#subtitleRoot .word.word-jlpt-n4 {
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n4-color, #a6e3a1);
  text-underline-offset: 4px;
  color: inherit;
}

#subtitleRoot .word.word-jlpt-n5 {
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: 2px;
  text-decoration-color: var(--subtitle-jlpt-n5-color, #8aadf4);
  text-underline-offset: 4px;
  color: inherit;
}
|
||||||
|
|
||||||
#subtitleRoot .word:hover {
|
#subtitleRoot .word:hover {
|
||||||
background: rgba(255, 255, 255, 0.2);
|
background: rgba(255, 255, 255, 0.2);
|
||||||
border-radius: 3px;
|
border-radius: 3px;
|
||||||
|
|||||||
80
src/renderer/subtitle-render.test.ts
Normal file
80
src/renderer/subtitle-render.test.ts
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
import test from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import fs from "node:fs";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
import type { MergedToken } from "../types";
|
||||||
|
import { PartOfSpeech } from "../types.js";
|
||||||
|
import { computeWordClass } from "./subtitle-render.js";
|
||||||
|
|
||||||
|
/**
 * Test helper: returns a `MergedToken` with neutral default fields, with any
 * field in `overrides` replacing the corresponding default.
 */
function createToken(overrides: Partial<MergedToken>): MergedToken {
  const defaults: MergedToken = {
    surface: "",
    reading: "",
    headword: "",
    startPos: 0,
    endPos: 0,
    partOfSpeech: PartOfSpeech.other,
    isMerged: true,
    isKnown: false,
    isNPlusOneTarget: false,
  };

  return { ...defaults, ...overrides };
}
|
||||||
|
|
||||||
|
/**
 * Returns the declaration text between `{` and `}` of the first rule whose
 * selector contains `#subtitleRoot .word.word-jlpt-n<level>`.
 *
 * Occurrences where that text is only the prefix of a longer compound
 * selector (for example `...-n1:hover`, `...-n1.active` or `...-n10`) are
 * skipped, so the body of an unrelated rule is never returned.
 *
 * @param cssText Full stylesheet source.
 * @param level JLPT level number used to build the class name.
 * @returns The raw rule body (untrimmed), or `""` when no matching rule or
 *   no complete `{ ... }` pair is found.
 */
function extractClassBlock(cssText: string, level: number): string {
  const selector = `#subtitleRoot .word.word-jlpt-n${level}`;
  // A character that would extend the class name or attach another simple
  // selector (pseudo-class, class, id, attribute) to it.
  const extendsSelector = /[\w\-:.#\[]/;

  let searchFrom = 0;
  for (;;) {
    const start = cssText.indexOf(selector, searchFrom);
    if (start < 0) return "";

    const afterSelector = start + selector.length;
    if (extendsSelector.test(cssText.charAt(afterSelector))) {
      // Prefix of a different selector: keep looking further along.
      searchFrom = afterSelector;
      continue;
    }

    const openBrace = cssText.indexOf("{", afterSelector);
    if (openBrace < 0) return "";
    const closeBrace = cssText.indexOf("}", openBrace);
    if (closeBrace < 0) return "";

    return cssText.slice(openBrace + 1, closeBrace);
  }
}
|
||||||
|
|
||||||
|
// Known / n+1 classes must still be emitted, with the JLPT class appended last.
test("computeWordClass preserves known and n+1 classes while adding JLPT classes", () => {
  const cases = [
    {
      token: createToken({ isKnown: true, jlptLevel: "N1", surface: "猫" }),
      expected: "word word-known word-jlpt-n1",
    },
    {
      token: createToken({
        isNPlusOneTarget: true,
        jlptLevel: "N2",
        surface: "犬",
      }),
      expected: "word word-n-plus-one word-jlpt-n2",
    },
  ];

  for (const { token, expected } of cases) {
    assert.equal(computeWordClass(token), expected);
  }
});
|
||||||
|
|
||||||
|
// Reads the renderer stylesheet (built copy preferred, source copy otherwise)
// and checks that every JLPT level rule is an underline that keeps the
// inherited text color.
test("JLPT CSS rules use underline-only styling in renderer stylesheet", () => {
  const stylesheetUnder = (root: string): string =>
    path.join(process.cwd(), root, "renderer", "style.css");
  const distCssPath = stylesheetUnder("dist");
  const srcCssPath = stylesheetUnder("src");

  let cssPath = srcCssPath;
  if (fs.existsSync(distCssPath)) {
    cssPath = distCssPath;
  }
  if (!fs.existsSync(cssPath)) {
    assert.fail(
      "JLPT CSS file missing. Run `pnpm run build` first, or ensure src/renderer/style.css exists.",
    );
  }

  const cssText = fs.readFileSync(cssPath, "utf-8");
  const requiredDeclarations = [
    /text-decoration-line:\s*underline;/,
    /text-decoration-thickness:\s*2px;/,
    /text-underline-offset:\s*4px;/,
    /color:\s*inherit;/,
  ];

  for (const level of [1, 2, 3, 4, 5]) {
    const block = extractClassBlock(cssText, level);
    assert.ok(block.length > 0, `word-jlpt-n${level} class should exist`);
    for (const declaration of requiredDeclarations) {
      assert.match(block, declaration);
    }
  }
});
|
||||||
@@ -15,6 +15,15 @@ function normalizeSubtitle(text: string, trim = true): string {
|
|||||||
return trim ? normalized.trim() : normalized;
|
return trim ? normalized.trim() : normalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Matches `#` followed by exactly 3, 4, 6 or 8 hexadecimal digits. */
const HEX_COLOR_PATTERN =
  /^#(?:[0-9a-fA-F]{3}|[0-9a-fA-F]{4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})$/;

/**
 * Returns `value` as a trimmed hex color string when it is one, otherwise
 * `fallback`.
 *
 * @param value Untrusted input; anything that is not a string is rejected.
 * @param fallback Color returned when `value` is not a valid hex color.
 * @returns The whitespace-trimmed color, or `fallback` unchanged.
 */
function sanitizeHexColor(value: unknown, fallback: string): string {
  if (typeof value !== "string") {
    return fallback;
  }

  // Trim once and reuse the result for both the check and the return value.
  const candidate = value.trim();
  return HEX_COLOR_PATTERN.test(candidate) ? candidate : fallback;
}
|
||||||
|
|
||||||
function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void {
|
function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void {
|
||||||
const fragment = document.createDocumentFragment();
|
const fragment = document.createDocumentFragment();
|
||||||
|
|
||||||
@@ -50,16 +59,20 @@ function renderWithTokens(root: HTMLElement, tokens: MergedToken[]): void {
|
|||||||
root.appendChild(fragment);
|
root.appendChild(fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
function computeWordClass(token: MergedToken): string {
|
export function computeWordClass(token: MergedToken): string {
|
||||||
|
const classes = ["word"];
|
||||||
|
|
||||||
if (token.isNPlusOneTarget) {
|
if (token.isNPlusOneTarget) {
|
||||||
return "word word-n-plus-one";
|
classes.push("word-n-plus-one");
|
||||||
|
} else if (token.isKnown) {
|
||||||
|
classes.push("word-known");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (token.isKnown) {
|
if (token.jlptLevel) {
|
||||||
return "word word-known";
|
classes.push(`word-jlpt-${token.jlptLevel.toLowerCase()}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return "word";
|
return classes.join(" ");
|
||||||
}
|
}
|
||||||
|
|
||||||
function renderCharacterLevel(root: HTMLElement, text: string): void {
|
function renderCharacterLevel(root: HTMLElement, text: string): void {
|
||||||
@@ -189,6 +202,22 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
|||||||
style.knownWordColor ?? ctx.state.knownWordColor ?? "#a6da95";
|
style.knownWordColor ?? ctx.state.knownWordColor ?? "#a6da95";
|
||||||
const nPlusOneColor =
|
const nPlusOneColor =
|
||||||
style.nPlusOneColor ?? ctx.state.nPlusOneColor ?? "#c6a0f6";
|
style.nPlusOneColor ?? ctx.state.nPlusOneColor ?? "#c6a0f6";
|
||||||
|
const jlptColors = {
|
||||||
|
N1: ctx.state.jlptN1Color ?? "#ed8796",
|
||||||
|
N2: ctx.state.jlptN2Color ?? "#f5a97f",
|
||||||
|
N3: ctx.state.jlptN3Color ?? "#f9e2af",
|
||||||
|
N4: ctx.state.jlptN4Color ?? "#a6e3a1",
|
||||||
|
N5: ctx.state.jlptN5Color ?? "#8aadf4",
|
||||||
|
...(style.jlptColors
|
||||||
|
? {
|
||||||
|
N1: sanitizeHexColor(style.jlptColors?.N1, ctx.state.jlptN1Color),
|
||||||
|
N2: sanitizeHexColor(style.jlptColors?.N2, ctx.state.jlptN2Color),
|
||||||
|
N3: sanitizeHexColor(style.jlptColors?.N3, ctx.state.jlptN3Color),
|
||||||
|
N4: sanitizeHexColor(style.jlptColors?.N4, ctx.state.jlptN4Color),
|
||||||
|
N5: sanitizeHexColor(style.jlptColors?.N5, ctx.state.jlptN5Color),
|
||||||
|
}
|
||||||
|
: {}),
|
||||||
|
};
|
||||||
|
|
||||||
ctx.state.knownWordColor = knownWordColor;
|
ctx.state.knownWordColor = knownWordColor;
|
||||||
ctx.state.nPlusOneColor = nPlusOneColor;
|
ctx.state.nPlusOneColor = nPlusOneColor;
|
||||||
@@ -197,6 +226,16 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
|||||||
knownWordColor,
|
knownWordColor,
|
||||||
);
|
);
|
||||||
ctx.dom.subtitleRoot.style.setProperty("--subtitle-n-plus-one-color", nPlusOneColor);
|
ctx.dom.subtitleRoot.style.setProperty("--subtitle-n-plus-one-color", nPlusOneColor);
|
||||||
|
ctx.state.jlptN1Color = jlptColors.N1;
|
||||||
|
ctx.state.jlptN2Color = jlptColors.N2;
|
||||||
|
ctx.state.jlptN3Color = jlptColors.N3;
|
||||||
|
ctx.state.jlptN4Color = jlptColors.N4;
|
||||||
|
ctx.state.jlptN5Color = jlptColors.N5;
|
||||||
|
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n1-color", jlptColors.N1);
|
||||||
|
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n2-color", jlptColors.N2);
|
||||||
|
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n3-color", jlptColors.N3);
|
||||||
|
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n4-color", jlptColors.N4);
|
||||||
|
ctx.dom.subtitleRoot.style.setProperty("--subtitle-jlpt-n5-color", jlptColors.N5);
|
||||||
|
|
||||||
const secondaryStyle = style.secondary;
|
const secondaryStyle = style.secondary;
|
||||||
if (!secondaryStyle) return;
|
if (!secondaryStyle) return;
|
||||||
|
|||||||
@@ -223,6 +223,7 @@ export function mergeTokens(
|
|||||||
startPos: prev.startPos,
|
startPos: prev.startPos,
|
||||||
endPos: end,
|
endPos: end,
|
||||||
partOfSpeech: prev.partOfSpeech,
|
partOfSpeech: prev.partOfSpeech,
|
||||||
|
pos1: prev.pos1 ?? token.pos1,
|
||||||
pos2: prev.pos2 ?? token.pos2,
|
pos2: prev.pos2 ?? token.pos2,
|
||||||
pos3: prev.pos3 ?? token.pos3,
|
pos3: prev.pos3 ?? token.pos3,
|
||||||
isMerged: true,
|
isMerged: true,
|
||||||
@@ -245,6 +246,7 @@ export function mergeTokens(
|
|||||||
startPos: start,
|
startPos: start,
|
||||||
endPos: end,
|
endPos: end,
|
||||||
partOfSpeech: token.partOfSpeech,
|
partOfSpeech: token.partOfSpeech,
|
||||||
|
pos1: token.pos1,
|
||||||
pos2: token.pos2,
|
pos2: token.pos2,
|
||||||
pos3: token.pos3,
|
pos3: token.pos3,
|
||||||
isMerged: false,
|
isMerged: false,
|
||||||
|
|||||||
12
src/types.ts
12
src/types.ts
@@ -48,13 +48,17 @@ export interface MergedToken {
|
|||||||
startPos: number;
|
startPos: number;
|
||||||
endPos: number;
|
endPos: number;
|
||||||
partOfSpeech: PartOfSpeech;
|
partOfSpeech: PartOfSpeech;
|
||||||
|
pos1?: string;
|
||||||
pos2?: string;
|
pos2?: string;
|
||||||
pos3?: string;
|
pos3?: string;
|
||||||
isMerged: boolean;
|
isMerged: boolean;
|
||||||
isKnown: boolean;
|
isKnown: boolean;
|
||||||
isNPlusOneTarget: boolean;
|
isNPlusOneTarget: boolean;
|
||||||
|
jlptLevel?: JlptLevel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * JLPT level label. Carried on `MergedToken.jlptLevel` and lower-cased by the
 * renderer to build the `word-jlpt-*` CSS class name.
 */
export type JlptLevel = "N1" | "N2" | "N3" | "N4" | "N5";
|
||||||
|
|
||||||
export interface WindowGeometry {
|
export interface WindowGeometry {
|
||||||
x: number;
|
x: number;
|
||||||
y: number;
|
y: number;
|
||||||
@@ -262,6 +266,7 @@ export interface AnkiConnectConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export interface SubtitleStyleConfig {
|
export interface SubtitleStyleConfig {
|
||||||
|
enableJlpt?: boolean;
|
||||||
fontFamily?: string;
|
fontFamily?: string;
|
||||||
fontSize?: number;
|
fontSize?: number;
|
||||||
fontColor?: string;
|
fontColor?: string;
|
||||||
@@ -270,6 +275,13 @@ export interface SubtitleStyleConfig {
|
|||||||
backgroundColor?: string;
|
backgroundColor?: string;
|
||||||
nPlusOneColor?: string;
|
nPlusOneColor?: string;
|
||||||
knownWordColor?: string;
|
knownWordColor?: string;
|
||||||
|
jlptColors?: {
|
||||||
|
N1: string;
|
||||||
|
N2: string;
|
||||||
|
N3: string;
|
||||||
|
N4: string;
|
||||||
|
N5: string;
|
||||||
|
};
|
||||||
secondary?: {
|
secondary?: {
|
||||||
fontFamily?: string;
|
fontFamily?: string;
|
||||||
fontSize?: number;
|
fontSize?: number;
|
||||||
|
|||||||
1
vendor/yomitan-jlpt-vocab
vendored
Submodule
1
vendor/yomitan-jlpt-vocab
vendored
Submodule
Submodule vendor/yomitan-jlpt-vocab added at b062d4e38c
Reference in New Issue
Block a user