mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-27 18:22:41 -08:00
feat(subtitles): add line-break display toggle and narrow-space normalization
This commit is contained in:
@@ -0,0 +1,36 @@
|
|||||||
|
---
|
||||||
|
id: TASK-90
|
||||||
|
title: Normalize narrow Unicode whitespace in tokenizer input
|
||||||
|
status: Done
|
||||||
|
assignee: []
|
||||||
|
created_date: '2026-02-20 06:17'
|
||||||
|
updated_date: '2026-02-20 06:20'
|
||||||
|
labels: []
|
||||||
|
dependencies: []
|
||||||
|
priority: medium
|
||||||
|
---
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||||
|
Fix tokenizer behavior where subtitle lines containing narrow/invisible Unicode spacing between Japanese segments can be split/grouped incorrectly compared with normal space handling.
|
||||||
|
<!-- SECTION:DESCRIPTION:END -->
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
<!-- AC:BEGIN -->
|
||||||
|
- [x] #1 A regression test reproduces the subtitle sample containing narrow/invisible Unicode spacing and fails before the fix.
|
||||||
|
- [x] #2 Tokenizer normalization treats narrow/invisible spacing variants consistently with regular spacing for grouping/highlight behavior.
|
||||||
|
- [x] #3 Existing tokenizer tests still pass.
|
||||||
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Implementation Notes
|
||||||
|
|
||||||
|
<!-- SECTION:NOTES:BEGIN -->
|
||||||
|
Linked from subagent session `codex-narrow-space-tokenizer-20260220T061716Z-p97s`.
|
||||||
|
|
||||||
|
Added `src/subtitle/stages/normalize.test.ts` regression for `\u200B` separator in subtitle sample and updated `normalizeTokenizerInput` to map `U+200B` (zero-width space), `U+2060` (word joiner), and `U+FEFF` (zero-width no-break space) to regular spaces before whitespace collapsing.
|
||||||
|
|
||||||
|
Validation:
|
||||||
|
- `bun run build && node --test dist/subtitle/stages/normalize.test.js`
|
||||||
|
- `node --test dist/core/services/tokenizer.test.js`
|
||||||
|
<!-- SECTION:NOTES:END -->
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
---
|
||||||
|
id: TASK-91
|
||||||
|
title: Add config toggle to preserve visible overlay subtitle line breaks
|
||||||
|
status: Done
|
||||||
|
assignee: []
|
||||||
|
created_date: '2026-02-20 06:35'
|
||||||
|
updated_date: '2026-02-20 06:42'
|
||||||
|
labels: []
|
||||||
|
dependencies: []
|
||||||
|
priority: medium
|
||||||
|
---
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||||
|
Add a `subtitleStyle` config option that keeps visible-overlay subtitle line breaks (newlines and carriage returns are normalized to line breaks) instead of flattening them to spaces. The option should default to disabled so that current behavior is preserved, for consistency with texthooker.
|
||||||
|
<!-- SECTION:DESCRIPTION:END -->
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
<!-- AC:BEGIN -->
|
||||||
|
- [x] #1 New config option exists with default disabled and validation/docs coverage.
|
||||||
|
- [x] #2 When enabled, visible overlay preserves subtitle line breaks while rendering tokenized subtitles.
|
||||||
|
- [x] #3 When disabled, current rendering behavior remains unchanged.
|
||||||
|
- [x] #4 Relevant config + renderer tests pass.
|
||||||
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Implementation Notes
|
||||||
|
|
||||||
|
<!-- SECTION:NOTES:BEGIN -->
|
||||||
|
Added `subtitleStyle.preserveLineBreaks` (default `false`) to types/default config/registry/config validation and docs/example configs.
|
||||||
|
|
||||||
|
Renderer now supports line-break-preserving token output via `alignTokensToSourceText` in `src/renderer/subtitle-render.ts`, which inserts source-text separators (including `\n`) between token spans when enabled.
|
||||||
|
|
||||||
|
Validation:
|
||||||
|
- `bun run build && node --test dist/config/config.test.js dist/renderer/subtitle-render.test.js`
|
||||||
|
<!-- SECTION:NOTES:END -->
|
||||||
@@ -166,6 +166,7 @@
|
|||||||
// ==========================================
|
// ==========================================
|
||||||
"subtitleStyle": {
|
"subtitleStyle": {
|
||||||
"enableJlpt": false, // Enable JLPT vocabulary level underlines. When disabled, JLPT tagging lookup and underlines are skipped. Values: true | false
|
"enableJlpt": false, // Enable JLPT vocabulary level underlines. When disabled, JLPT tagging lookup and underlines are skipped. Values: true | false
|
||||||
|
"preserveLineBreaks": false, // Preserve line breaks in visible overlay subtitle rendering. When false, line breaks are flattened to spaces for a single-line flow. Values: true | false
|
||||||
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif", // Font family setting.
|
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif", // Font family setting.
|
||||||
"fontSize": 35, // Font size setting.
|
"fontSize": 35, // Font size setting.
|
||||||
"fontColor": "#cad3f5", // Font color setting.
|
"fontColor": "#cad3f5", // Font color setting.
|
||||||
|
|||||||
@@ -724,6 +724,7 @@ See `config.example.jsonc` for detailed configuration options.
|
|||||||
| `fontStyle` | string | `"normal"` or `"italic"` (default: `"normal"`) |
|
| `fontStyle` | string | `"normal"` or `"italic"` (default: `"normal"`) |
|
||||||
| `backgroundColor` | string | Any CSS color, including `"transparent"` (default: `"rgb(30, 32, 48, 0.88)"`) |
|
| `backgroundColor` | string | Any CSS color, including `"transparent"` (default: `"rgb(30, 32, 48, 0.88)"`) |
|
||||||
| `enableJlpt` | boolean | Enable JLPT level underline styling (`false` by default) |
|
| `enableJlpt` | boolean | Enable JLPT level underline styling (`false` by default) |
|
||||||
|
| `preserveLineBreaks` | boolean | Preserve line breaks in visible overlay subtitle rendering (`false` by default). Enable to mirror mpv line layout. |
|
||||||
| `frequencyDictionary.enabled` | boolean | Enable frequency highlighting from dictionary lookups (`false` by default) |
|
| `frequencyDictionary.enabled` | boolean | Enable frequency highlighting from dictionary lookups (`false` by default) |
|
||||||
| `frequencyDictionary.sourcePath` | string | Path to a local frequency dictionary root. Leave empty or omit to use the built-in bundled dictionary search paths. |
|
| `frequencyDictionary.sourcePath` | string | Path to a local frequency dictionary root. Leave empty or omit to use the built-in bundled dictionary search paths. |
|
||||||
| `frequencyDictionary.topX` | number | Only color tokens whose frequency rank is `<= topX` (`1000` by default) |
|
| `frequencyDictionary.topX` | number | Only color tokens whose frequency rank is `<= topX` (`1000` by default) |
|
||||||
|
|||||||
@@ -166,6 +166,7 @@
|
|||||||
// ==========================================
|
// ==========================================
|
||||||
"subtitleStyle": {
|
"subtitleStyle": {
|
||||||
"enableJlpt": false, // Enable JLPT vocabulary level underlines. When disabled, JLPT tagging lookup and underlines are skipped. Values: true | false
|
"enableJlpt": false, // Enable JLPT vocabulary level underlines. When disabled, JLPT tagging lookup and underlines are skipped. Values: true | false
|
||||||
|
"preserveLineBreaks": false, // Preserve line breaks in visible overlay subtitle rendering. When false, line breaks are flattened to spaces for a single-line flow. Values: true | false
|
||||||
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif", // Font family setting.
|
"fontFamily": "Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif", // Font family setting.
|
||||||
"fontSize": 35, // Font size setting.
|
"fontSize": 35, // Font size setting.
|
||||||
"fontColor": "#cad3f5", // Font color setting.
|
"fontColor": "#cad3f5", // Font color setting.
|
||||||
|
|||||||
@@ -6,10 +6,12 @@ Read first. Keep concise.
|
|||||||
| ------------ | -------------- | ---------------------------------------------------- | --------- | ------------------------------------- | ---------------------- |
|
| ------------ | -------------- | ---------------------------------------------------- | --------- | ------------------------------------- | ---------------------- |
|
||||||
| `codex-main` | `planner-exec` | `Fix frequency/N+1 regression in plugin --start flow` | `in_progress` | `docs/subagents/agents/codex-main.md` | `2026-02-19T19:36:46Z` |
|
| `codex-main` | `planner-exec` | `Fix frequency/N+1 regression in plugin --start flow` | `in_progress` | `docs/subagents/agents/codex-main.md` | `2026-02-19T19:36:46Z` |
|
||||||
| `codex-config-validation-20260219T172015Z-iiyf` | `codex-config-validation` | `Find root cause of config validation error for ~/.config/SubMiner/config.jsonc` | `completed` | `docs/subagents/agents/codex-config-validation-20260219T172015Z-iiyf.md` | `2026-02-19T17:26:17Z` |
|
| `codex-config-validation-20260219T172015Z-iiyf` | `codex-config-validation` | `Find root cause of config validation error for ~/.config/SubMiner/config.jsonc` | `completed` | `docs/subagents/agents/codex-config-validation-20260219T172015Z-iiyf.md` | `2026-02-19T17:26:17Z` |
|
||||||
| `codex-task85-20260219T233711Z-46hc` | `codex-task85` | `Resume TASK-85 maintainability refactor from latest handoff point` | `in_progress` | `docs/subagents/agents/codex-task85-20260219T233711Z-46hc.md` | `2026-02-20T05:31:05Z` |
|
| `codex-task85-20260219T233711Z-46hc` | `codex-task85` | `Resume TASK-85 maintainability refactor from latest handoff point` | `in_progress` | `docs/subagents/agents/codex-task85-20260219T233711Z-46hc.md` | `2026-02-20T05:50:43Z` |
|
||||||
| `codex-anilist-deeplink-20260219T233926Z` | `anilist-deeplink` | `Fix external subminer:// AniList callback handling from browser` | `done` | `docs/subagents/agents/codex-anilist-deeplink-20260219T233926Z.md` | `2026-02-19T23:59:21Z` |
|
| `codex-anilist-deeplink-20260219T233926Z` | `anilist-deeplink` | `Fix external subminer:// AniList callback handling from browser` | `done` | `docs/subagents/agents/codex-anilist-deeplink-20260219T233926Z.md` | `2026-02-19T23:59:21Z` |
|
||||||
| `codex-texthooker-highlights-20260220T002354Z-927c` | `codex-texthooker-highlights` | `Add optional texthooker highlight toggles for known/n+1/frequency/JLPT` | `completed` | `docs/subagents/agents/codex-texthooker-highlights-20260220T002354Z-927c.md` | `2026-02-20T00:30:49Z` |
|
| `codex-texthooker-highlights-20260220T002354Z-927c` | `codex-texthooker-highlights` | `Add optional texthooker highlight toggles for known/n+1/frequency/JLPT` | `completed` | `docs/subagents/agents/codex-texthooker-highlights-20260220T002354Z-927c.md` | `2026-02-20T00:30:49Z` |
|
||||||
| `codex-texthooker-ui-playwright-20260220T003827Z-k3p9` | `codex-texthooker-ui-playwright` | `Run Playwright MCP smoke/regression checks for texthooker-ui changes` | `completed` | `docs/subagents/agents/codex-texthooker-ui-playwright-20260220T003827Z-k3p9.md` | `2026-02-20T00:42:09Z` |
|
| `codex-texthooker-ui-playwright-20260220T003827Z-k3p9` | `codex-texthooker-ui-playwright` | `Run Playwright MCP smoke/regression checks for texthooker-ui changes` | `completed` | `docs/subagents/agents/codex-texthooker-ui-playwright-20260220T003827Z-k3p9.md` | `2026-02-20T00:42:09Z` |
|
||||||
| `codex-texthooker-color-ws-20260220T005844Z-r7m2` | `codex-texthooker-color-ws` | `Fix texthooker websocket payload so token highlight colors render` | `completed` | `docs/subagents/agents/codex-texthooker-color-ws-20260220T005844Z-r7m2.md` | `2026-02-20T01:01:00Z` |
|
| `codex-texthooker-color-ws-20260220T005844Z-r7m2` | `codex-texthooker-color-ws` | `Fix texthooker websocket payload so token highlight colors render` | `completed` | `docs/subagents/agents/codex-texthooker-color-ws-20260220T005844Z-r7m2.md` | `2026-02-20T01:01:00Z` |
|
||||||
| `codex-nplusone-pos1-20260220T012300Z-c5he` | `codex-nplusone-pos1` | `Fix N+1 false-negative when Yomitan functional tokens inflate unknown candidate count` | `completed` | `docs/subagents/agents/codex-nplusone-pos1-20260220T012300Z-c5he.md` | `2026-02-20T01:28:20Z` |
|
| `codex-nplusone-pos1-20260220T012300Z-c5he` | `codex-nplusone-pos1` | `Fix N+1 false-negative when Yomitan functional tokens inflate unknown candidate count` | `completed` | `docs/subagents/agents/codex-nplusone-pos1-20260220T012300Z-c5he.md` | `2026-02-20T01:28:20Z` |
|
||||||
| `codex-subtitle-bg-20260220T054247Z-h9cu` | `codex-subtitle-bg` | `Update default subtitle background color to requested RGBA value` | `completed` | `docs/subagents/agents/codex-subtitle-bg-20260220T054247Z-h9cu.md` | `2026-02-20T05:44:45Z` |
|
| `codex-subtitle-bg-20260220T054247Z-h9cu` | `codex-subtitle-bg` | `Update default subtitle background color to requested RGBA value` | `completed` | `docs/subagents/agents/codex-subtitle-bg-20260220T054247Z-h9cu.md` | `2026-02-20T05:44:45Z` |
|
||||||
|
| `codex-narrow-space-tokenizer-20260220T061716Z-p97s` | `codex-narrow-space-tokenizer` | `Fix tokenization when subtitle line contains narrow/invisible Unicode spacing between segments` | `completed` | `docs/subagents/agents/codex-narrow-space-tokenizer-20260220T061716Z-p97s.md` | `2026-02-20T06:20:07Z` |
|
||||||
|
| `codex-preserve-linebreaks-20260220T063538Z-s4nd` | `codex-preserve-linebreaks` | `Add config option to preserve subtitle line breaks in visible overlay rendering` | `completed` | `docs/subagents/agents/codex-preserve-linebreaks-20260220T063538Z-s4nd.md` | `2026-02-20T06:42:51Z` |
|
||||||
|
|||||||
@@ -0,0 +1,34 @@
|
|||||||
|
# Agent: `codex-narrow-space-tokenizer-20260220T061716Z-p97s`
|
||||||
|
|
||||||
|
- alias: `codex-narrow-space-tokenizer`
|
||||||
|
- mission: `Fix narrow/invisible subtitle spacing causing incorrect tokenizer boundaries.`
|
||||||
|
- status: `done`
|
||||||
|
- branch: `main`
|
||||||
|
- started_at: `2026-02-20T06:17:31Z`
|
||||||
|
- heartbeat_minutes: `5`
|
||||||
|
|
||||||
|
## Current Work (newest first)
|
||||||
|
- [2026-02-20T06:20:07Z] handoff: normalized invisible separators in tokenizer input; added regression test; targeted tests green.
|
||||||
|
- [2026-02-20T06:19:20Z] test: `bun run build && node --test dist/subtitle/stages/normalize.test.js` (pass, 1/1); `node --test dist/core/services/tokenizer.test.js` (pass, 43/43).
|
||||||
|
- [2026-02-20T06:18:38Z] edit: updated `normalizeTokenizerInput` to map `U+200B/U+2060/U+FEFF` to regular spaces before whitespace collapsing.
|
||||||
|
- [2026-02-20T06:18:02Z] test: added failing regression for subtitle sample with `\u200B` separator.
|
||||||
|
- [2026-02-20T06:17:31Z] intent: create TASK-90; TDD-first regression for narrow Unicode spacing in subtitle line `キリキリと かかってこい`.
|
||||||
|
- [2026-02-20T06:17:31Z] progress: coordination started; index row added; scanning tokenizer normalization points next.
|
||||||
|
|
||||||
|
## Files Touched
|
||||||
|
- `docs/subagents/INDEX.md`
|
||||||
|
- `docs/subagents/agents/codex-narrow-space-tokenizer-20260220T061716Z-p97s.md`
|
||||||
|
- `docs/subagents/collaboration.md`
|
||||||
|
- `backlog/tasks/task-90 - Normalize-narrow-Unicode-whitespace-in-tokenizer-input.md`
|
||||||
|
- `src/subtitle/stages/normalize.ts`
|
||||||
|
- `src/subtitle/stages/normalize.test.ts`
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
- the issue is likely a Unicode spacing code point being treated as a token boundary.
|
||||||
|
- target behavior: collapse/normalize narrow spacing to standard spacing before lookup token grouping.
|
||||||
|
|
||||||
|
## Open Questions / Blockers
|
||||||
|
- possible overlap with TASK-85 refactor touching tokenizer paths.
|
||||||
|
|
||||||
|
## Next Step
|
||||||
|
- done.
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
# Agent: `codex-preserve-linebreaks-20260220T063538Z-s4nd`
|
||||||
|
|
||||||
|
- alias: `codex-preserve-linebreaks`
|
||||||
|
- mission: `Add config option to preserve subtitle line breaks in visible overlay rendering.`
|
||||||
|
- status: `done`
|
||||||
|
- branch: `main`
|
||||||
|
- started_at: `2026-02-20T06:35:38Z`
|
||||||
|
- heartbeat_minutes: `5`
|
||||||
|
|
||||||
|
## Current Work (newest first)
|
||||||
|
- [2026-02-20T06:42:51Z] handoff: TASK-91 complete; added config flag `subtitleStyle.preserveLineBreaks` (default false), renderer token-linebreak alignment path, tests/docs/examples updated.
|
||||||
|
- [2026-02-20T06:42:20Z] test: `bun run build && node --test dist/config/config.test.js dist/renderer/subtitle-render.test.js` pass (43/43); macOS helper compile falls back due to a sandboxed Swift cache write.
|
||||||
|
- [2026-02-20T06:41:07Z] edit: added `alignTokensToSourceText` helper + preserve-line-break render path in `src/renderer/subtitle-render.ts`; state/config plumbing added.
|
||||||
|
- [2026-02-20T06:39:34Z] test: added config parse/warn coverage + renderer helper newline-segment test.
|
||||||
|
- [2026-02-20T06:35:38Z] intent: create backlog ticket; implement opt-in config flag default-off; keep current normalization default behavior.
|
||||||
|
- [2026-02-20T06:35:38Z] progress: located normalization/render paths in `src/core/services/tokenizer.ts` and `src/renderer/subtitle-render.ts`.
|
||||||
|
|
||||||
|
## Files Touched
|
||||||
|
- `docs/subagents/INDEX.md`
|
||||||
|
- `docs/subagents/agents/codex-preserve-linebreaks-20260220T063538Z-s4nd.md`
|
||||||
|
- `docs/subagents/collaboration.md`
|
||||||
|
- `backlog/tasks/task-91 - Add-config-toggle-to-preserve-visible-overlay-subtitle-line-breaks.md`
|
||||||
|
- `src/types.ts`
|
||||||
|
- `src/config/definitions.ts`
|
||||||
|
- `src/config/service.ts`
|
||||||
|
- `src/config/config.test.ts`
|
||||||
|
- `src/renderer/state.ts`
|
||||||
|
- `src/renderer/subtitle-render.ts`
|
||||||
|
- `src/renderer/subtitle-render.test.ts`
|
||||||
|
- `docs/configuration.md`
|
||||||
|
- `config.example.jsonc`
|
||||||
|
- `docs/public/config.example.jsonc`
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
- request targets visible overlay rendering parity with MPV line breaks.
|
||||||
|
- default behavior must remain whitespace-collapsed for tokenizer/texthooker consistency.
|
||||||
|
|
||||||
|
## Open Questions / Blockers
|
||||||
|
- none.
|
||||||
|
|
||||||
|
## Next Step
|
||||||
|
- done.
|
||||||
@@ -7,3 +7,7 @@ Shared notes. Append-only.
|
|||||||
- [2026-02-20T00:01:40Z] [codex-anilist-deeplink|anilist-deeplink] preparing commit; scoping staged set to repo changes, excluding external reference dirs (vendor/yomitan-jlpt-vocab, mpv-anilist-updater).
|
- [2026-02-20T00:01:40Z] [codex-anilist-deeplink|anilist-deeplink] preparing commit; scoping staged set to repo changes, excluding external reference dirs (vendor/yomitan-jlpt-vocab, mpv-anilist-updater).
|
||||||
- [2026-02-20T05:42:54Z] [codex-subtitle-bg-20260220T054247Z-h9cu|codex-subtitle-bg] short config tweak requested: update default subtitle background color; scoping to config defaults/tests only.
|
- [2026-02-20T05:42:54Z] [codex-subtitle-bg-20260220T054247Z-h9cu|codex-subtitle-bg] short config tweak requested: update default subtitle background color; scoping to config defaults/tests only.
|
||||||
- [2026-02-20T05:44:45Z] [codex-subtitle-bg-20260220T054247Z-h9cu|codex-subtitle-bg] completed TASK-89; updated default subtitle background in config defaults/docs/examples/renderer CSS; config tests green.
|
- [2026-02-20T05:44:45Z] [codex-subtitle-bg-20260220T054247Z-h9cu|codex-subtitle-bg] completed TASK-89; updated default subtitle background in config defaults/docs/examples/renderer CSS; config tests green.
|
||||||
|
- [2026-02-20T06:17:31Z] [codex-narrow-space-tokenizer-20260220T061716Z-p97s|codex-narrow-space-tokenizer] potential overlap notice: investigating tokenizer whitespace normalization and tests (likely `src/core/services/tokenizer-service.ts` + tests); coordinating to avoid clobber with ongoing TASK-85 refactor touches.
|
||||||
|
- [2026-02-20T06:20:07Z] [codex-narrow-space-tokenizer-20260220T061716Z-p97s|codex-narrow-space-tokenizer] completed TASK-90 fix in `src/subtitle/stages/normalize.ts`; normalize `U+200B/U+2060/U+FEFF` to spaces for tokenizer input; added regression test `src/subtitle/stages/normalize.test.ts`; targeted tokenizer suite green.
|
||||||
|
- [2026-02-20T06:35:38Z] [codex-preserve-linebreaks-20260220T063538Z-s4nd|codex-preserve-linebreaks] overlap note: touching subtitle config + renderer render path (`src/types.ts`, `src/config/*`, `src/renderer/subtitle-render.ts`, docs/config examples) to add optional preserve-line-breaks behavior while keeping default normalization unchanged.
|
||||||
|
- [2026-02-20T06:42:51Z] [codex-preserve-linebreaks-20260220T063538Z-s4nd|codex-preserve-linebreaks] completed TASK-91; added `subtitleStyle.preserveLineBreaks` config (default false), renderer token/source alignment helper to preserve visible overlay line breaks when enabled, config+renderer tests green.
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ test('loads defaults when config is missing', () => {
|
|||||||
assert.equal(config.jellyfin.autoAnnounce, false);
|
assert.equal(config.jellyfin.autoAnnounce, false);
|
||||||
assert.equal(config.jellyfin.remoteControlDeviceName, 'SubMiner');
|
assert.equal(config.jellyfin.remoteControlDeviceName, 'SubMiner');
|
||||||
assert.equal(config.subtitleStyle.backgroundColor, 'rgb(30, 32, 48, 0.88)');
|
assert.equal(config.subtitleStyle.backgroundColor, 'rgb(30, 32, 48, 0.88)');
|
||||||
|
assert.equal(config.subtitleStyle.preserveLineBreaks, false);
|
||||||
assert.equal(config.immersionTracking.enabled, true);
|
assert.equal(config.immersionTracking.enabled, true);
|
||||||
assert.equal(config.immersionTracking.dbPath, '');
|
assert.equal(config.immersionTracking.dbPath, '');
|
||||||
assert.equal(config.immersionTracking.batchSize, 25);
|
assert.equal(config.immersionTracking.batchSize, 25);
|
||||||
@@ -38,6 +39,44 @@ test('loads defaults when config is missing', () => {
|
|||||||
assert.equal(config.immersionTracking.retention.vacuumIntervalDays, 7);
|
assert.equal(config.immersionTracking.retention.vacuumIntervalDays, 7);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('parses subtitleStyle.preserveLineBreaks and warns on invalid values', () => {
|
||||||
|
const validDir = makeTempDir();
|
||||||
|
fs.writeFileSync(
|
||||||
|
path.join(validDir, 'config.jsonc'),
|
||||||
|
`{
|
||||||
|
"subtitleStyle": {
|
||||||
|
"preserveLineBreaks": true
|
||||||
|
}
|
||||||
|
}`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
|
||||||
|
const validService = new ConfigService(validDir);
|
||||||
|
assert.equal(validService.getConfig().subtitleStyle.preserveLineBreaks, true);
|
||||||
|
|
||||||
|
const invalidDir = makeTempDir();
|
||||||
|
fs.writeFileSync(
|
||||||
|
path.join(invalidDir, 'config.jsonc'),
|
||||||
|
`{
|
||||||
|
"subtitleStyle": {
|
||||||
|
"preserveLineBreaks": "yes"
|
||||||
|
}
|
||||||
|
}`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
|
||||||
|
const invalidService = new ConfigService(invalidDir);
|
||||||
|
assert.equal(
|
||||||
|
invalidService.getConfig().subtitleStyle.preserveLineBreaks,
|
||||||
|
DEFAULT_CONFIG.subtitleStyle.preserveLineBreaks,
|
||||||
|
);
|
||||||
|
assert.ok(
|
||||||
|
invalidService
|
||||||
|
.getWarnings()
|
||||||
|
.some((warning) => warning.path === 'subtitleStyle.preserveLineBreaks'),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
test('parses anilist.enabled and warns for invalid value', () => {
|
test('parses anilist.enabled and warns for invalid value', () => {
|
||||||
const dir = makeTempDir();
|
const dir = makeTempDir();
|
||||||
fs.writeFileSync(
|
fs.writeFileSync(
|
||||||
@@ -885,6 +924,7 @@ test('template generator includes known keys', () => {
|
|||||||
assert.match(output, /"logging":/);
|
assert.match(output, /"logging":/);
|
||||||
assert.match(output, /"websocket":/);
|
assert.match(output, /"websocket":/);
|
||||||
assert.match(output, /"youtubeSubgen":/);
|
assert.match(output, /"youtubeSubgen":/);
|
||||||
|
assert.match(output, /"preserveLineBreaks": false/);
|
||||||
assert.match(output, /"nPlusOne"\s*:\s*\{/);
|
assert.match(output, /"nPlusOne"\s*:\s*\{/);
|
||||||
assert.match(output, /"nPlusOne": "#c6a0f6"/);
|
assert.match(output, /"nPlusOne": "#c6a0f6"/);
|
||||||
assert.match(output, /"knownWord": "#a6da95"/);
|
assert.match(output, /"knownWord": "#a6da95"/);
|
||||||
|
|||||||
@@ -172,6 +172,7 @@ export const DEFAULT_CONFIG: ResolvedConfig = {
|
|||||||
},
|
},
|
||||||
subtitleStyle: {
|
subtitleStyle: {
|
||||||
enableJlpt: false,
|
enableJlpt: false,
|
||||||
|
preserveLineBreaks: false,
|
||||||
fontFamily: 'Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif',
|
fontFamily: 'Noto Sans CJK JP Regular, Noto Sans CJK JP, Arial Unicode MS, Arial, sans-serif',
|
||||||
fontSize: 35,
|
fontSize: 35,
|
||||||
fontColor: '#cad3f5',
|
fontColor: '#cad3f5',
|
||||||
@@ -343,6 +344,14 @@ export const CONFIG_OPTION_REGISTRY: ConfigOptionRegistryEntry[] = [
|
|||||||
'Enable JLPT vocabulary level underlines. ' +
|
'Enable JLPT vocabulary level underlines. ' +
|
||||||
'When disabled, JLPT tagging lookup and underlines are skipped.',
|
'When disabled, JLPT tagging lookup and underlines are skipped.',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
path: 'subtitleStyle.preserveLineBreaks',
|
||||||
|
kind: 'boolean',
|
||||||
|
defaultValue: DEFAULT_CONFIG.subtitleStyle.preserveLineBreaks,
|
||||||
|
description:
|
||||||
|
'Preserve line breaks in visible overlay subtitle rendering. ' +
|
||||||
|
'When false, line breaks are flattened to spaces for a single-line flow.',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
path: 'subtitleStyle.frequencyDictionary.enabled',
|
path: 'subtitleStyle.frequencyDictionary.enabled',
|
||||||
kind: 'boolean',
|
kind: 'boolean',
|
||||||
|
|||||||
@@ -746,6 +746,8 @@ export class ConfigService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isObject(src.subtitleStyle)) {
|
if (isObject(src.subtitleStyle)) {
|
||||||
|
const fallbackSubtitleStyleEnableJlpt = resolved.subtitleStyle.enableJlpt;
|
||||||
|
const fallbackSubtitleStylePreserveLineBreaks = resolved.subtitleStyle.preserveLineBreaks;
|
||||||
resolved.subtitleStyle = {
|
resolved.subtitleStyle = {
|
||||||
...resolved.subtitleStyle,
|
...resolved.subtitleStyle,
|
||||||
...(src.subtitleStyle as ResolvedConfig['subtitleStyle']),
|
...(src.subtitleStyle as ResolvedConfig['subtitleStyle']),
|
||||||
@@ -761,6 +763,7 @@ export class ConfigService {
|
|||||||
if (enableJlpt !== undefined) {
|
if (enableJlpt !== undefined) {
|
||||||
resolved.subtitleStyle.enableJlpt = enableJlpt;
|
resolved.subtitleStyle.enableJlpt = enableJlpt;
|
||||||
} else if ((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt !== undefined) {
|
} else if ((src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt !== undefined) {
|
||||||
|
resolved.subtitleStyle.enableJlpt = fallbackSubtitleStyleEnableJlpt;
|
||||||
warn(
|
warn(
|
||||||
'subtitleStyle.enableJlpt',
|
'subtitleStyle.enableJlpt',
|
||||||
(src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt,
|
(src.subtitleStyle as { enableJlpt?: unknown }).enableJlpt,
|
||||||
@@ -769,6 +772,23 @@ export class ConfigService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const preserveLineBreaks = asBoolean(
|
||||||
|
(src.subtitleStyle as { preserveLineBreaks?: unknown }).preserveLineBreaks,
|
||||||
|
);
|
||||||
|
if (preserveLineBreaks !== undefined) {
|
||||||
|
resolved.subtitleStyle.preserveLineBreaks = preserveLineBreaks;
|
||||||
|
} else if (
|
||||||
|
(src.subtitleStyle as { preserveLineBreaks?: unknown }).preserveLineBreaks !== undefined
|
||||||
|
) {
|
||||||
|
resolved.subtitleStyle.preserveLineBreaks = fallbackSubtitleStylePreserveLineBreaks;
|
||||||
|
warn(
|
||||||
|
'subtitleStyle.preserveLineBreaks',
|
||||||
|
(src.subtitleStyle as { preserveLineBreaks?: unknown }).preserveLineBreaks,
|
||||||
|
resolved.subtitleStyle.preserveLineBreaks,
|
||||||
|
'Expected boolean.',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const frequencyDictionary = isObject(
|
const frequencyDictionary = isObject(
|
||||||
(src.subtitleStyle as { frequencyDictionary?: unknown }).frequencyDictionary,
|
(src.subtitleStyle as { frequencyDictionary?: unknown }).frequencyDictionary,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -79,6 +79,7 @@ export type RendererState = {
|
|||||||
jlptN3Color: string;
|
jlptN3Color: string;
|
||||||
jlptN4Color: string;
|
jlptN4Color: string;
|
||||||
jlptN5Color: string;
|
jlptN5Color: string;
|
||||||
|
preserveSubtitleLineBreaks: boolean;
|
||||||
frequencyDictionaryEnabled: boolean;
|
frequencyDictionaryEnabled: boolean;
|
||||||
frequencyDictionaryTopX: number;
|
frequencyDictionaryTopX: number;
|
||||||
frequencyDictionaryMode: 'single' | 'banded';
|
frequencyDictionaryMode: 'single' | 'banded';
|
||||||
@@ -155,6 +156,7 @@ export function createRendererState(): RendererState {
|
|||||||
jlptN3Color: '#f9e2af',
|
jlptN3Color: '#f9e2af',
|
||||||
jlptN4Color: '#a6e3a1',
|
jlptN4Color: '#a6e3a1',
|
||||||
jlptN5Color: '#8aadf4',
|
jlptN5Color: '#8aadf4',
|
||||||
|
preserveSubtitleLineBreaks: false,
|
||||||
frequencyDictionaryEnabled: false,
|
frequencyDictionaryEnabled: false,
|
||||||
frequencyDictionaryTopX: 1000,
|
frequencyDictionaryTopX: 1000,
|
||||||
frequencyDictionaryMode: 'single',
|
frequencyDictionaryMode: 'single',
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import path from 'node:path';
|
|||||||
|
|
||||||
import type { MergedToken } from '../types';
|
import type { MergedToken } from '../types';
|
||||||
import { PartOfSpeech } from '../types.js';
|
import { PartOfSpeech } from '../types.js';
|
||||||
import { computeWordClass } from './subtitle-render.js';
|
import { alignTokensToSourceText, computeWordClass } from './subtitle-render.js';
|
||||||
|
|
||||||
function createToken(overrides: Partial<MergedToken>): MergedToken {
|
function createToken(overrides: Partial<MergedToken>): MergedToken {
|
||||||
return {
|
return {
|
||||||
@@ -203,6 +203,19 @@ test('computeWordClass skips frequency class when rank is out of topX', () => {
|
|||||||
assert.equal(actual, 'word');
|
assert.equal(actual, 'word');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('alignTokensToSourceText preserves newline separators between adjacent token surfaces', () => {
|
||||||
|
const tokens = [
|
||||||
|
createToken({ surface: 'キリキリと', reading: 'きりきりと', headword: 'キリキリと' }),
|
||||||
|
createToken({ surface: 'かかってこい', reading: 'かかってこい', headword: 'かかってこい' }),
|
||||||
|
];
|
||||||
|
|
||||||
|
const segments = alignTokensToSourceText(tokens, 'キリキリと\nかかってこい');
|
||||||
|
assert.deepEqual(
|
||||||
|
segments.map((segment) => (segment.kind === 'text' ? `text:${segment.text}` : 'token')),
|
||||||
|
['token', 'text:\n', 'token'],
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
test('JLPT CSS rules use underline-only styling in renderer stylesheet', () => {
|
test('JLPT CSS rules use underline-only styling in renderer stylesheet', () => {
|
||||||
const distCssPath = path.join(process.cwd(), 'dist', 'renderer', 'style.css');
|
const distCssPath = path.join(process.cwd(), 'dist', 'renderer', 'style.css');
|
||||||
const srcCssPath = path.join(process.cwd(), 'src', 'renderer', 'style.css');
|
const srcCssPath = path.join(process.cwd(), 'src', 'renderer', 'style.css');
|
||||||
|
|||||||
@@ -9,11 +9,15 @@ type FrequencyRenderSettings = {
|
|||||||
bandedColors: [string, string, string, string, string];
|
bandedColors: [string, string, string, string, string];
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
 * Cleans raw subtitle text before it is rendered.
 *
 * Literal `\N` / `\n` escape sequences become real newlines and every
 * brace-delimited `{...}` block is removed (presumably style override tags;
 * confirm against the subtitle formats fed into the renderer).
 *
 * @param text - Raw subtitle text; a falsy value yields an empty string.
 * @param trim - When true (default), leading/trailing whitespace is stripped.
 * @param collapseLineBreaks - When true, newlines and any run of whitespace
 *   are flattened into a single space. Defaults to false.
 * @returns The cleaned subtitle text.
 */
function normalizeSubtitle(text: string, trim = true, collapseLineBreaks = false): string {
  if (!text) {
    return '';
  }

  const withRealNewlines = text.replace(/\\N/g, '\n').replace(/\\n/g, '\n');
  const withoutBraceBlocks = withRealNewlines.replace(/\{[^}]*\}/g, '');

  const result = collapseLineBreaks
    ? withoutBraceBlocks.replace(/\n/g, ' ').replace(/\s+/g, ' ')
    : withoutBraceBlocks;

  if (trim) {
    return result.trim();
  }
  return result;
}
|
||||||
@@ -90,6 +94,8 @@ function renderWithTokens(
|
|||||||
root: HTMLElement,
|
root: HTMLElement,
|
||||||
tokens: MergedToken[],
|
tokens: MergedToken[],
|
||||||
frequencyRenderSettings?: Partial<FrequencyRenderSettings>,
|
frequencyRenderSettings?: Partial<FrequencyRenderSettings>,
|
||||||
|
sourceText?: string,
|
||||||
|
preserveLineBreaks = false,
|
||||||
): void {
|
): void {
|
||||||
const resolvedFrequencyRenderSettings = {
|
const resolvedFrequencyRenderSettings = {
|
||||||
...DEFAULT_FREQUENCY_RENDER_SETTINGS,
|
...DEFAULT_FREQUENCY_RENDER_SETTINGS,
|
||||||
@@ -110,6 +116,29 @@ function renderWithTokens(
|
|||||||
|
|
||||||
const fragment = document.createDocumentFragment();
|
const fragment = document.createDocumentFragment();
|
||||||
|
|
||||||
|
if (preserveLineBreaks && sourceText) {
|
||||||
|
const normalizedSource = normalizeSubtitle(sourceText, true, false);
|
||||||
|
const segments = alignTokensToSourceText(tokens, normalizedSource);
|
||||||
|
|
||||||
|
for (const segment of segments) {
|
||||||
|
if (segment.kind === 'text') {
|
||||||
|
renderPlainTextPreserveLineBreaks(fragment, segment.text);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const token = segment.token;
|
||||||
|
const span = document.createElement('span');
|
||||||
|
span.className = computeWordClass(token, resolvedFrequencyRenderSettings);
|
||||||
|
span.textContent = token.surface;
|
||||||
|
if (token.reading) span.dataset.reading = token.reading;
|
||||||
|
if (token.headword) span.dataset.headword = token.headword;
|
||||||
|
fragment.appendChild(span);
|
||||||
|
}
|
||||||
|
|
||||||
|
root.appendChild(fragment);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (const token of tokens) {
|
for (const token of tokens) {
|
||||||
const surface = token.surface;
|
const surface = token.surface;
|
||||||
|
|
||||||
@@ -142,6 +171,50 @@ function renderWithTokens(
|
|||||||
root.appendChild(fragment);
|
root.appendChild(fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type SubtitleRenderSegment = { kind: 'text'; text: string } | { kind: 'token'; token: MergedToken };

/**
 * Interleaves tokens with the stretches of `sourceText` that lie between
 * their surfaces, so text not covered by any token (such as a newline between
 * two tokens) is kept as its own segment.
 *
 * Tokens are located left to right with `indexOf`, each search starting where
 * the previous match ended. Tokens with an empty surface are ignored. When a
 * surface cannot be found, whatever source text is still unconsumed is emitted
 * as a text segment, the token is emitted after it, and the source is treated
 * as fully consumed for all later tokens.
 *
 * @param tokens - Tokens in display order.
 * @param sourceText - Text the token surfaces are searched in.
 * @returns Ordered text/token segments; empty when there are no tokens and
 *   `sourceText` is empty.
 */
export function alignTokensToSourceText(
  tokens: MergedToken[],
  sourceText: string,
): SubtitleRenderSegment[] {
  const result: SubtitleRenderSegment[] = [];

  // Text segments are only recorded when they actually contain something.
  const pushText = (text: string): void => {
    if (text) {
      result.push({ kind: 'text', text });
    }
  };

  if (tokens.length === 0) {
    pushText(sourceText);
    return result;
  }

  const sourceEnd = sourceText.length;
  let position = 0;

  for (const token of tokens) {
    const { surface } = token;
    if (!surface) {
      continue;
    }

    const matchStart = sourceText.indexOf(surface, position);
    const matched = matchStart >= 0;

    // On a match: emit the gap before it. On a miss: flush the remainder.
    pushText(sourceText.slice(position, matched ? matchStart : sourceEnd));
    result.push({ kind: 'token', token });
    position = matched ? matchStart + surface.length : sourceEnd;
  }

  pushText(sourceText.slice(position));
  return result;
}
|
||||||
|
|
||||||
export function computeWordClass(
|
export function computeWordClass(
|
||||||
token: MergedToken,
|
token: MergedToken,
|
||||||
frequencySettings?: Partial<FrequencyRenderSettings>,
|
frequencySettings?: Partial<FrequencyRenderSettings>,
|
||||||
@@ -199,7 +272,7 @@ function renderCharacterLevel(root: HTMLElement, text: string): void {
|
|||||||
root.appendChild(fragment);
|
root.appendChild(fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
function renderPlainTextPreserveLineBreaks(root: HTMLElement, text: string): void {
|
function renderPlainTextPreserveLineBreaks(root: ParentNode, text: string): void {
|
||||||
const lines = text.split('\n');
|
const lines = text.split('\n');
|
||||||
const fragment = document.createDocumentFragment();
|
const fragment = document.createDocumentFragment();
|
||||||
|
|
||||||
@@ -246,7 +319,13 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
|||||||
|
|
||||||
const normalized = normalizeSubtitle(text);
|
const normalized = normalizeSubtitle(text);
|
||||||
if (tokens && tokens.length > 0) {
|
if (tokens && tokens.length > 0) {
|
||||||
renderWithTokens(ctx.dom.subtitleRoot, tokens, getFrequencyRenderSettings());
|
renderWithTokens(
|
||||||
|
ctx.dom.subtitleRoot,
|
||||||
|
tokens,
|
||||||
|
getFrequencyRenderSettings(),
|
||||||
|
text,
|
||||||
|
ctx.state.preserveSubtitleLineBreaks,
|
||||||
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
renderCharacterLevel(ctx.dom.subtitleRoot, normalized);
|
renderCharacterLevel(ctx.dom.subtitleRoot, normalized);
|
||||||
@@ -346,6 +425,7 @@ export function createSubtitleRenderer(ctx: RendererContext) {
|
|||||||
ctx.state.jlptN3Color = jlptColors.N3;
|
ctx.state.jlptN3Color = jlptColors.N3;
|
||||||
ctx.state.jlptN4Color = jlptColors.N4;
|
ctx.state.jlptN4Color = jlptColors.N4;
|
||||||
ctx.state.jlptN5Color = jlptColors.N5;
|
ctx.state.jlptN5Color = jlptColors.N5;
|
||||||
|
ctx.state.preserveSubtitleLineBreaks = style.preserveLineBreaks ?? false;
|
||||||
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n1-color', jlptColors.N1);
|
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n1-color', jlptColors.N1);
|
||||||
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n2-color', jlptColors.N2);
|
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n2-color', jlptColors.N2);
|
||||||
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n3-color', jlptColors.N3);
|
ctx.dom.subtitleRoot.style.setProperty('--subtitle-jlpt-n3-color', jlptColors.N3);
|
||||||
|
|||||||
10
src/subtitle/stages/normalize.test.ts
Normal file
10
src/subtitle/stages/normalize.test.ts
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
import test from 'node:test';
|
||||||
|
import assert from 'node:assert/strict';
|
||||||
|
import { normalizeTokenizerInput } from './normalize';
|
||||||
|
|
||||||
|
// Regression check: a zero-width space (U+200B) between segments and a newline
// between lines must both end up as single regular spaces.
test('normalizeTokenizerInput collapses zero-width separators between Japanese segments', () => {
  const expected = 'キリキリと かかってこい このヘナチョコ冒険者どもめが!';

  assert.equal(
    normalizeTokenizerInput('キリキリと\u200bかかってこい\nこのヘナチョコ冒険者どもめが!'),
    expected,
  );
});
|
||||||
@@ -2,6 +2,12 @@ export function normalizeDisplayText(text: string): string {
|
|||||||
return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
|
return text.replace(/\r\n/g, '\n').replace(/\\N/g, '\n').replace(/\\n/g, '\n').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Zero-width space (U+200B), word joiner (U+2060) and zero-width no-break
// space (U+FEFF); each occurrence is rewritten to a regular space below.
const INVISIBLE_SEPARATOR_PATTERN = /[\u200b\u2060\ufeff]/g;

/**
 * Flattens display text into a single-line string for the tokenizer.
 *
 * Newlines and the invisible separators above are turned into regular spaces,
 * every run of whitespace is collapsed to one space, and the result is trimmed.
 *
 * @param displayText - Subtitle text as displayed, possibly multi-line.
 * @returns The single-line, whitespace-collapsed text.
 */
export function normalizeTokenizerInput(displayText: string): string {
  const singleLine = displayText.replace(/\n/g, ' ');
  const visiblySeparated = singleLine.replace(INVISIBLE_SEPARATOR_PATTERN, ' ');
  const collapsed = visiblySeparated.replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
||||||
|
|||||||
@@ -270,6 +270,7 @@ export interface AnkiConnectConfig {
|
|||||||
|
|
||||||
export interface SubtitleStyleConfig {
|
export interface SubtitleStyleConfig {
|
||||||
enableJlpt?: boolean;
|
enableJlpt?: boolean;
|
||||||
|
preserveLineBreaks?: boolean;
|
||||||
fontFamily?: string;
|
fontFamily?: string;
|
||||||
fontSize?: number;
|
fontSize?: number;
|
||||||
fontColor?: string;
|
fontColor?: string;
|
||||||
|
|||||||
Reference in New Issue
Block a user