mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-06 19:57:26 -08:00
feat: merge AniList character dictionaries by recent usage
This commit is contained in:
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
id: TASK-89
|
||||||
|
title: Replace per-anime Yomitan imports with merged usage-based character dictionary
|
||||||
|
status: Done
|
||||||
|
assignee:
|
||||||
|
- '@codex'
|
||||||
|
created_date: '2026-03-06 07:59'
|
||||||
|
updated_date: '2026-03-06 08:09'
|
||||||
|
labels:
|
||||||
|
- character-dictionary
|
||||||
|
- yomitan
|
||||||
|
- anilist
|
||||||
|
dependencies: []
|
||||||
|
references:
|
||||||
|
- >-
|
||||||
|
/home/sudacode/projects/japanese/SubMiner/src/main/character-dictionary-runtime.ts
|
||||||
|
- >-
|
||||||
|
/home/sudacode/projects/japanese/SubMiner/src/main/runtime/character-dictionary-auto-sync.ts
|
||||||
|
- >-
|
||||||
|
/home/sudacode/projects/japanese/SubMiner/src/config/definitions/defaults-integrations.ts
|
||||||
|
priority: high
|
||||||
|
---
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||||
|
Replace TTL-based per-anime character dictionary imports with a single merged Yomitan dictionary built from locally stored per-media metadata snapshots. Retain only the most-recently-used anime up to the configured `maxLoaded`, rebuild the merged import when the retained set's membership or order changes, and avoid rebuilding on revisits that do not change the retained set.
|
||||||
|
<!-- SECTION:DESCRIPTION:END -->
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
<!-- AC:BEGIN -->
|
||||||
|
- [x] #1 Character dictionary retention becomes usage-based rather than TTL-based.
|
||||||
|
- [x] #2 Only one Yomitan character dictionary import is maintained and updated as a merged dictionary.
|
||||||
|
- [x] #3 Local storage keeps only metadata/snapshots needed to rebuild the merged dictionary; per-anime source zip cache is removed.
|
||||||
|
- [x] #4 Merged dictionary rebuild occurs when retained-set membership or order changes, not on unchanged revisits.
|
||||||
|
- [x] #5 Tests cover merged rebuild, MRU eviction, and no-op revisits.
|
||||||
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Implementation Notes
|
||||||
|
|
||||||
|
<!-- SECTION:NOTES:BEGIN -->
|
||||||
|
Replaced per-media auto-sync imports with one merged Yomitan dictionary. Added snapshot persistence in `src/main/character-dictionary-runtime.ts` so auto-sync stores normalized per-media term/image metadata locally under `character-dictionaries/snapshots/` and rebuilds `merged.zip` from the MRU retained media ids.
|
||||||
|
|
||||||
|
Updated `src/main/runtime/character-dictionary-auto-sync.ts` to keep only MRU `activeMediaIds` plus merged revision/title state, rebuild/import the merged dictionary only when retained-set membership/order changes or the merged import is missing/stale, and skip rebuild on unchanged revisits.
|
||||||
|
|
||||||
|
Kept manual `generateForCurrentMedia` support by generating a one-off per-media zip from the stored snapshot, but removed the old per-media zip cache path from auto-sync state.
|
||||||
|
|
||||||
|
Updated config/help text to describe usage-based merged retention and mark legacy TTL/eviction knobs as ignored.
|
||||||
|
<!-- SECTION:NOTES:END -->
|
||||||
|
|
||||||
|
## Final Summary
|
||||||
|
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||||
|
Implemented MRU-based merged character dictionary sync. Auto-sync now stores per-media normalized snapshots locally, rebuilds a single merged Yomitan dictionary when the retained anime set/order changes, and keeps `maxLoaded` as the cap on most-recently-used anime included in that merged import. Unchanged revisits no longer rebuild/import the dictionary.
|
||||||
|
|
||||||
|
Validation: `bun test src/main/runtime/character-dictionary-auto-sync.test.ts src/main/character-dictionary-runtime.test.ts`, `bun run tsc --noEmit`.
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
---
|
||||||
|
id: TASK-91
|
||||||
|
title: >-
|
||||||
|
Keep unsupported subtitle characters visible while excluding them from token
|
||||||
|
hover
|
||||||
|
status: Done
|
||||||
|
assignee: []
|
||||||
|
created_date: '2026-03-06 08:29'
|
||||||
|
updated_date: '2026-03-06 08:32'
|
||||||
|
labels:
|
||||||
|
- bug
|
||||||
|
- tokenizer
|
||||||
|
- renderer
|
||||||
|
dependencies: []
|
||||||
|
priority: medium
|
||||||
|
---
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||||
|
Tokenizer/rendering bug: symbols and other unsupported characters with no lookup result are removed from the rendered subtitle line after tokenization, causing the displayed line to diverge from the source subtitle text. Update rendering so unsupported spans remain visible as plain text but are not tokenized/hoverable, and add regression coverage.
|
||||||
|
<!-- SECTION:DESCRIPTION:END -->
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
<!-- AC:BEGIN -->
|
||||||
|
- [x] #1 Subtitle rendering preserves unsupported symbols and special characters from the original line.
|
||||||
|
- [x] #2 Unsupported symbols and special characters do not create interactive token hover targets.
|
||||||
|
- [x] #3 Regression tests cover a mixed line containing tokenizable text plus unsupported characters.
|
||||||
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Final Summary
|
||||||
|
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||||
|
Updated tokenized subtitle rendering to preserve unsupported punctuation and symbol spans as plain text while keeping only matched tokens interactive. Added renderer and alignment regression coverage for mixed lines so hover offsets stay correct after non-tokenizable characters remain visible.
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
---
|
||||||
|
id: TASK-92
|
||||||
|
title: Fix merged Yomitan headword selection for katakana subtitle tokens
|
||||||
|
status: Done
|
||||||
|
assignee: []
|
||||||
|
created_date: '2026-03-06 08:43'
|
||||||
|
updated_date: '2026-03-06 08:43'
|
||||||
|
labels:
|
||||||
|
- bug
|
||||||
|
- tokenizer
|
||||||
|
- yomitan
|
||||||
|
dependencies: []
|
||||||
|
priority: medium
|
||||||
|
---
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||||
|
Tokenizer/parser-selection bug: when a scanning-parser line is merged from multiple segments, the merged token currently keeps the first segment headword even if a later segment provides the full dictionary-backed term. This truncates katakana names such as バニール to バニ in the lookup payload and prevents correct dictionary matching. Also align kana classification so the prolonged sound mark is treated as kana in tokenizer heuristics.
|
||||||
|
<!-- SECTION:DESCRIPTION:END -->
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
<!-- AC:BEGIN -->
|
||||||
|
- [x] #1 Merged scanning-parser tokens prefer a full cross-segment headword when one segment expands to the full term.
|
||||||
|
- [x] #2 Standalone later segment headwords do not override the primary token headword in normal content-word + auxiliary merges.
|
||||||
|
- [x] #3 Katakana prolonged sound mark is treated as kana in tokenizer heuristics.
|
||||||
|
- [x] #4 Regression tests cover the merged katakana headword case.
|
||||||
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Final Summary
|
||||||
|
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||||
|
Adjusted merged scanning-parser headword selection so later segments only override the first headword when they provide an expanded cross-segment dictionary term, which fixes katakana lookups that were being truncated (for example, バニール being looked up as バニ). Also updated kana classification to include the katakana prolonged sound mark and added regression coverage for both the expanded-headword case and the normal content-word-plus-auxiliary case.
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||||
92
docs/plans/2026-03-06-merged-character-dictionary.md
Normal file
92
docs/plans/2026-03-06-merged-character-dictionary.md
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
# Merged Character Dictionary Implementation Plan
|
||||||
|
|
||||||
|
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||||
|
|
||||||
|
**Goal:** Replace per-anime character dictionary imports with one merged Yomitan dictionary driven by MRU usage retention.
|
||||||
|
|
||||||
|
**Architecture:** Persist normalized per-media character dictionary snapshots locally, maintain MRU retained media ids in auto-sync state, and rebuild a single merged Yomitan zip only when the retained set changes. Keep external AniList fetches only for media without a local snapshot; normal revisits stay local.
|
||||||
|
|
||||||
|
**Tech Stack:** TypeScript, Bun test, Node fs/path, existing Yomitan zip generation helpers.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Task 1: Lock in merged auto-sync behavior
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `src/main/runtime/character-dictionary-auto-sync.test.ts`
|
||||||
|
- Test: `src/main/runtime/character-dictionary-auto-sync.test.ts`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Add tests for:
|
||||||
|
- single merged dictionary title/import replacing per-media imports
|
||||||
|
- MRU reorder causing rebuild only when order changes
|
||||||
|
- unchanged revisit skipping rebuild/import
|
||||||
|
- capped retained set evicting least-recently-used media
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run: `bun test src/main/runtime/character-dictionary-auto-sync.test.ts`
|
||||||
|
Expected: FAIL on old per-media import assumptions / missing merged behavior
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Update auto-sync runtime to track retained media ids and merged revision/hash, call merged zip builder, and replace one imported Yomitan dictionary.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run: `bun test src/main/runtime/character-dictionary-auto-sync.test.ts`
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
### Task 2: Add snapshot + merged-zip runtime support
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `src/main/character-dictionary-runtime.ts`
|
||||||
|
- Modify: `src/main/character-dictionary-runtime.test.ts`
|
||||||
|
- Test: `src/main/character-dictionary-runtime.test.ts`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Add tests for:
|
||||||
|
- saving/loading normalized per-media snapshots without per-media zip cache
|
||||||
|
- building merged zip from retained media snapshots with stable dictionary title
|
||||||
|
- preserving images/terms from multiple media in merged output
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run: `bun test src/main/character-dictionary-runtime.test.ts`
|
||||||
|
Expected: FAIL because snapshot/merged APIs do not exist yet
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Refactor dictionary runtime to expose snapshot generation/loading and merged zip building from stored metadata/images.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run: `bun test src/main/character-dictionary-runtime.test.ts`
|
||||||
|
Expected: PASS
|
||||||
|
|
||||||
|
### Task 3: Wire app/runtime config and docs
|
||||||
|
|
||||||
|
**Files:**
|
||||||
|
- Modify: `src/main.ts`
|
||||||
|
- Modify: `src/config/definitions/options-integrations.ts`
|
||||||
|
- Modify: `README.md`
|
||||||
|
|
||||||
|
**Step 1: Write the failing test**
|
||||||
|
|
||||||
|
Add or update tests if needed for new dependency wiring / docs-adjacent config description expectations.
|
||||||
|
|
||||||
|
**Step 2: Run test to verify it fails**
|
||||||
|
|
||||||
|
Run: `bun test src/main/runtime/character-dictionary-auto-sync.test.ts src/main/character-dictionary-runtime.test.ts`
|
||||||
|
Expected: FAIL until wiring matches merged flow
|
||||||
|
|
||||||
|
**Step 3: Write minimal implementation**
|
||||||
|
|
||||||
|
Swap app wiring to new snapshot + merged build API, update config/docs text from TTL semantics to usage-based merged retention.
|
||||||
|
|
||||||
|
**Step 4: Run test to verify it passes**
|
||||||
|
|
||||||
|
Run: `bun test src/main/runtime/character-dictionary-auto-sync.test.ts src/main/character-dictionary-runtime.test.ts && bun run tsc --noEmit`
|
||||||
|
Expected: PASS
|
||||||
@@ -146,20 +146,23 @@ export function buildIntegrationConfigOptionRegistry(
|
|||||||
path: 'anilist.characterDictionary.refreshTtlHours',
|
path: 'anilist.characterDictionary.refreshTtlHours',
|
||||||
kind: 'number',
|
kind: 'number',
|
||||||
defaultValue: defaultConfig.anilist.characterDictionary.refreshTtlHours,
|
defaultValue: defaultConfig.anilist.characterDictionary.refreshTtlHours,
|
||||||
description: 'TTL in hours before refreshing the currently watched media dictionary.',
|
description:
|
||||||
|
'Legacy setting; merged character dictionary retention is now usage-based and this value is ignored.',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
path: 'anilist.characterDictionary.maxLoaded',
|
path: 'anilist.characterDictionary.maxLoaded',
|
||||||
kind: 'number',
|
kind: 'number',
|
||||||
defaultValue: defaultConfig.anilist.characterDictionary.maxLoaded,
|
defaultValue: defaultConfig.anilist.characterDictionary.maxLoaded,
|
||||||
description: 'Maximum number of auto-synced AniList dictionaries kept loaded at once.',
|
description:
|
||||||
|
'Maximum number of most-recently-used anime snapshots included in the merged Yomitan character dictionary.',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
path: 'anilist.characterDictionary.evictionPolicy',
|
path: 'anilist.characterDictionary.evictionPolicy',
|
||||||
kind: 'enum',
|
kind: 'enum',
|
||||||
enumValues: ['disable', 'delete'],
|
enumValues: ['disable', 'delete'],
|
||||||
defaultValue: defaultConfig.anilist.characterDictionary.evictionPolicy,
|
defaultValue: defaultConfig.anilist.characterDictionary.evictionPolicy,
|
||||||
description: 'Eviction behavior when maxLoaded is exceeded.',
|
description:
|
||||||
|
'Legacy setting; merged character dictionary eviction is usage-based and this value is ignored.',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
path: 'anilist.characterDictionary.profileScope',
|
path: 'anilist.characterDictionary.profileScope',
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ const INTEGRATION_TEMPLATE_SECTIONS: ConfigTemplateSection[] = [
|
|||||||
title: 'Anilist',
|
title: 'Anilist',
|
||||||
description: [
|
description: [
|
||||||
'Anilist API credentials and update behavior.',
|
'Anilist API credentials and update behavior.',
|
||||||
'Includes optional auto-sync for per-media character dictionaries in bundled Yomitan.',
|
'Includes optional auto-sync for a merged MRU-based character dictionary in bundled Yomitan.',
|
||||||
'Character dictionaries are keyed by AniList media ID (no season/franchise merge).',
|
'Character dictionaries are keyed by AniList media ID (no season/franchise merge).',
|
||||||
],
|
],
|
||||||
key: 'anilist',
|
key: 'anilist',
|
||||||
|
|||||||
@@ -263,6 +263,7 @@ function isKanaChar(char: string): boolean {
|
|||||||
return (
|
return (
|
||||||
(code >= 0x3041 && code <= 0x3096) ||
|
(code >= 0x3041 && code <= 0x3096) ||
|
||||||
(code >= 0x309b && code <= 0x309f) ||
|
(code >= 0x309b && code <= 0x309f) ||
|
||||||
|
code === 0x30fc ||
|
||||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||||
(code >= 0x30fd && code <= 0x30ff)
|
(code >= 0x30fd && code <= 0x30ff)
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -231,6 +231,7 @@ function isKanaChar(char: string): boolean {
|
|||||||
return (
|
return (
|
||||||
(code >= 0x3041 && code <= 0x3096) ||
|
(code >= 0x3041 && code <= 0x3096) ||
|
||||||
(code >= 0x309b && code <= 0x309f) ||
|
(code >= 0x309b && code <= 0x309f) ||
|
||||||
|
code === 0x30fc ||
|
||||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||||
(code >= 0x30fd && code <= 0x30ff)
|
(code >= 0x30fd && code <= 0x30ff)
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -127,3 +127,88 @@ test('drops scanning parser tokens which have no dictionary headword', () => {
|
|||||||
],
|
],
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Regression: one scanning-parser line whose second segment reports the full
// term as its headword; the merged token is expected to expose that full term.
test('prefers the longest dictionary headword across merged segments', () => {
  const tokens = selectYomitanParseTokens(
    [
      makeParseItem('scanning-parser', [
        [
          { text: 'バニ', reading: 'ばに', headword: 'バニ' },
          { text: 'ール', reading: 'ーる', headword: 'バニール' },
        ],
      ]),
    ],
    () => false,
    'headword',
  );

  // Compare only the fields this test cares about.
  const summary = tokens?.map(({ surface, reading, headword }) => ({
    surface,
    reading,
    headword,
  }));

  assert.deepEqual(summary, [
    { surface: 'バニール', reading: 'ばにーる', headword: 'バニール' },
  ]);
});
|
||||||
|
|
||||||
|
// Regression: a later segment whose headword is no longer than its own text
// must not replace the headword taken from the first segment.
test('keeps the first headword when later segments are standalone words', () => {
  const tokens = selectYomitanParseTokens(
    [
      makeParseItem('scanning-parser', [
        [
          { text: '猫', reading: 'ねこ', headword: '猫' },
          { text: 'です', reading: 'です', headword: 'です' },
        ],
      ]),
    ],
    () => false,
    'headword',
  );

  // Compare only the fields this test cares about.
  const summary = tokens?.map(({ surface, reading, headword }) => ({
    surface,
    reading,
    headword,
  }));

  assert.deepEqual(summary, [
    { surface: '猫です', reading: 'ねこです', headword: '猫' },
  ]);
});
|
||||||
|
|
||||||
|
// Regression: a headword-less kana line that completes the previous token's
// longer headword is folded into that token instead of being dropped.
test('merges trailing katakana continuation without headword into previous token', () => {
  const tokens = selectYomitanParseTokens(
    [
      makeParseItem('scanning-parser', [
        [{ text: 'カズ', reading: 'かず', headword: 'カズマ' }],
        [{ text: 'マ', reading: 'ま' }],
        [{ text: '魔王軍', reading: 'まおうぐん', headword: '魔王軍' }],
      ]),
    ],
    () => false,
    'headword',
  );

  // Compare only the fields this test cares about.
  const summary = tokens?.map(({ surface, reading, headword }) => ({
    surface,
    reading,
    headword,
  }));

  assert.deepEqual(summary, [
    { surface: 'カズマ', reading: 'かずま', headword: 'カズマ' },
    { surface: '魔王軍', reading: 'まおうぐん', headword: '魔王軍' },
  ]);
});
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ function isKanaChar(char: string): boolean {
|
|||||||
return (
|
return (
|
||||||
(code >= 0x3041 && code <= 0x3096) ||
|
(code >= 0x3041 && code <= 0x3096) ||
|
||||||
(code >= 0x309b && code <= 0x309f) ||
|
(code >= 0x309b && code <= 0x309f) ||
|
||||||
|
code === 0x30fc ||
|
||||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||||
(code >= 0x30fd && code <= 0x30ff)
|
(code >= 0x30fd && code <= 0x30ff)
|
||||||
);
|
);
|
||||||
@@ -111,6 +112,51 @@ function extractYomitanHeadword(segment: YomitanParseSegment): string {
|
|||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Chooses the headword for a token merged from several parser segments.
 *
 * @param firstHeadword - Headword of the first segment that had one ('' if none).
 * @param expandedHeadwords - Segment headwords that were longer than their own
 *   segment text, in segment order.
 * @param surface - Concatenated surface text of the merged token.
 * @returns The expanded headword equal to `surface` when there is one;
 *   otherwise the longest expanded headword (earliest wins on ties);
 *   otherwise `firstHeadword`, or '' when that is empty.
 */
function selectMergedHeadword(
  firstHeadword: string,
  expandedHeadwords: string[],
  surface: string,
): string {
  // Without any expanded candidate the first segment's headword stands.
  if (expandedHeadwords.length === 0) {
    return firstHeadword || '';
  }

  // An expanded headword that spells out the whole merged surface is preferred.
  if (surface !== '' && expandedHeadwords.includes(surface)) {
    return surface;
  }

  // Strict `>` keeps the earliest candidate when lengths tie.
  let longest = '';
  for (const candidate of expandedHeadwords) {
    if (candidate.length > longest.length) {
      longest = candidate;
    }
  }
  return longest;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `text` is non-empty and every character in it passes
 * `isKanaChar`.
 *
 * @param text - Text to inspect; iterated by code point.
 * @returns `false` for the empty string or as soon as one character fails
 *   `isKanaChar`; `true` otherwise.
 */
function isKanaOnlyText(text: string): boolean {
  if (text.length === 0) {
    return false;
  }

  for (const char of text) {
    if (!isKanaChar(char)) {
      return false;
    }
  }
  return true;
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a headword-less kana span should be appended to the token
 * emitted just before it.
 *
 * The merge is allowed only when the previous token's headword is longer than
 * its surface and begins with the surface plus the continuation text.
 *
 * @param previousToken - Most recently emitted token, if any.
 * @param continuationSurface - Surface text of the span that has no headword.
 * @returns `true` (narrowing `previousToken` to `MergedToken`) when the span
 *   should be folded into `previousToken`; `false` otherwise.
 */
function shouldMergeKanaContinuation(
  previousToken: MergedToken | undefined,
  continuationSurface: string,
): previousToken is MergedToken {
  if (!previousToken) {
    return false;
  }
  if (!continuationSurface || !isKanaOnlyText(continuationSurface)) {
    return false;
  }

  const { headword, surface } = previousToken;
  // The headword must still have characters left beyond the current surface.
  if (!headword || headword.length <= surface.length) {
    return false;
  }

  return headword.startsWith(`${surface}${continuationSurface}`);
}
|
||||||
|
|
||||||
export function mapYomitanParseResultItemToMergedTokens(
|
export function mapYomitanParseResultItemToMergedTokens(
|
||||||
parseResult: YomitanParseResultItem,
|
parseResult: YomitanParseResultItem,
|
||||||
isKnownWord: (text: string) => boolean,
|
isKnownWord: (text: string) => boolean,
|
||||||
@@ -140,7 +186,8 @@ export function mapYomitanParseResultItemToMergedTokens(
|
|||||||
|
|
||||||
let combinedSurface = '';
|
let combinedSurface = '';
|
||||||
let combinedReading = '';
|
let combinedReading = '';
|
||||||
let combinedHeadword = '';
|
let firstHeadword = '';
|
||||||
|
const expandedHeadwords: string[] = [];
|
||||||
|
|
||||||
for (const segment of line) {
|
for (const segment of line) {
|
||||||
const segmentText = segment.text;
|
const segmentText = segment.text;
|
||||||
@@ -152,8 +199,14 @@ export function mapYomitanParseResultItemToMergedTokens(
|
|||||||
if (typeof segment.reading === 'string') {
|
if (typeof segment.reading === 'string') {
|
||||||
combinedReading += segment.reading;
|
combinedReading += segment.reading;
|
||||||
}
|
}
|
||||||
if (!combinedHeadword) {
|
const segmentHeadword = extractYomitanHeadword(segment);
|
||||||
combinedHeadword = extractYomitanHeadword(segment);
|
if (segmentHeadword) {
|
||||||
|
if (!firstHeadword) {
|
||||||
|
firstHeadword = segmentHeadword;
|
||||||
|
}
|
||||||
|
if (segmentHeadword.length > segmentText.length) {
|
||||||
|
expandedHeadwords.push(segmentHeadword);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -164,7 +217,20 @@ export function mapYomitanParseResultItemToMergedTokens(
|
|||||||
const start = charOffset;
|
const start = charOffset;
|
||||||
const end = start + combinedSurface.length;
|
const end = start + combinedSurface.length;
|
||||||
charOffset = end;
|
charOffset = end;
|
||||||
|
const combinedHeadword = selectMergedHeadword(
|
||||||
|
firstHeadword,
|
||||||
|
expandedHeadwords,
|
||||||
|
combinedSurface,
|
||||||
|
);
|
||||||
if (!combinedHeadword) {
|
if (!combinedHeadword) {
|
||||||
|
const previousToken = tokens[tokens.length - 1];
|
||||||
|
if (shouldMergeKanaContinuation(previousToken, combinedSurface)) {
|
||||||
|
previousToken.surface += combinedSurface;
|
||||||
|
previousToken.reading += combinedReading;
|
||||||
|
previousToken.endPos = end;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// No dictionary-backed headword for this merged unit; skip it entirely so
|
// No dictionary-backed headword for this merged unit; skip it entirely so
|
||||||
// downstream keyboard/frequency/JLPT flows only operate on lookup-backed tokens.
|
// downstream keyboard/frequency/JLPT flows only operate on lookup-backed tokens.
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
27
src/main.ts
27
src/main.ts
@@ -361,7 +361,6 @@ import {
|
|||||||
registerGlobalShortcuts as registerGlobalShortcutsCore,
|
registerGlobalShortcuts as registerGlobalShortcutsCore,
|
||||||
replayCurrentSubtitleRuntime,
|
replayCurrentSubtitleRuntime,
|
||||||
resolveJellyfinPlaybackPlanRuntime,
|
resolveJellyfinPlaybackPlanRuntime,
|
||||||
removeYomitanDictionarySettings,
|
|
||||||
runStartupBootstrapRuntime,
|
runStartupBootstrapRuntime,
|
||||||
saveSubtitlePosition as saveSubtitlePositionCore,
|
saveSubtitlePosition as saveSubtitlePositionCore,
|
||||||
clearYomitanParserCachesForWindow,
|
clearYomitanParserCachesForWindow,
|
||||||
@@ -1236,9 +1235,16 @@ const characterDictionaryRuntime = createCharacterDictionaryRuntimeService({
|
|||||||
|
|
||||||
const characterDictionaryAutoSyncRuntime = createCharacterDictionaryAutoSyncRuntimeService({
|
const characterDictionaryAutoSyncRuntime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||||
userDataPath: USER_DATA_PATH,
|
userDataPath: USER_DATA_PATH,
|
||||||
getConfig: () => getResolvedConfig().anilist.characterDictionary,
|
getConfig: () => {
|
||||||
generateCharacterDictionary: (options) =>
|
const config = getResolvedConfig().anilist.characterDictionary;
|
||||||
characterDictionaryRuntime.generateForCurrentMedia(undefined, options),
|
return {
|
||||||
|
enabled: config.enabled,
|
||||||
|
maxLoaded: config.maxLoaded,
|
||||||
|
profileScope: config.profileScope,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
getOrCreateCurrentSnapshot: () => characterDictionaryRuntime.getOrCreateCurrentSnapshot(),
|
||||||
|
buildMergedDictionary: (mediaIds) => characterDictionaryRuntime.buildMergedDictionary(mediaIds),
|
||||||
getYomitanDictionaryInfo: async () => {
|
getYomitanDictionaryInfo: async () => {
|
||||||
await ensureYomitanExtensionLoaded();
|
await ensureYomitanExtensionLoaded();
|
||||||
return await getYomitanDictionaryInfo(getYomitanParserRuntimeDeps(), {
|
return await getYomitanDictionaryInfo(getYomitanParserRuntimeDeps(), {
|
||||||
@@ -1272,19 +1278,6 @@ const characterDictionaryAutoSyncRuntime = createCharacterDictionaryAutoSyncRunt
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
removeYomitanDictionarySettings: async (dictionaryTitle, profileScope, mode) => {
|
|
||||||
await ensureYomitanExtensionLoaded();
|
|
||||||
return await removeYomitanDictionarySettings(
|
|
||||||
dictionaryTitle,
|
|
||||||
profileScope,
|
|
||||||
mode,
|
|
||||||
getYomitanParserRuntimeDeps(),
|
|
||||||
{
|
|
||||||
error: (message, ...args) => logger.error(message, ...args),
|
|
||||||
info: (message, ...args) => logger.info(message, ...args),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
},
|
|
||||||
now: () => Date.now(),
|
now: () => Date.now(),
|
||||||
schedule: (fn, delayMs) => setTimeout(fn, delayMs),
|
schedule: (fn, delayMs) => setTimeout(fn, delayMs),
|
||||||
clearSchedule: (timer) => clearTimeout(timer),
|
clearSchedule: (timer) => clearTimeout(timer),
|
||||||
|
|||||||
@@ -178,7 +178,7 @@ test('generateForCurrentMedia emits structured-content glossary so image stays w
|
|||||||
|
|
||||||
const image = entry.content[0] as Record<string, unknown>;
|
const image = entry.content[0] as Record<string, unknown>;
|
||||||
assert.equal(image.tag, 'img');
|
assert.equal(image.tag, 'img');
|
||||||
assert.equal(image.path, 'img/c123.png');
|
assert.equal(image.path, 'img/m130298-c123.png');
|
||||||
assert.equal(image.sizeUnits, 'em');
|
assert.equal(image.sizeUnits, 'em');
|
||||||
|
|
||||||
const descriptionLine = entry.content[5];
|
const descriptionLine = entry.content[5];
|
||||||
@@ -196,37 +196,10 @@ test('generateForCurrentMedia emits structured-content glossary so image stays w
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test('generateForCurrentMedia regenerates dictionary when cached format version is stale', async () => {
|
test('getOrCreateCurrentSnapshot persists and reuses normalized snapshot data', async () => {
|
||||||
const userDataPath = makeTempDir();
|
const userDataPath = makeTempDir();
|
||||||
const dictionariesDir = path.join(userDataPath, 'character-dictionaries');
|
|
||||||
fs.mkdirSync(dictionariesDir, { recursive: true });
|
|
||||||
|
|
||||||
const staleZipPath = path.join(dictionariesDir, 'anilist-130298.zip');
|
|
||||||
fs.writeFileSync(staleZipPath, Buffer.from('not-a-real-zip'));
|
|
||||||
fs.writeFileSync(
|
|
||||||
path.join(dictionariesDir, 'cache.json'),
|
|
||||||
JSON.stringify(
|
|
||||||
{
|
|
||||||
anilistById: {
|
|
||||||
'130298': {
|
|
||||||
mediaId: 130298,
|
|
||||||
mediaTitle: 'The Eminence in Shadow',
|
|
||||||
entryCount: 1,
|
|
||||||
zipPath: staleZipPath,
|
|
||||||
updatedAt: 1_700_000_000_000,
|
|
||||||
formatVersion: 6,
|
|
||||||
dictionaryTitle: 'SubMiner Character Dictionary (AniList 130298)',
|
|
||||||
revision: 'stale-revision',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
null,
|
|
||||||
2,
|
|
||||||
),
|
|
||||||
'utf8',
|
|
||||||
);
|
|
||||||
|
|
||||||
const originalFetch = globalThis.fetch;
|
const originalFetch = globalThis.fetch;
|
||||||
|
let searchQueryCount = 0;
|
||||||
let characterQueryCount = 0;
|
let characterQueryCount = 0;
|
||||||
|
|
||||||
globalThis.fetch = (async (input: string | URL | Request, init?: RequestInit) => {
|
globalThis.fetch = (async (input: string | URL | Request, init?: RequestInit) => {
|
||||||
@@ -237,6 +210,7 @@ test('generateForCurrentMedia regenerates dictionary when cached format version
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (body.query?.includes('Page(perPage: 10)')) {
|
if (body.query?.includes('Page(perPage: 10)')) {
|
||||||
|
searchQueryCount += 1;
|
||||||
return new Response(
|
return new Response(
|
||||||
JSON.stringify({
|
JSON.stringify({
|
||||||
data: {
|
data: {
|
||||||
@@ -328,17 +302,227 @@ test('generateForCurrentMedia regenerates dictionary when cached format version
|
|||||||
now: () => 1_700_000_000_100,
|
now: () => 1_700_000_000_100,
|
||||||
});
|
});
|
||||||
|
|
||||||
const result = await runtime.generateForCurrentMedia(undefined, {
|
const first = await runtime.getOrCreateCurrentSnapshot();
|
||||||
refreshTtlMs: 60 * 60 * 1000,
|
const second = await runtime.getOrCreateCurrentSnapshot();
|
||||||
});
|
|
||||||
assert.equal(result.fromCache, false);
|
|
||||||
assert.equal(characterQueryCount, 1);
|
|
||||||
|
|
||||||
const termBank = JSON.parse(readStoredZipEntry(result.zipPath, 'term_bank_1.json').toString('utf8')) as Array<
|
assert.equal(first.fromCache, false);
|
||||||
|
assert.equal(second.fromCache, true);
|
||||||
|
assert.equal(searchQueryCount, 2);
|
||||||
|
assert.equal(characterQueryCount, 1);
|
||||||
|
assert.equal(
|
||||||
|
fs.existsSync(path.join(userDataPath, 'character-dictionaries', 'cache.json')),
|
||||||
|
false,
|
||||||
|
);
|
||||||
|
|
||||||
|
const snapshotPath = path.join(
|
||||||
|
userDataPath,
|
||||||
|
'character-dictionaries',
|
||||||
|
'snapshots',
|
||||||
|
'anilist-130298.json',
|
||||||
|
);
|
||||||
|
const snapshot = JSON.parse(fs.readFileSync(snapshotPath, 'utf8')) as {
|
||||||
|
mediaId: number;
|
||||||
|
entryCount: number;
|
||||||
|
termEntries: Array<
|
||||||
[string, string, string, string, number, Array<string | Record<string, unknown>>, number, string]
|
[string, string, string, string, number, Array<string | Record<string, unknown>>, number, string]
|
||||||
>;
|
>;
|
||||||
const alpha = termBank.find(([term]) => term === 'アルファ');
|
};
|
||||||
|
assert.equal(snapshot.mediaId, 130298);
|
||||||
|
assert.equal(snapshot.entryCount > 0, true);
|
||||||
|
const alpha = snapshot.termEntries.find(([term]) => term === 'アルファ');
|
||||||
assert.ok(alpha);
|
assert.ok(alpha);
|
||||||
|
} finally {
|
||||||
|
globalThis.fetch = originalFetch;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test('buildMergedDictionary combines stored snapshots into one stable dictionary', async () => {
|
||||||
|
const userDataPath = makeTempDir();
|
||||||
|
const originalFetch = globalThis.fetch;
|
||||||
|
const current = { title: 'The Eminence in Shadow', episode: 5 };
|
||||||
|
|
||||||
|
globalThis.fetch = (async (input: string | URL | Request, init?: RequestInit) => {
|
||||||
|
const url = typeof input === 'string' ? input : input instanceof URL ? input.href : input.url;
|
||||||
|
if (url === GRAPHQL_URL) {
|
||||||
|
const body = JSON.parse(String(init?.body ?? '{}')) as {
|
||||||
|
query?: string;
|
||||||
|
variables?: Record<string, unknown>;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (body.query?.includes('Page(perPage: 10)')) {
|
||||||
|
if (body.variables?.search === 'The Eminence in Shadow') {
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
data: {
|
||||||
|
Page: {
|
||||||
|
media: [
|
||||||
|
{
|
||||||
|
id: 130298,
|
||||||
|
episodes: 20,
|
||||||
|
title: {
|
||||||
|
romaji: 'Kage no Jitsuryokusha ni Naritakute!',
|
||||||
|
english: 'The Eminence in Shadow',
|
||||||
|
native: '陰の実力者になりたくて!',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
status: 200,
|
||||||
|
headers: { 'content-type': 'application/json' },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
data: {
|
||||||
|
Page: {
|
||||||
|
media: [
|
||||||
|
{
|
||||||
|
id: 21,
|
||||||
|
episodes: 28,
|
||||||
|
title: {
|
||||||
|
romaji: 'Sousou no Frieren',
|
||||||
|
english: 'Frieren: Beyond Journey’s End',
|
||||||
|
native: '葬送のフリーレン',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
status: 200,
|
||||||
|
headers: { 'content-type': 'application/json' },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (body.query?.includes('characters(page: $page')) {
|
||||||
|
const mediaId = Number(body.variables?.id);
|
||||||
|
if (mediaId === 130298) {
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
data: {
|
||||||
|
Media: {
|
||||||
|
title: {
|
||||||
|
english: 'The Eminence in Shadow',
|
||||||
|
},
|
||||||
|
characters: {
|
||||||
|
pageInfo: { hasNextPage: false },
|
||||||
|
edges: [
|
||||||
|
{
|
||||||
|
role: 'MAIN',
|
||||||
|
node: {
|
||||||
|
id: 111,
|
||||||
|
description: 'Leader of Shadow Garden.',
|
||||||
|
image: {
|
||||||
|
large: 'https://example.com/alpha.png',
|
||||||
|
medium: null,
|
||||||
|
},
|
||||||
|
name: {
|
||||||
|
full: 'Alpha',
|
||||||
|
native: 'アルファ',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
status: 200,
|
||||||
|
headers: { 'content-type': 'application/json' },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Response(
|
||||||
|
JSON.stringify({
|
||||||
|
data: {
|
||||||
|
Media: {
|
||||||
|
title: {
|
||||||
|
english: 'Frieren: Beyond Journey’s End',
|
||||||
|
},
|
||||||
|
characters: {
|
||||||
|
pageInfo: { hasNextPage: false },
|
||||||
|
edges: [
|
||||||
|
{
|
||||||
|
role: 'MAIN',
|
||||||
|
node: {
|
||||||
|
id: 222,
|
||||||
|
description: 'Elven mage.',
|
||||||
|
image: {
|
||||||
|
large: 'https://example.com/frieren.png',
|
||||||
|
medium: null,
|
||||||
|
},
|
||||||
|
name: {
|
||||||
|
full: 'Frieren',
|
||||||
|
native: 'フリーレン',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
status: 200,
|
||||||
|
headers: { 'content-type': 'application/json' },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (url === 'https://example.com/alpha.png' || url === 'https://example.com/frieren.png') {
|
||||||
|
return new Response(PNG_1X1, {
|
||||||
|
status: 200,
|
||||||
|
headers: { 'content-type': 'image/png' },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error(`Unexpected fetch URL: ${url}`);
|
||||||
|
}) as typeof globalThis.fetch;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const runtime = createCharacterDictionaryRuntimeService({
|
||||||
|
userDataPath,
|
||||||
|
getCurrentMediaPath: () => '/tmp/current.mkv',
|
||||||
|
getCurrentMediaTitle: () => current.title,
|
||||||
|
resolveMediaPathForJimaku: (mediaPath) => mediaPath,
|
||||||
|
guessAnilistMediaInfo: async () => ({
|
||||||
|
title: current.title,
|
||||||
|
episode: current.episode,
|
||||||
|
source: 'fallback',
|
||||||
|
}),
|
||||||
|
now: () => 1_700_000_000_100,
|
||||||
|
});
|
||||||
|
|
||||||
|
await runtime.getOrCreateCurrentSnapshot();
|
||||||
|
current.title = 'Frieren: Beyond Journey’s End';
|
||||||
|
current.episode = 1;
|
||||||
|
await runtime.getOrCreateCurrentSnapshot();
|
||||||
|
|
||||||
|
const merged = await runtime.buildMergedDictionary([21, 130298]);
|
||||||
|
const index = JSON.parse(readStoredZipEntry(merged.zipPath, 'index.json').toString('utf8')) as {
|
||||||
|
title: string;
|
||||||
|
};
|
||||||
|
const termBank = JSON.parse(readStoredZipEntry(merged.zipPath, 'term_bank_1.json').toString('utf8')) as Array<
|
||||||
|
[string, string, string, string, number, Array<string | Record<string, unknown>>, number, string]
|
||||||
|
>;
|
||||||
|
const frieren = termBank.find(([term]) => term === 'フリーレン');
|
||||||
|
const alpha = termBank.find(([term]) => term === 'アルファ');
|
||||||
|
|
||||||
|
assert.equal(index.title, 'SubMiner Character Dictionary');
|
||||||
|
assert.equal(merged.entryCount >= 2, true);
|
||||||
|
assert.ok(frieren);
|
||||||
|
assert.ok(alpha);
|
||||||
|
assert.equal((frieren[5][0] as { type?: string }).type, 'structured-content');
|
||||||
assert.equal((alpha[5][0] as { type?: string }).type, 'structured-content');
|
assert.equal((alpha[5][0] as { type?: string }).type, 'structured-content');
|
||||||
} finally {
|
} finally {
|
||||||
globalThis.fetch = originalFetch;
|
globalThis.fetch = originalFetch;
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as os from 'os';
|
import * as os from 'os';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
import { createHash } from 'node:crypto';
|
||||||
import type { AnilistMediaGuess } from '../core/services/anilist/anilist-updater';
|
import type { AnilistMediaGuess } from '../core/services/anilist/anilist-updater';
|
||||||
import { hasVideoExtension } from '../shared/video-extensions';
|
import { hasVideoExtension } from '../shared/video-extensions';
|
||||||
|
|
||||||
@@ -26,22 +27,35 @@ const HONORIFIC_SUFFIXES = [
|
|||||||
] as const;
|
] as const;
|
||||||
type CharacterDictionaryRole = 'main' | 'primary' | 'side' | 'appears';
|
type CharacterDictionaryRole = 'main' | 'primary' | 'side' | 'appears';
|
||||||
|
|
||||||
type CharacterDictionaryCacheEntry = {
|
type CharacterDictionaryGlossaryEntry = string | Record<string, unknown>;
|
||||||
|
type CharacterDictionaryTermEntry = [
|
||||||
|
string,
|
||||||
|
string,
|
||||||
|
string,
|
||||||
|
string,
|
||||||
|
number,
|
||||||
|
CharacterDictionaryGlossaryEntry[],
|
||||||
|
number,
|
||||||
|
string,
|
||||||
|
];
|
||||||
|
|
||||||
|
type CharacterDictionarySnapshotImage = {
|
||||||
|
path: string;
|
||||||
|
dataBase64: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type CharacterDictionarySnapshot = {
|
||||||
|
formatVersion: number;
|
||||||
mediaId: number;
|
mediaId: number;
|
||||||
mediaTitle: string;
|
mediaTitle: string;
|
||||||
entryCount: number;
|
entryCount: number;
|
||||||
zipPath: string;
|
|
||||||
updatedAt: number;
|
updatedAt: number;
|
||||||
formatVersion?: number;
|
termEntries: CharacterDictionaryTermEntry[];
|
||||||
dictionaryTitle?: string;
|
images: CharacterDictionarySnapshotImage[];
|
||||||
revision?: string;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
type CharacterDictionaryCacheFile = {
|
const CHARACTER_DICTIONARY_FORMAT_VERSION = 9;
|
||||||
anilistById: Record<string, CharacterDictionaryCacheEntry>;
|
const CHARACTER_DICTIONARY_MERGED_TITLE = 'SubMiner Character Dictionary';
|
||||||
};
|
|
||||||
|
|
||||||
const CHARACTER_DICTIONARY_FORMAT_VERSION = 8;
|
|
||||||
|
|
||||||
type AniListSearchResponse = {
|
type AniListSearchResponse = {
|
||||||
Page?: {
|
Page?: {
|
||||||
@@ -117,6 +131,21 @@ export type CharacterDictionaryGenerateOptions = {
|
|||||||
refreshTtlMs?: number;
|
refreshTtlMs?: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export type CharacterDictionarySnapshotResult = {
|
||||||
|
mediaId: number;
|
||||||
|
mediaTitle: string;
|
||||||
|
entryCount: number;
|
||||||
|
fromCache: boolean;
|
||||||
|
updatedAt: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type MergedCharacterDictionaryBuildResult = {
|
||||||
|
zipPath: string;
|
||||||
|
revision: string;
|
||||||
|
dictionaryTitle: string;
|
||||||
|
entryCount: number;
|
||||||
|
};
|
||||||
|
|
||||||
export interface CharacterDictionaryRuntimeDeps {
|
export interface CharacterDictionaryRuntimeDeps {
|
||||||
userDataPath: string;
|
userDataPath: string;
|
||||||
getCurrentMediaPath: () => string | null;
|
getCurrentMediaPath: () => string | null;
|
||||||
@@ -383,29 +412,60 @@ function resolveDictionaryGuessInputs(targetPath: string): {
|
|||||||
throw new Error(`Dictionary target must be a file or directory path: ${targetPath}`);
|
throw new Error(`Dictionary target must be a file or directory path: ${targetPath}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
function readCache(cachePath: string): CharacterDictionaryCacheFile {
|
function getSnapshotsDir(outputDir: string): string {
|
||||||
|
return path.join(outputDir, 'snapshots');
|
||||||
|
}
|
||||||
|
|
||||||
|
function getSnapshotPath(outputDir: string, mediaId: number): string {
|
||||||
|
return path.join(getSnapshotsDir(outputDir), `anilist-${mediaId}.json`);
|
||||||
|
}
|
||||||
|
|
||||||
|
function getMergedZipPath(outputDir: string): string {
|
||||||
|
return path.join(outputDir, 'merged.zip');
|
||||||
|
}
|
||||||
|
|
||||||
|
function readSnapshot(snapshotPath: string): CharacterDictionarySnapshot | null {
|
||||||
try {
|
try {
|
||||||
const raw = fs.readFileSync(cachePath, 'utf8');
|
const raw = fs.readFileSync(snapshotPath, 'utf8');
|
||||||
const parsed = JSON.parse(raw) as CharacterDictionaryCacheFile;
|
const parsed = JSON.parse(raw) as Partial<CharacterDictionarySnapshot>;
|
||||||
if (!parsed || typeof parsed !== 'object' || !parsed.anilistById) {
|
if (!parsed || typeof parsed !== 'object') {
|
||||||
return { anilistById: {} };
|
return null;
|
||||||
}
|
}
|
||||||
return parsed;
|
if (
|
||||||
|
parsed.formatVersion !== CHARACTER_DICTIONARY_FORMAT_VERSION ||
|
||||||
|
typeof parsed.mediaId !== 'number' ||
|
||||||
|
typeof parsed.mediaTitle !== 'string' ||
|
||||||
|
typeof parsed.entryCount !== 'number' ||
|
||||||
|
typeof parsed.updatedAt !== 'number' ||
|
||||||
|
!Array.isArray(parsed.termEntries) ||
|
||||||
|
!Array.isArray(parsed.images)
|
||||||
|
) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
formatVersion: parsed.formatVersion,
|
||||||
|
mediaId: parsed.mediaId,
|
||||||
|
mediaTitle: parsed.mediaTitle,
|
||||||
|
entryCount: parsed.entryCount,
|
||||||
|
updatedAt: parsed.updatedAt,
|
||||||
|
termEntries: parsed.termEntries as CharacterDictionaryTermEntry[],
|
||||||
|
images: parsed.images as CharacterDictionarySnapshotImage[],
|
||||||
|
};
|
||||||
} catch {
|
} catch {
|
||||||
return { anilistById: {} };
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function writeCache(cachePath: string, cache: CharacterDictionaryCacheFile): void {
|
function writeSnapshot(snapshotPath: string, snapshot: CharacterDictionarySnapshot): void {
|
||||||
ensureDir(path.dirname(cachePath));
|
ensureDir(path.dirname(snapshotPath));
|
||||||
fs.writeFileSync(cachePath, JSON.stringify(cache, null, 2), 'utf8');
|
fs.writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2), 'utf8');
|
||||||
}
|
}
|
||||||
|
|
||||||
function createDefinitionGlossary(
|
function createDefinitionGlossary(
|
||||||
character: CharacterRecord,
|
character: CharacterRecord,
|
||||||
mediaTitle: string,
|
mediaTitle: string,
|
||||||
imagePath: string | null,
|
imagePath: string | null,
|
||||||
): Array<string | Record<string, unknown>> {
|
): CharacterDictionaryGlossaryEntry[] {
|
||||||
const displayName = character.nativeName || character.fullName || `Character ${character.id}`;
|
const displayName = character.nativeName || character.fullName || `Character ${character.id}`;
|
||||||
const lines: string[] = [`${displayName} [${roleLabel(character.role)}]`, `${mediaTitle} · AniList`];
|
const lines: string[] = [`${displayName} [${roleLabel(character.role)}]`, `${mediaTitle} · AniList`];
|
||||||
|
|
||||||
@@ -449,12 +509,16 @@ function createDefinitionGlossary(
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function buildSnapshotImagePath(mediaId: number, charId: number, ext: string): string {
|
||||||
|
return `img/m${mediaId}-c${charId}.${ext}`;
|
||||||
|
}
|
||||||
|
|
||||||
function buildTermEntry(
|
function buildTermEntry(
|
||||||
term: string,
|
term: string,
|
||||||
reading: string,
|
reading: string,
|
||||||
role: CharacterDictionaryRole,
|
role: CharacterDictionaryRole,
|
||||||
glossary: Array<string | Record<string, unknown>>,
|
glossary: CharacterDictionaryGlossaryEntry[],
|
||||||
): Array<string | number | Array<string | Record<string, unknown>>> {
|
): CharacterDictionaryTermEntry {
|
||||||
const { tag, score } = roleInfo(role);
|
const { tag, score } = roleInfo(role);
|
||||||
return [term, reading, `name ${tag}`, '', score, glossary, 0, ''];
|
return [term, reading, `name ${tag}`, '', score, glossary, 0, ''];
|
||||||
}
|
}
|
||||||
@@ -754,6 +818,7 @@ async function fetchCharactersForMedia(
|
|||||||
|
|
||||||
async function downloadCharacterImage(imageUrl: string, charId: number): Promise<{
|
async function downloadCharacterImage(imageUrl: string, charId: number): Promise<{
|
||||||
filename: string;
|
filename: string;
|
||||||
|
ext: string;
|
||||||
bytes: Buffer;
|
bytes: Buffer;
|
||||||
} | null> {
|
} | null> {
|
||||||
try {
|
try {
|
||||||
@@ -764,6 +829,7 @@ async function downloadCharacterImage(imageUrl: string, charId: number): Promise
|
|||||||
const ext = inferImageExt(response.headers.get('content-type'));
|
const ext = inferImageExt(response.headers.get('content-type'));
|
||||||
return {
|
return {
|
||||||
filename: `c${charId}.${ext}`,
|
filename: `c${charId}.${ext}`,
|
||||||
|
ext,
|
||||||
bytes,
|
bytes,
|
||||||
};
|
};
|
||||||
} catch {
|
} catch {
|
||||||
@@ -775,14 +841,17 @@ function buildDictionaryTitle(mediaId: number): string {
|
|||||||
return `SubMiner Character Dictionary (AniList ${mediaId})`;
|
return `SubMiner Character Dictionary (AniList ${mediaId})`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function createIndex(mediaId: number, mediaTitle: string, revision: string): Record<string, unknown> {
|
function createIndex(
|
||||||
const dictionaryTitle = buildDictionaryTitle(mediaId);
|
dictionaryTitle: string,
|
||||||
|
description: string,
|
||||||
|
revision: string,
|
||||||
|
): Record<string, unknown> {
|
||||||
return {
|
return {
|
||||||
title: dictionaryTitle,
|
title: dictionaryTitle,
|
||||||
revision,
|
revision,
|
||||||
format: 3,
|
format: 3,
|
||||||
author: 'SubMiner',
|
author: 'SubMiner',
|
||||||
description: `Character names from ${mediaTitle} [AniList media ID ${mediaId}]`,
|
description,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -796,108 +865,19 @@ function createTagBank(): Array<[string, string, number, string, number]> {
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createCharacterDictionaryRuntimeService(deps: CharacterDictionaryRuntimeDeps): {
|
function buildSnapshotFromCharacters(
|
||||||
generateForCurrentMedia: (
|
mediaId: number,
|
||||||
targetPath?: string,
|
mediaTitle: string,
|
||||||
options?: CharacterDictionaryGenerateOptions,
|
characters: CharacterRecord[],
|
||||||
) => Promise<CharacterDictionaryBuildResult>;
|
imagesByCharacterId: Map<number, CharacterDictionarySnapshotImage>,
|
||||||
} {
|
updatedAt: number,
|
||||||
const outputDir = path.join(deps.userDataPath, 'character-dictionaries');
|
): CharacterDictionarySnapshot {
|
||||||
const cachePath = path.join(outputDir, 'cache.json');
|
const termEntries: CharacterDictionaryTermEntry[] = [];
|
||||||
const sleepMs = deps.sleep ?? sleep;
|
|
||||||
|
|
||||||
return {
|
|
||||||
generateForCurrentMedia: async (
|
|
||||||
targetPath?: string,
|
|
||||||
options?: CharacterDictionaryGenerateOptions,
|
|
||||||
) => {
|
|
||||||
let hasAniListRequest = false;
|
|
||||||
const waitForAniListRequestSlot = async (): Promise<void> => {
|
|
||||||
if (!hasAniListRequest) {
|
|
||||||
hasAniListRequest = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
await sleepMs(ANILIST_REQUEST_DELAY_MS);
|
|
||||||
};
|
|
||||||
|
|
||||||
const dictionaryTarget = targetPath?.trim() || '';
|
|
||||||
const guessInput =
|
|
||||||
dictionaryTarget.length > 0
|
|
||||||
? resolveDictionaryGuessInputs(dictionaryTarget)
|
|
||||||
: {
|
|
||||||
mediaPath: deps.getCurrentMediaPath(),
|
|
||||||
mediaTitle: deps.getCurrentMediaTitle(),
|
|
||||||
};
|
|
||||||
const mediaPathForGuess = deps.resolveMediaPathForJimaku(guessInput.mediaPath);
|
|
||||||
const mediaTitle = guessInput.mediaTitle;
|
|
||||||
const guessed = await deps.guessAnilistMediaInfo(mediaPathForGuess, mediaTitle);
|
|
||||||
if (!guessed || !guessed.title.trim()) {
|
|
||||||
throw new Error('Unable to resolve current anime from media path/title.');
|
|
||||||
}
|
|
||||||
|
|
||||||
const resolvedMedia = await resolveAniListMediaIdFromGuess(guessed, waitForAniListRequestSlot);
|
|
||||||
const cache = readCache(cachePath);
|
|
||||||
const cached = cache.anilistById[String(resolvedMedia.id)];
|
|
||||||
const refreshTtlMsRaw = options?.refreshTtlMs;
|
|
||||||
const hasRefreshTtl =
|
|
||||||
typeof refreshTtlMsRaw === 'number' && Number.isFinite(refreshTtlMsRaw) && refreshTtlMsRaw > 0;
|
|
||||||
const now = deps.now();
|
|
||||||
const cacheAgeMs =
|
|
||||||
cached && typeof cached.updatedAt === 'number' && Number.isFinite(cached.updatedAt)
|
|
||||||
? Math.max(0, now - cached.updatedAt)
|
|
||||||
: Number.POSITIVE_INFINITY;
|
|
||||||
const isCacheFresh = !hasRefreshTtl || cacheAgeMs <= refreshTtlMsRaw;
|
|
||||||
const isCacheFormatCurrent =
|
|
||||||
cached?.formatVersion === undefined
|
|
||||||
? false
|
|
||||||
: cached.formatVersion >= CHARACTER_DICTIONARY_FORMAT_VERSION;
|
|
||||||
if (cached?.zipPath && fs.existsSync(cached.zipPath) && isCacheFresh && isCacheFormatCurrent) {
|
|
||||||
deps.logInfo?.(
|
|
||||||
`[dictionary] cache hit for AniList ${resolvedMedia.id}: ${path.basename(cached.zipPath)}`,
|
|
||||||
);
|
|
||||||
return {
|
|
||||||
zipPath: cached.zipPath,
|
|
||||||
fromCache: true,
|
|
||||||
mediaId: cached.mediaId,
|
|
||||||
mediaTitle: cached.mediaTitle,
|
|
||||||
entryCount: cached.entryCount,
|
|
||||||
dictionaryTitle: cached.dictionaryTitle ?? buildDictionaryTitle(cached.mediaId),
|
|
||||||
revision: cached.revision,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const { mediaTitle: fetchedMediaTitle, characters } = await fetchCharactersForMedia(
|
|
||||||
resolvedMedia.id,
|
|
||||||
waitForAniListRequestSlot,
|
|
||||||
);
|
|
||||||
if (characters.length === 0) {
|
|
||||||
throw new Error(`No characters returned for AniList media ${resolvedMedia.id}.`);
|
|
||||||
}
|
|
||||||
|
|
||||||
ensureDir(outputDir);
|
|
||||||
const zipFiles: Array<{ name: string; data: Buffer }> = [];
|
|
||||||
const termEntries: Array<Array<string | number | Array<string | Record<string, unknown>>>> =
|
|
||||||
[];
|
|
||||||
const seen = new Set<string>();
|
const seen = new Set<string>();
|
||||||
|
|
||||||
let hasAttemptedCharacterImageDownload = false;
|
|
||||||
for (const character of characters) {
|
for (const character of characters) {
|
||||||
let imagePath: string | null = null;
|
const imagePath = imagesByCharacterId.get(character.id)?.path ?? null;
|
||||||
if (character.imageUrl) {
|
const glossary = createDefinitionGlossary(character, mediaTitle, imagePath);
|
||||||
if (hasAttemptedCharacterImageDownload) {
|
|
||||||
await sleepMs(CHARACTER_IMAGE_DOWNLOAD_DELAY_MS);
|
|
||||||
}
|
|
||||||
hasAttemptedCharacterImageDownload = true;
|
|
||||||
const image = await downloadCharacterImage(character.imageUrl, character.id);
|
|
||||||
if (image) {
|
|
||||||
imagePath = `img/${image.filename}`;
|
|
||||||
zipFiles.push({
|
|
||||||
name: imagePath,
|
|
||||||
data: image.bytes,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const glossary = createDefinitionGlossary(character, fetchedMediaTitle, imagePath);
|
|
||||||
const candidateTerms = buildNameTerms(character);
|
const candidateTerms = buildNameTerms(character);
|
||||||
for (const term of candidateTerms) {
|
for (const term of candidateTerms) {
|
||||||
const reading = buildReading(term);
|
const reading = buildReading(term);
|
||||||
@@ -912,56 +892,253 @@ export function createCharacterDictionaryRuntimeService(deps: CharacterDictionar
|
|||||||
throw new Error('No dictionary entries generated from AniList character data.');
|
throw new Error('No dictionary entries generated from AniList character data.');
|
||||||
}
|
}
|
||||||
|
|
||||||
const revision = String(now);
|
return {
|
||||||
const dictionaryTitle = buildDictionaryTitle(resolvedMedia.id);
|
formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION,
|
||||||
zipFiles.push({
|
mediaId,
|
||||||
|
mediaTitle,
|
||||||
|
entryCount: termEntries.length,
|
||||||
|
updatedAt,
|
||||||
|
termEntries,
|
||||||
|
images: [...imagesByCharacterId.values()],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildDictionaryZip(
|
||||||
|
outputPath: string,
|
||||||
|
dictionaryTitle: string,
|
||||||
|
description: string,
|
||||||
|
revision: string,
|
||||||
|
termEntries: CharacterDictionaryTermEntry[],
|
||||||
|
images: CharacterDictionarySnapshotImage[],
|
||||||
|
): { zipPath: string; entryCount: number } {
|
||||||
|
const zipFiles: Array<{ name: string; data: Buffer }> = [
|
||||||
|
{
|
||||||
name: 'index.json',
|
name: 'index.json',
|
||||||
data: Buffer.from(
|
data: Buffer.from(JSON.stringify(createIndex(dictionaryTitle, description, revision), null, 2), 'utf8'),
|
||||||
JSON.stringify(createIndex(resolvedMedia.id, fetchedMediaTitle, revision), null, 2),
|
},
|
||||||
'utf8',
|
{
|
||||||
),
|
|
||||||
});
|
|
||||||
zipFiles.push({
|
|
||||||
name: 'tag_bank_1.json',
|
name: 'tag_bank_1.json',
|
||||||
data: Buffer.from(JSON.stringify(createTagBank()), 'utf8'),
|
data: Buffer.from(JSON.stringify(createTagBank()), 'utf8'),
|
||||||
});
|
},
|
||||||
|
];
|
||||||
|
|
||||||
const entriesPerBank = 10_000;
|
for (const image of images) {
|
||||||
for (let i = 0; i < termEntries.length; i += entriesPerBank) {
|
|
||||||
const chunk = termEntries.slice(i, i + entriesPerBank);
|
|
||||||
zipFiles.push({
|
zipFiles.push({
|
||||||
name: `term_bank_${Math.floor(i / entriesPerBank) + 1}.json`,
|
name: image.path,
|
||||||
data: Buffer.from(JSON.stringify(chunk), 'utf8'),
|
data: Buffer.from(image.dataBase64, 'base64'),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const zipBuffer = createStoredZip(zipFiles);
|
const entriesPerBank = 10_000;
|
||||||
const zipPath = path.join(outputDir, `anilist-${resolvedMedia.id}.zip`);
|
for (let i = 0; i < termEntries.length; i += entriesPerBank) {
|
||||||
fs.writeFileSync(zipPath, zipBuffer);
|
zipFiles.push({
|
||||||
|
name: `term_bank_${Math.floor(i / entriesPerBank) + 1}.json`,
|
||||||
|
data: Buffer.from(JSON.stringify(termEntries.slice(i, i + entriesPerBank)), 'utf8'),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const cacheEntry: CharacterDictionaryCacheEntry = {
|
ensureDir(path.dirname(outputPath));
|
||||||
mediaId: resolvedMedia.id,
|
fs.writeFileSync(outputPath, createStoredZip(zipFiles));
|
||||||
mediaTitle: fetchedMediaTitle,
|
return { zipPath: outputPath, entryCount: termEntries.length };
|
||||||
entryCount: termEntries.length,
|
}
|
||||||
zipPath,
|
|
||||||
updatedAt: now,
|
function buildMergedRevision(mediaIds: number[], snapshots: CharacterDictionarySnapshot[]): string {
|
||||||
formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION,
|
const hash = createHash('sha1');
|
||||||
dictionaryTitle,
|
hash.update(
|
||||||
revision,
|
JSON.stringify({
|
||||||
|
mediaIds,
|
||||||
|
snapshots: snapshots.map((snapshot) => ({
|
||||||
|
mediaId: snapshot.mediaId,
|
||||||
|
updatedAt: snapshot.updatedAt,
|
||||||
|
entryCount: snapshot.entryCount,
|
||||||
|
})),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return hash.digest('hex').slice(0, 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createCharacterDictionaryRuntimeService(deps: CharacterDictionaryRuntimeDeps): {
|
||||||
|
getOrCreateCurrentSnapshot: (targetPath?: string) => Promise<CharacterDictionarySnapshotResult>;
|
||||||
|
buildMergedDictionary: (mediaIds: number[]) => Promise<MergedCharacterDictionaryBuildResult>;
|
||||||
|
generateForCurrentMedia: (
|
||||||
|
targetPath?: string,
|
||||||
|
options?: CharacterDictionaryGenerateOptions,
|
||||||
|
) => Promise<CharacterDictionaryBuildResult>;
|
||||||
|
} {
|
||||||
|
const outputDir = path.join(deps.userDataPath, 'character-dictionaries');
|
||||||
|
const sleepMs = deps.sleep ?? sleep;
|
||||||
|
|
||||||
|
const resolveCurrentMedia = async (
|
||||||
|
targetPath?: string,
|
||||||
|
beforeRequest?: () => Promise<void>,
|
||||||
|
): Promise<ResolvedAniListMedia> => {
|
||||||
|
const dictionaryTarget = targetPath?.trim() || '';
|
||||||
|
const guessInput =
|
||||||
|
dictionaryTarget.length > 0
|
||||||
|
? resolveDictionaryGuessInputs(dictionaryTarget)
|
||||||
|
: {
|
||||||
|
mediaPath: deps.getCurrentMediaPath(),
|
||||||
|
mediaTitle: deps.getCurrentMediaTitle(),
|
||||||
|
};
|
||||||
|
const mediaPathForGuess = deps.resolveMediaPathForJimaku(guessInput.mediaPath);
|
||||||
|
const mediaTitle = guessInput.mediaTitle;
|
||||||
|
const guessed = await deps.guessAnilistMediaInfo(mediaPathForGuess, mediaTitle);
|
||||||
|
if (!guessed || !guessed.title.trim()) {
|
||||||
|
throw new Error('Unable to resolve current anime from media path/title.');
|
||||||
|
}
|
||||||
|
return resolveAniListMediaIdFromGuess(guessed, beforeRequest);
|
||||||
};
|
};
|
||||||
cache.anilistById[String(resolvedMedia.id)] = cacheEntry;
|
|
||||||
writeCache(cachePath, cache);
|
|
||||||
|
|
||||||
|
const getOrCreateSnapshot = async (
|
||||||
|
mediaId: number,
|
||||||
|
mediaTitleHint?: string,
|
||||||
|
beforeRequest?: () => Promise<void>,
|
||||||
|
): Promise<CharacterDictionarySnapshotResult> => {
|
||||||
|
const snapshotPath = getSnapshotPath(outputDir, mediaId);
|
||||||
|
const cachedSnapshot = readSnapshot(snapshotPath);
|
||||||
|
if (cachedSnapshot) {
|
||||||
|
deps.logInfo?.(`[dictionary] snapshot hit for AniList ${mediaId}`);
|
||||||
|
return {
|
||||||
|
mediaId: cachedSnapshot.mediaId,
|
||||||
|
mediaTitle: cachedSnapshot.mediaTitle,
|
||||||
|
entryCount: cachedSnapshot.entryCount,
|
||||||
|
fromCache: true,
|
||||||
|
updatedAt: cachedSnapshot.updatedAt,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const { mediaTitle: fetchedMediaTitle, characters } = await fetchCharactersForMedia(
|
||||||
|
mediaId,
|
||||||
|
beforeRequest,
|
||||||
|
);
|
||||||
|
if (characters.length === 0) {
|
||||||
|
throw new Error(`No characters returned for AniList media ${mediaId}.`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const imagesByCharacterId = new Map<number, CharacterDictionarySnapshotImage>();
|
||||||
|
let hasAttemptedCharacterImageDownload = false;
|
||||||
|
for (const character of characters) {
|
||||||
|
if (!character.imageUrl) continue;
|
||||||
|
if (hasAttemptedCharacterImageDownload) {
|
||||||
|
await sleepMs(CHARACTER_IMAGE_DOWNLOAD_DELAY_MS);
|
||||||
|
}
|
||||||
|
hasAttemptedCharacterImageDownload = true;
|
||||||
|
const image = await downloadCharacterImage(character.imageUrl, character.id);
|
||||||
|
if (!image) continue;
|
||||||
|
imagesByCharacterId.set(character.id, {
|
||||||
|
path: buildSnapshotImagePath(mediaId, character.id, image.ext),
|
||||||
|
dataBase64: image.bytes.toString('base64'),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const snapshot = buildSnapshotFromCharacters(
|
||||||
|
mediaId,
|
||||||
|
fetchedMediaTitle || mediaTitleHint || `AniList ${mediaId}`,
|
||||||
|
characters,
|
||||||
|
imagesByCharacterId,
|
||||||
|
deps.now(),
|
||||||
|
);
|
||||||
|
writeSnapshot(snapshotPath, snapshot);
|
||||||
deps.logInfo?.(
|
deps.logInfo?.(
|
||||||
`[dictionary] generated AniList ${resolvedMedia.id}: ${termEntries.length} terms -> ${zipPath}`,
|
`[dictionary] stored snapshot for AniList ${mediaId}: ${snapshot.entryCount} terms`,
|
||||||
);
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
zipPath,
|
mediaId: snapshot.mediaId,
|
||||||
|
mediaTitle: snapshot.mediaTitle,
|
||||||
|
entryCount: snapshot.entryCount,
|
||||||
fromCache: false,
|
fromCache: false,
|
||||||
|
updatedAt: snapshot.updatedAt,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
getOrCreateCurrentSnapshot: async (targetPath?: string) => {
|
||||||
|
let hasAniListRequest = false;
|
||||||
|
const waitForAniListRequestSlot = async (): Promise<void> => {
|
||||||
|
if (!hasAniListRequest) {
|
||||||
|
hasAniListRequest = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
await sleepMs(ANILIST_REQUEST_DELAY_MS);
|
||||||
|
};
|
||||||
|
const resolvedMedia = await resolveCurrentMedia(targetPath, waitForAniListRequestSlot);
|
||||||
|
return getOrCreateSnapshot(resolvedMedia.id, resolvedMedia.title, waitForAniListRequestSlot);
|
||||||
|
},
|
||||||
|
buildMergedDictionary: async (mediaIds: number[]) => {
|
||||||
|
const normalizedMediaIds = mediaIds
|
||||||
|
.filter((mediaId) => Number.isFinite(mediaId) && mediaId > 0)
|
||||||
|
.map((mediaId) => Math.floor(mediaId));
|
||||||
|
const snapshots = normalizedMediaIds.map((mediaId) => {
|
||||||
|
const snapshot = readSnapshot(getSnapshotPath(outputDir, mediaId));
|
||||||
|
if (!snapshot) {
|
||||||
|
throw new Error(`Missing character dictionary snapshot for AniList ${mediaId}.`);
|
||||||
|
}
|
||||||
|
return snapshot;
|
||||||
|
});
|
||||||
|
const revision = buildMergedRevision(normalizedMediaIds, snapshots);
|
||||||
|
const description =
|
||||||
|
snapshots.length === 1
|
||||||
|
? `Character names from ${snapshots[0]!.mediaTitle}`
|
||||||
|
: `Character names from ${snapshots.length} recent anime`;
|
||||||
|
const { zipPath, entryCount } = buildDictionaryZip(
|
||||||
|
getMergedZipPath(outputDir),
|
||||||
|
CHARACTER_DICTIONARY_MERGED_TITLE,
|
||||||
|
description,
|
||||||
|
revision,
|
||||||
|
snapshots.flatMap((snapshot) => snapshot.termEntries),
|
||||||
|
snapshots.flatMap((snapshot) => snapshot.images),
|
||||||
|
);
|
||||||
|
deps.logInfo?.(
|
||||||
|
`[dictionary] rebuilt merged dictionary: ${normalizedMediaIds.join(', ') || '<empty>'} -> ${zipPath}`,
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
zipPath,
|
||||||
|
revision,
|
||||||
|
dictionaryTitle: CHARACTER_DICTIONARY_MERGED_TITLE,
|
||||||
|
entryCount,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
generateForCurrentMedia: async (targetPath?: string, _options?: CharacterDictionaryGenerateOptions) => {
|
||||||
|
let hasAniListRequest = false;
|
||||||
|
const waitForAniListRequestSlot = async (): Promise<void> => {
|
||||||
|
if (!hasAniListRequest) {
|
||||||
|
hasAniListRequest = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
await sleepMs(ANILIST_REQUEST_DELAY_MS);
|
||||||
|
};
|
||||||
|
const resolvedMedia = await resolveCurrentMedia(targetPath, waitForAniListRequestSlot);
|
||||||
|
const snapshot = await getOrCreateSnapshot(
|
||||||
|
resolvedMedia.id,
|
||||||
|
resolvedMedia.title,
|
||||||
|
waitForAniListRequestSlot,
|
||||||
|
);
|
||||||
|
const storedSnapshot = readSnapshot(getSnapshotPath(outputDir, resolvedMedia.id));
|
||||||
|
if (!storedSnapshot) {
|
||||||
|
throw new Error(`Snapshot missing after generation for AniList ${resolvedMedia.id}.`);
|
||||||
|
}
|
||||||
|
const revision = String(storedSnapshot.updatedAt);
|
||||||
|
const dictionaryTitle = buildDictionaryTitle(resolvedMedia.id);
|
||||||
|
const description = `Character names from ${storedSnapshot.mediaTitle} [AniList media ID ${resolvedMedia.id}]`;
|
||||||
|
const zipPath = path.join(outputDir, `anilist-${resolvedMedia.id}.zip`);
|
||||||
|
buildDictionaryZip(
|
||||||
|
zipPath,
|
||||||
|
dictionaryTitle,
|
||||||
|
description,
|
||||||
|
revision,
|
||||||
|
storedSnapshot.termEntries,
|
||||||
|
storedSnapshot.images,
|
||||||
|
);
|
||||||
|
deps.logInfo?.(
|
||||||
|
`[dictionary] generated AniList ${resolvedMedia.id}: ${storedSnapshot.entryCount} terms -> ${zipPath}`,
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
zipPath,
|
||||||
|
fromCache: snapshot.fromCache,
|
||||||
mediaId: resolvedMedia.id,
|
mediaId: resolvedMedia.id,
|
||||||
mediaTitle: fetchedMediaTitle,
|
mediaTitle: storedSnapshot.mediaTitle,
|
||||||
entryCount: termEntries.length,
|
entryCount: storedSnapshot.entryCount,
|
||||||
dictionaryTitle,
|
dictionaryTitle,
|
||||||
revision,
|
revision,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -9,213 +9,249 @@ function makeTempDir(): string {
|
|||||||
return fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-char-dict-auto-sync-'));
|
return fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-char-dict-auto-sync-'));
|
||||||
}
|
}
|
||||||
|
|
||||||
test('auto sync imports current dictionary and updates persisted state', async () => {
|
test('auto sync imports merged dictionary and persists MRU state', async () => {
|
||||||
const userDataPath = makeTempDir();
|
const userDataPath = makeTempDir();
|
||||||
const imported: string[] = [];
|
const imported: string[] = [];
|
||||||
|
const deleted: string[] = [];
|
||||||
const upserts: Array<{ title: string; scope: 'all' | 'active' }> = [];
|
const upserts: Array<{ title: string; scope: 'all' | 'active' }> = [];
|
||||||
|
const mergedBuilds: number[][] = [];
|
||||||
|
|
||||||
|
let importedRevision: string | null = null;
|
||||||
|
|
||||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||||
userDataPath,
|
userDataPath,
|
||||||
getConfig: () => ({
|
getConfig: () => ({
|
||||||
enabled: true,
|
enabled: true,
|
||||||
refreshTtlHours: 168,
|
|
||||||
maxLoaded: 3,
|
maxLoaded: 3,
|
||||||
evictionPolicy: 'delete',
|
|
||||||
profileScope: 'all',
|
profileScope: 'all',
|
||||||
}),
|
}),
|
||||||
generateCharacterDictionary: async () => ({
|
getOrCreateCurrentSnapshot: async () => ({
|
||||||
zipPath: '/tmp/anilist-130298.zip',
|
|
||||||
fromCache: false,
|
|
||||||
mediaId: 130298,
|
mediaId: 130298,
|
||||||
mediaTitle: 'The Eminence in Shadow',
|
mediaTitle: 'The Eminence in Shadow',
|
||||||
entryCount: 2544,
|
entryCount: 2544,
|
||||||
dictionaryTitle: 'SubMiner Character Dictionary (AniList 130298)',
|
fromCache: false,
|
||||||
revision: '100',
|
updatedAt: 1000,
|
||||||
}),
|
}),
|
||||||
getYomitanDictionaryInfo: async () => [],
|
buildMergedDictionary: async (mediaIds) => {
|
||||||
|
mergedBuilds.push([...mediaIds]);
|
||||||
|
return {
|
||||||
|
zipPath: '/tmp/subminer-character-dictionary.zip',
|
||||||
|
revision: 'rev-1',
|
||||||
|
dictionaryTitle: 'SubMiner Character Dictionary',
|
||||||
|
entryCount: 2544,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
getYomitanDictionaryInfo: async () =>
|
||||||
|
importedRevision
|
||||||
|
? [{ title: 'SubMiner Character Dictionary', revision: importedRevision }]
|
||||||
|
: [],
|
||||||
importYomitanDictionary: async (zipPath) => {
|
importYomitanDictionary: async (zipPath) => {
|
||||||
imported.push(zipPath);
|
imported.push(zipPath);
|
||||||
|
importedRevision = 'rev-1';
|
||||||
|
return true;
|
||||||
|
},
|
||||||
|
deleteYomitanDictionary: async (dictionaryTitle) => {
|
||||||
|
deleted.push(dictionaryTitle);
|
||||||
|
importedRevision = null;
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
deleteYomitanDictionary: async () => true,
|
|
||||||
upsertYomitanDictionarySettings: async (dictionaryTitle, profileScope) => {
|
upsertYomitanDictionarySettings: async (dictionaryTitle, profileScope) => {
|
||||||
upserts.push({ title: dictionaryTitle, scope: profileScope });
|
upserts.push({ title: dictionaryTitle, scope: profileScope });
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
removeYomitanDictionarySettings: async () => true,
|
|
||||||
now: () => 1000,
|
now: () => 1000,
|
||||||
});
|
});
|
||||||
|
|
||||||
await runtime.runSyncNow();
|
await runtime.runSyncNow();
|
||||||
|
|
||||||
assert.deepEqual(imported, ['/tmp/anilist-130298.zip']);
|
assert.deepEqual(mergedBuilds, [[130298]]);
|
||||||
assert.deepEqual(upserts, [
|
assert.deepEqual(imported, ['/tmp/subminer-character-dictionary.zip']);
|
||||||
{ title: 'SubMiner Character Dictionary (AniList 130298)', scope: 'all' },
|
assert.deepEqual(deleted, []);
|
||||||
]);
|
assert.deepEqual(upserts, [{ title: 'SubMiner Character Dictionary', scope: 'all' }]);
|
||||||
|
|
||||||
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
||||||
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
||||||
activeMediaIds: number[];
|
activeMediaIds: number[];
|
||||||
dictionariesByMediaId: Record<string, { lastImportedRevision: string }>;
|
mergedRevision: string | null;
|
||||||
|
mergedDictionaryTitle: string | null;
|
||||||
};
|
};
|
||||||
assert.deepEqual(state.activeMediaIds, [130298]);
|
assert.deepEqual(state.activeMediaIds, [130298]);
|
||||||
assert.equal(state.dictionariesByMediaId['130298']?.lastImportedRevision, '100');
|
assert.equal(state.mergedRevision, 'rev-1');
|
||||||
|
assert.equal(state.mergedDictionaryTitle, 'SubMiner Character Dictionary');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('auto sync rotates dictionaries by LRU and deletes overflow when policy=delete', async () => {
|
test('auto sync skips rebuild/import on unchanged revisit when merged dictionary is current', async () => {
|
||||||
const userDataPath = makeTempDir();
|
const userDataPath = makeTempDir();
|
||||||
const generated = [
|
const mergedBuilds: number[][] = [];
|
||||||
{ mediaId: 1, zipPath: '/tmp/anilist-1.zip', title: 'SubMiner Character Dictionary (AniList 1)' },
|
const imports: string[] = [];
|
||||||
{ mediaId: 2, zipPath: '/tmp/anilist-2.zip', title: 'SubMiner Character Dictionary (AniList 2)' },
|
let importedRevision: string | null = null;
|
||||||
{ mediaId: 3, zipPath: '/tmp/anilist-3.zip', title: 'SubMiner Character Dictionary (AniList 3)' },
|
|
||||||
{ mediaId: 4, zipPath: '/tmp/anilist-4.zip', title: 'SubMiner Character Dictionary (AniList 4)' },
|
|
||||||
];
|
|
||||||
let runIndex = 0;
|
|
||||||
const deletes: string[] = [];
|
|
||||||
const removals: Array<{ title: string; mode: 'delete' | 'disable' }> = [];
|
|
||||||
|
|
||||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||||
userDataPath,
|
userDataPath,
|
||||||
getConfig: () => ({
|
getConfig: () => ({
|
||||||
enabled: true,
|
enabled: true,
|
||||||
refreshTtlHours: 168,
|
|
||||||
maxLoaded: 3,
|
maxLoaded: 3,
|
||||||
evictionPolicy: 'delete',
|
|
||||||
profileScope: 'all',
|
profileScope: 'all',
|
||||||
}),
|
}),
|
||||||
generateCharacterDictionary: async () => {
|
getOrCreateCurrentSnapshot: async () => ({
|
||||||
const current = generated[Math.min(runIndex, generated.length - 1)]!;
|
mediaId: 7,
|
||||||
runIndex += 1;
|
mediaTitle: 'Frieren',
|
||||||
return {
|
entryCount: 100,
|
||||||
zipPath: current.zipPath,
|
|
||||||
fromCache: false,
|
|
||||||
mediaId: current.mediaId,
|
|
||||||
mediaTitle: `Title ${current.mediaId}`,
|
|
||||||
entryCount: 10,
|
|
||||||
dictionaryTitle: current.title,
|
|
||||||
revision: String(current.mediaId),
|
|
||||||
};
|
|
||||||
},
|
|
||||||
getYomitanDictionaryInfo: async () => [],
|
|
||||||
importYomitanDictionary: async () => true,
|
|
||||||
deleteYomitanDictionary: async (dictionaryTitle) => {
|
|
||||||
deletes.push(dictionaryTitle);
|
|
||||||
return true;
|
|
||||||
},
|
|
||||||
upsertYomitanDictionarySettings: async () => true,
|
|
||||||
removeYomitanDictionarySettings: async (dictionaryTitle, _scope, mode) => {
|
|
||||||
removals.push({ title: dictionaryTitle, mode });
|
|
||||||
return true;
|
|
||||||
},
|
|
||||||
now: () => Date.now(),
|
|
||||||
});
|
|
||||||
|
|
||||||
await runtime.runSyncNow();
|
|
||||||
await runtime.runSyncNow();
|
|
||||||
await runtime.runSyncNow();
|
|
||||||
await runtime.runSyncNow();
|
|
||||||
|
|
||||||
assert.ok(removals.some((entry) => entry.title.includes('(AniList 1)') && entry.mode === 'delete'));
|
|
||||||
assert.ok(deletes.some((title) => title.includes('(AniList 1)')));
|
|
||||||
|
|
||||||
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
|
||||||
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
|
||||||
activeMediaIds: number[];
|
|
||||||
dictionariesByMediaId: Record<string, unknown>;
|
|
||||||
};
|
|
||||||
assert.deepEqual(state.activeMediaIds, [4, 3, 2]);
|
|
||||||
assert.equal(state.dictionariesByMediaId['1'], undefined);
|
|
||||||
});
|
|
||||||
|
|
||||||
test('auto sync disable eviction keeps dictionary in DB and only disables settings', async () => {
|
|
||||||
const userDataPath = makeTempDir();
|
|
||||||
let runIndex = 0;
|
|
||||||
const deletes: string[] = [];
|
|
||||||
const removals: Array<{ title: string; mode: 'delete' | 'disable' }> = [];
|
|
||||||
|
|
||||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
|
||||||
userDataPath,
|
|
||||||
getConfig: () => ({
|
|
||||||
enabled: true,
|
|
||||||
refreshTtlHours: 168,
|
|
||||||
maxLoaded: 1,
|
|
||||||
evictionPolicy: 'disable',
|
|
||||||
profileScope: 'all',
|
|
||||||
}),
|
|
||||||
generateCharacterDictionary: async () => {
|
|
||||||
runIndex += 1;
|
|
||||||
return {
|
|
||||||
zipPath: `/tmp/anilist-${runIndex}.zip`,
|
|
||||||
fromCache: false,
|
|
||||||
mediaId: runIndex,
|
|
||||||
mediaTitle: `Title ${runIndex}`,
|
|
||||||
entryCount: 10,
|
|
||||||
dictionaryTitle: `SubMiner Character Dictionary (AniList ${runIndex})`,
|
|
||||||
revision: String(runIndex),
|
|
||||||
};
|
|
||||||
},
|
|
||||||
getYomitanDictionaryInfo: async () => [],
|
|
||||||
importYomitanDictionary: async () => true,
|
|
||||||
deleteYomitanDictionary: async (dictionaryTitle) => {
|
|
||||||
deletes.push(dictionaryTitle);
|
|
||||||
return true;
|
|
||||||
},
|
|
||||||
upsertYomitanDictionarySettings: async () => true,
|
|
||||||
removeYomitanDictionarySettings: async (dictionaryTitle, _scope, mode) => {
|
|
||||||
removals.push({ title: dictionaryTitle, mode });
|
|
||||||
return true;
|
|
||||||
},
|
|
||||||
now: () => Date.now(),
|
|
||||||
});
|
|
||||||
|
|
||||||
await runtime.runSyncNow();
|
|
||||||
await runtime.runSyncNow();
|
|
||||||
|
|
||||||
assert.ok(removals.some((entry) => entry.mode === 'disable' && entry.title.includes('(AniList 1)')));
|
|
||||||
assert.equal(deletes.some((title) => title.includes('(AniList 1)')), false);
|
|
||||||
|
|
||||||
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
|
||||||
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
|
||||||
activeMediaIds: number[];
|
|
||||||
dictionariesByMediaId: Record<string, unknown>;
|
|
||||||
};
|
|
||||||
assert.deepEqual(state.activeMediaIds, [2]);
|
|
||||||
assert.ok(state.dictionariesByMediaId['1']);
|
|
||||||
assert.ok(state.dictionariesByMediaId['2']);
|
|
||||||
});
|
|
||||||
|
|
||||||
test('auto sync fails fast when yomitan import hangs', async () => {
|
|
||||||
const userDataPath = makeTempDir();
|
|
||||||
|
|
||||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
|
||||||
userDataPath,
|
|
||||||
operationTimeoutMs: 5,
|
|
||||||
getConfig: () => ({
|
|
||||||
enabled: true,
|
|
||||||
refreshTtlHours: 168,
|
|
||||||
maxLoaded: 3,
|
|
||||||
evictionPolicy: 'delete',
|
|
||||||
profileScope: 'all',
|
|
||||||
}),
|
|
||||||
generateCharacterDictionary: async () => ({
|
|
||||||
zipPath: '/tmp/anilist-130298.zip',
|
|
||||||
fromCache: true,
|
fromCache: true,
|
||||||
mediaId: 130298,
|
updatedAt: 1000,
|
||||||
mediaTitle: 'The Eminence in Shadow',
|
|
||||||
entryCount: 2544,
|
|
||||||
dictionaryTitle: 'SubMiner Character Dictionary (AniList 130298)',
|
|
||||||
revision: '100',
|
|
||||||
}),
|
|
||||||
getYomitanDictionaryInfo: async () => [],
|
|
||||||
importYomitanDictionary: async () =>
|
|
||||||
new Promise<boolean>(() => {
|
|
||||||
// never resolve
|
|
||||||
}),
|
}),
|
||||||
|
buildMergedDictionary: async (mediaIds) => {
|
||||||
|
mergedBuilds.push([...mediaIds]);
|
||||||
|
return {
|
||||||
|
zipPath: '/tmp/merged.zip',
|
||||||
|
revision: 'rev-7',
|
||||||
|
dictionaryTitle: 'SubMiner Character Dictionary',
|
||||||
|
entryCount: 100,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
getYomitanDictionaryInfo: async () =>
|
||||||
|
importedRevision
|
||||||
|
? [{ title: 'SubMiner Character Dictionary', revision: importedRevision }]
|
||||||
|
: [],
|
||||||
|
importYomitanDictionary: async (zipPath) => {
|
||||||
|
imports.push(zipPath);
|
||||||
|
importedRevision = 'rev-7';
|
||||||
|
return true;
|
||||||
|
},
|
||||||
deleteYomitanDictionary: async () => true,
|
deleteYomitanDictionary: async () => true,
|
||||||
upsertYomitanDictionarySettings: async () => true,
|
upsertYomitanDictionarySettings: async () => true,
|
||||||
removeYomitanDictionarySettings: async () => true,
|
now: () => 1000,
|
||||||
|
});
|
||||||
|
|
||||||
|
await runtime.runSyncNow();
|
||||||
|
await runtime.runSyncNow();
|
||||||
|
|
||||||
|
assert.deepEqual(mergedBuilds, [[7]]);
|
||||||
|
assert.deepEqual(imports, ['/tmp/merged.zip']);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Visiting media 1, then 2, then 1 again keeps the same set of IDs but changes
// their most-recently-used order; each of the three syncs is expected to
// trigger a merged rebuild with the newest media first.
test('auto sync rebuilds merged dictionary when MRU order changes', async () => {
  const userDataPath = makeTempDir();
  const visitOrder = [1, 2, 1];
  const rebuildRequests: number[][] = [];
  const removedTitles: string[] = [];
  let yomitanRevision: string | null = null;
  let visitCount = 0;

  const runtime = createCharacterDictionaryAutoSyncRuntimeService({
    userDataPath,
    getConfig: () => {
      return { enabled: true, maxLoaded: 3, profileScope: 'all' };
    },
    // Each sync reports the next media in `visitOrder`, clamped to the last
    // entry; `fromCache: true` on every snapshot.
    getOrCreateCurrentSnapshot: async () => {
      const position = Math.min(visitCount, visitOrder.length - 1);
      visitCount += 1;
      const mediaId = visitOrder[position]!;
      return {
        mediaId,
        mediaTitle: `Title ${mediaId}`,
        entryCount: 10,
        fromCache: true,
        updatedAt: mediaId,
      };
    },
    // Records a copy of the requested IDs and derives the revision from them
    // so that a different order produces a different revision.
    buildMergedDictionary: async (mediaIds) => {
      rebuildRequests.push(Array.from(mediaIds));
      const revision = `rev-${mediaIds.join('-')}`;
      const zipPath = `/tmp/${revision}.zip`;
      return {
        zipPath,
        revision,
        dictionaryTitle: 'SubMiner Character Dictionary',
        entryCount: mediaIds.length * 10,
      };
    },
    // Fake Yomitan store: holds at most one dictionary, tracked by revision.
    getYomitanDictionaryInfo: async () => {
      if (!yomitanRevision) {
        return [];
      }
      return [{ title: 'SubMiner Character Dictionary', revision: yomitanRevision }];
    },
    importYomitanDictionary: async (zipPath) => {
      // The fake ZIP file name (minus extension) is the revision itself.
      yomitanRevision = path.basename(zipPath, '.zip');
      return true;
    },
    deleteYomitanDictionary: async (dictionaryTitle) => {
      removedTitles.push(dictionaryTitle);
      yomitanRevision = null;
      return true;
    },
    upsertYomitanDictionarySettings: async () => true,
    now: () => 1000,
  });

  // One sync per entry in `visitOrder`, run strictly in sequence.
  for (let run = 0; run < 3; run += 1) {
    await runtime.runSyncNow();
  }

  assert.deepEqual(rebuildRequests, [[1], [2, 1], [1, 2]]);
  assert.ok(removedTitles.length >= 2);
});
|
||||||
|
|
||||||
|
test('auto sync evicts least recently used media from merged set', async () => {
|
||||||
|
const userDataPath = makeTempDir();
|
||||||
|
const sequence = [1, 2, 3, 4];
|
||||||
|
const mergedBuilds: number[][] = [];
|
||||||
|
let runIndex = 0;
|
||||||
|
let importedRevision: string | null = null;
|
||||||
|
|
||||||
|
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||||
|
userDataPath,
|
||||||
|
getConfig: () => ({
|
||||||
|
enabled: true,
|
||||||
|
maxLoaded: 3,
|
||||||
|
profileScope: 'all',
|
||||||
|
}),
|
||||||
|
getOrCreateCurrentSnapshot: async () => {
|
||||||
|
const mediaId = sequence[Math.min(runIndex, sequence.length - 1)]!;
|
||||||
|
runIndex += 1;
|
||||||
|
return {
|
||||||
|
mediaId,
|
||||||
|
mediaTitle: `Title ${mediaId}`,
|
||||||
|
entryCount: 10,
|
||||||
|
fromCache: true,
|
||||||
|
updatedAt: mediaId,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
buildMergedDictionary: async (mediaIds) => {
|
||||||
|
mergedBuilds.push([...mediaIds]);
|
||||||
|
const revision = `rev-${mediaIds.join('-')}`;
|
||||||
|
return {
|
||||||
|
zipPath: `/tmp/${revision}.zip`,
|
||||||
|
revision,
|
||||||
|
dictionaryTitle: 'SubMiner Character Dictionary',
|
||||||
|
entryCount: mediaIds.length * 10,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
getYomitanDictionaryInfo: async () =>
|
||||||
|
importedRevision
|
||||||
|
? [{ title: 'SubMiner Character Dictionary', revision: importedRevision }]
|
||||||
|
: [],
|
||||||
|
importYomitanDictionary: async (zipPath) => {
|
||||||
|
importedRevision = path.basename(zipPath, '.zip');
|
||||||
|
return true;
|
||||||
|
},
|
||||||
|
deleteYomitanDictionary: async () => {
|
||||||
|
importedRevision = null;
|
||||||
|
return true;
|
||||||
|
},
|
||||||
|
upsertYomitanDictionarySettings: async () => true,
|
||||||
now: () => Date.now(),
|
now: () => Date.now(),
|
||||||
});
|
});
|
||||||
|
|
||||||
await assert.rejects(async () => runtime.runSyncNow(), /importYomitanDictionary\(anilist-130298\.zip\) timed out after 5ms/);
|
await runtime.runSyncNow();
|
||||||
|
await runtime.runSyncNow();
|
||||||
|
await runtime.runSyncNow();
|
||||||
|
await runtime.runSyncNow();
|
||||||
|
|
||||||
|
assert.deepEqual(mergedBuilds, [[1], [2, 1], [3, 2, 1], [4, 3, 2]]);
|
||||||
|
|
||||||
|
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
||||||
|
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
||||||
|
activeMediaIds: number[];
|
||||||
|
};
|
||||||
|
assert.deepEqual(state.activeMediaIds, [4, 3, 2]);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,24 +1,15 @@
|
|||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
import type { AnilistCharacterDictionaryProfileScope } from '../../types';
|
||||||
import type {
|
import type {
|
||||||
AnilistCharacterDictionaryEvictionPolicy,
|
CharacterDictionarySnapshotResult,
|
||||||
AnilistCharacterDictionaryProfileScope,
|
MergedCharacterDictionaryBuildResult,
|
||||||
} from '../../types';
|
|
||||||
import type {
|
|
||||||
CharacterDictionaryBuildResult,
|
|
||||||
CharacterDictionaryGenerateOptions,
|
|
||||||
} from '../character-dictionary-runtime';
|
} from '../character-dictionary-runtime';
|
||||||
|
|
||||||
type AutoSyncStateDictionaryEntry = {
|
|
||||||
mediaId: number;
|
|
||||||
dictionaryTitle: string;
|
|
||||||
lastImportedRevision: string | null;
|
|
||||||
lastUsedAt: number;
|
|
||||||
};
|
|
||||||
|
|
||||||
type AutoSyncState = {
|
type AutoSyncState = {
|
||||||
activeMediaIds: number[];
|
activeMediaIds: number[];
|
||||||
dictionariesByMediaId: Record<string, AutoSyncStateDictionaryEntry>;
|
mergedRevision: string | null;
|
||||||
|
mergedDictionaryTitle: string | null;
|
||||||
};
|
};
|
||||||
|
|
||||||
type AutoSyncDictionaryInfo = {
|
type AutoSyncDictionaryInfo = {
|
||||||
@@ -28,18 +19,15 @@ type AutoSyncDictionaryInfo = {
|
|||||||
|
|
||||||
export interface CharacterDictionaryAutoSyncConfig {
|
export interface CharacterDictionaryAutoSyncConfig {
|
||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
refreshTtlHours: number;
|
|
||||||
maxLoaded: number;
|
maxLoaded: number;
|
||||||
evictionPolicy: AnilistCharacterDictionaryEvictionPolicy;
|
|
||||||
profileScope: AnilistCharacterDictionaryProfileScope;
|
profileScope: AnilistCharacterDictionaryProfileScope;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface CharacterDictionaryAutoSyncRuntimeDeps {
|
export interface CharacterDictionaryAutoSyncRuntimeDeps {
|
||||||
userDataPath: string;
|
userDataPath: string;
|
||||||
getConfig: () => CharacterDictionaryAutoSyncConfig;
|
getConfig: () => CharacterDictionaryAutoSyncConfig;
|
||||||
generateCharacterDictionary: (
|
getOrCreateCurrentSnapshot: (targetPath?: string) => Promise<CharacterDictionarySnapshotResult>;
|
||||||
options?: CharacterDictionaryGenerateOptions,
|
buildMergedDictionary: (mediaIds: number[]) => Promise<MergedCharacterDictionaryBuildResult>;
|
||||||
) => Promise<CharacterDictionaryBuildResult>;
|
|
||||||
getYomitanDictionaryInfo: () => Promise<AutoSyncDictionaryInfo[]>;
|
getYomitanDictionaryInfo: () => Promise<AutoSyncDictionaryInfo[]>;
|
||||||
importYomitanDictionary: (zipPath: string) => Promise<boolean>;
|
importYomitanDictionary: (zipPath: string) => Promise<boolean>;
|
||||||
deleteYomitanDictionary: (dictionaryTitle: string) => Promise<boolean>;
|
deleteYomitanDictionary: (dictionaryTitle: string) => Promise<boolean>;
|
||||||
@@ -47,11 +35,6 @@ export interface CharacterDictionaryAutoSyncRuntimeDeps {
|
|||||||
dictionaryTitle: string,
|
dictionaryTitle: string,
|
||||||
profileScope: AnilistCharacterDictionaryProfileScope,
|
profileScope: AnilistCharacterDictionaryProfileScope,
|
||||||
) => Promise<boolean>;
|
) => Promise<boolean>;
|
||||||
removeYomitanDictionarySettings: (
|
|
||||||
dictionaryTitle: string,
|
|
||||||
profileScope: AnilistCharacterDictionaryProfileScope,
|
|
||||||
mode: 'delete' | 'disable',
|
|
||||||
) => Promise<boolean>;
|
|
||||||
now: () => number;
|
now: () => number;
|
||||||
schedule?: (fn: () => void, delayMs: number) => ReturnType<typeof setTimeout>;
|
schedule?: (fn: () => void, delayMs: number) => ReturnType<typeof setTimeout>;
|
||||||
clearSchedule?: (timer: ReturnType<typeof setTimeout>) => void;
|
clearSchedule?: (timer: ReturnType<typeof setTimeout>) => void;
|
||||||
@@ -70,56 +53,29 @@ function readAutoSyncState(statePath: string): AutoSyncState {
|
|||||||
try {
|
try {
|
||||||
const raw = fs.readFileSync(statePath, 'utf8');
|
const raw = fs.readFileSync(statePath, 'utf8');
|
||||||
const parsed = JSON.parse(raw) as Partial<AutoSyncState>;
|
const parsed = JSON.parse(raw) as Partial<AutoSyncState>;
|
||||||
if (!parsed || typeof parsed !== 'object') {
|
const activeMediaIds = Array.isArray(parsed.activeMediaIds)
|
||||||
return { activeMediaIds: [], dictionariesByMediaId: {} };
|
? parsed.activeMediaIds
|
||||||
}
|
|
||||||
const dictionariesByMediaId = parsed.dictionariesByMediaId ?? {};
|
|
||||||
if (!dictionariesByMediaId || typeof dictionariesByMediaId !== 'object') {
|
|
||||||
return { activeMediaIds: [], dictionariesByMediaId: {} };
|
|
||||||
}
|
|
||||||
|
|
||||||
const normalizedEntries: Record<string, AutoSyncStateDictionaryEntry> = {};
|
|
||||||
for (const [key, value] of Object.entries(dictionariesByMediaId)) {
|
|
||||||
if (!value || typeof value !== 'object') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const mediaId = Number.parseInt(key, 10);
|
|
||||||
const dictionaryTitle =
|
|
||||||
typeof (value as { dictionaryTitle?: unknown }).dictionaryTitle === 'string'
|
|
||||||
? (value as { dictionaryTitle: string }).dictionaryTitle.trim()
|
|
||||||
: '';
|
|
||||||
if (!Number.isFinite(mediaId) || mediaId <= 0 || !dictionaryTitle) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const lastImportedRevisionRaw = (value as { lastImportedRevision?: unknown })
|
|
||||||
.lastImportedRevision;
|
|
||||||
const lastUsedAtRaw = (value as { lastUsedAt?: unknown }).lastUsedAt;
|
|
||||||
normalizedEntries[String(mediaId)] = {
|
|
||||||
mediaId,
|
|
||||||
dictionaryTitle,
|
|
||||||
lastImportedRevision:
|
|
||||||
typeof lastImportedRevisionRaw === 'string' && lastImportedRevisionRaw.length > 0
|
|
||||||
? lastImportedRevisionRaw
|
|
||||||
: null,
|
|
||||||
lastUsedAt:
|
|
||||||
typeof lastUsedAtRaw === 'number' && Number.isFinite(lastUsedAtRaw) ? lastUsedAtRaw : 0,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const activeMediaIdsRaw = Array.isArray(parsed.activeMediaIds) ? parsed.activeMediaIds : [];
|
|
||||||
const activeMediaIds = activeMediaIdsRaw
|
|
||||||
.filter((value): value is number => typeof value === 'number' && Number.isFinite(value))
|
.filter((value): value is number => typeof value === 'number' && Number.isFinite(value))
|
||||||
.map((value) => Math.max(1, Math.floor(value)))
|
.map((value) => Math.max(1, Math.floor(value)))
|
||||||
.filter((value, index, all) => all.indexOf(value) === index)
|
.filter((value, index, all) => all.indexOf(value) === index)
|
||||||
.filter((value) => normalizedEntries[String(value)] !== undefined);
|
: [];
|
||||||
|
|
||||||
return {
|
return {
|
||||||
activeMediaIds,
|
activeMediaIds,
|
||||||
dictionariesByMediaId: normalizedEntries,
|
mergedRevision:
|
||||||
|
typeof parsed.mergedRevision === 'string' && parsed.mergedRevision.length > 0
|
||||||
|
? parsed.mergedRevision
|
||||||
|
: null,
|
||||||
|
mergedDictionaryTitle:
|
||||||
|
typeof parsed.mergedDictionaryTitle === 'string' && parsed.mergedDictionaryTitle.length > 0
|
||||||
|
? parsed.mergedDictionaryTitle
|
||||||
|
: null,
|
||||||
};
|
};
|
||||||
} catch {
|
} catch {
|
||||||
return { activeMediaIds: [], dictionariesByMediaId: {} };
|
return {
|
||||||
|
activeMediaIds: [],
|
||||||
|
mergedRevision: null,
|
||||||
|
mergedDictionaryTitle: null,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,8 +84,12 @@ function writeAutoSyncState(statePath: string, state: AutoSyncState): void {
|
|||||||
fs.writeFileSync(statePath, JSON.stringify(state, null, 2), 'utf8');
|
fs.writeFileSync(statePath, JSON.stringify(state, null, 2), 'utf8');
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildDictionaryTitle(mediaId: number): string {
|
function arraysEqual(left: number[], right: number[]): boolean {
|
||||||
return `SubMiner Character Dictionary (AniList ${mediaId})`;
|
if (left.length !== right.length) return false;
|
||||||
|
for (let i = 0; i < left.length; i += 1) {
|
||||||
|
if (left[i] !== right[i]) return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createCharacterDictionaryAutoSyncRuntimeService(
|
export function createCharacterDictionaryAutoSyncRuntimeService(
|
||||||
@@ -173,15 +133,30 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const refreshTtlMs = Math.max(1, Math.floor(config.refreshTtlHours)) * 60 * 60 * 1000;
|
const snapshot = await deps.getOrCreateCurrentSnapshot();
|
||||||
const generation = await deps.generateCharacterDictionary({ refreshTtlMs });
|
|
||||||
const dictionaryTitle = generation.dictionaryTitle ?? buildDictionaryTitle(generation.mediaId);
|
|
||||||
const revision =
|
|
||||||
typeof generation.revision === 'string' && generation.revision.length > 0
|
|
||||||
? generation.revision
|
|
||||||
: null;
|
|
||||||
|
|
||||||
const state = readAutoSyncState(statePath);
|
const state = readAutoSyncState(statePath);
|
||||||
|
const nextActiveMediaIds = [
|
||||||
|
snapshot.mediaId,
|
||||||
|
...state.activeMediaIds.filter((mediaId) => mediaId !== snapshot.mediaId),
|
||||||
|
].slice(0, Math.max(1, Math.floor(config.maxLoaded)));
|
||||||
|
|
||||||
|
const retainedChanged = !arraysEqual(nextActiveMediaIds, state.activeMediaIds);
|
||||||
|
let merged: MergedCharacterDictionaryBuildResult | null = null;
|
||||||
|
if (
|
||||||
|
retainedChanged ||
|
||||||
|
!state.mergedRevision ||
|
||||||
|
!state.mergedDictionaryTitle ||
|
||||||
|
!snapshot.fromCache
|
||||||
|
) {
|
||||||
|
merged = await deps.buildMergedDictionary(nextActiveMediaIds);
|
||||||
|
}
|
||||||
|
|
||||||
|
const dictionaryTitle = merged?.dictionaryTitle ?? state.mergedDictionaryTitle;
|
||||||
|
const revision = merged?.revision ?? state.mergedRevision;
|
||||||
|
if (!dictionaryTitle || !revision) {
|
||||||
|
throw new Error('Merged character dictionary state is incomplete.');
|
||||||
|
}
|
||||||
|
|
||||||
const dictionaryInfo = await withOperationTimeout(
|
const dictionaryInfo = await withOperationTimeout(
|
||||||
'getYomitanDictionaryInfo',
|
'getYomitanDictionaryInfo',
|
||||||
deps.getYomitanDictionaryInfo(),
|
deps.getYomitanDictionaryInfo(),
|
||||||
@@ -192,7 +167,7 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
|||||||
? String(existing.revision)
|
? String(existing.revision)
|
||||||
: null;
|
: null;
|
||||||
const shouldImport =
|
const shouldImport =
|
||||||
existing === null || (revision !== null && existingRevision !== revision);
|
merged !== null || existing === null || existingRevision === null || existingRevision !== revision;
|
||||||
|
|
||||||
if (shouldImport) {
|
if (shouldImport) {
|
||||||
if (existing !== null) {
|
if (existing !== null) {
|
||||||
@@ -201,15 +176,16 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
|||||||
deps.deleteYomitanDictionary(dictionaryTitle),
|
deps.deleteYomitanDictionary(dictionaryTitle),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
deps.logInfo?.(
|
if (merged === null) {
|
||||||
`[dictionary:auto-sync] importing AniList ${generation.mediaId}: ${generation.zipPath}`,
|
merged = await deps.buildMergedDictionary(nextActiveMediaIds);
|
||||||
);
|
}
|
||||||
|
deps.logInfo?.(`[dictionary:auto-sync] importing merged dictionary: ${merged.zipPath}`);
|
||||||
const imported = await withOperationTimeout(
|
const imported = await withOperationTimeout(
|
||||||
`importYomitanDictionary(${path.basename(generation.zipPath)})`,
|
`importYomitanDictionary(${path.basename(merged.zipPath)})`,
|
||||||
deps.importYomitanDictionary(generation.zipPath),
|
deps.importYomitanDictionary(merged.zipPath),
|
||||||
);
|
);
|
||||||
if (!imported) {
|
if (!imported) {
|
||||||
throw new Error(`Failed to import dictionary ZIP: ${generation.zipPath}`);
|
throw new Error(`Failed to import dictionary ZIP: ${merged.zipPath}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,49 +194,13 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
|||||||
deps.upsertYomitanDictionarySettings(dictionaryTitle, config.profileScope),
|
deps.upsertYomitanDictionarySettings(dictionaryTitle, config.profileScope),
|
||||||
);
|
);
|
||||||
|
|
||||||
const mediaIdKey = String(generation.mediaId);
|
writeAutoSyncState(statePath, {
|
||||||
state.dictionariesByMediaId[mediaIdKey] = {
|
activeMediaIds: nextActiveMediaIds,
|
||||||
mediaId: generation.mediaId,
|
mergedRevision: merged?.revision ?? revision,
|
||||||
dictionaryTitle,
|
mergedDictionaryTitle: merged?.dictionaryTitle ?? dictionaryTitle,
|
||||||
lastImportedRevision: revision,
|
});
|
||||||
lastUsedAt: deps.now(),
|
|
||||||
};
|
|
||||||
state.activeMediaIds = [
|
|
||||||
generation.mediaId,
|
|
||||||
...state.activeMediaIds.filter((value) => value !== generation.mediaId),
|
|
||||||
];
|
|
||||||
|
|
||||||
const maxLoaded = Math.max(1, Math.floor(config.maxLoaded));
|
|
||||||
while (state.activeMediaIds.length > maxLoaded) {
|
|
||||||
const evictedMediaId = state.activeMediaIds.pop();
|
|
||||||
if (evictedMediaId === undefined) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
const evicted = state.dictionariesByMediaId[String(evictedMediaId)];
|
|
||||||
if (!evicted) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
await withOperationTimeout(
|
|
||||||
`removeYomitanDictionarySettings(${evicted.dictionaryTitle})`,
|
|
||||||
deps.removeYomitanDictionarySettings(
|
|
||||||
evicted.dictionaryTitle,
|
|
||||||
config.profileScope,
|
|
||||||
config.evictionPolicy,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
if (config.evictionPolicy === 'delete') {
|
|
||||||
await withOperationTimeout(
|
|
||||||
`deleteYomitanDictionary(${evicted.dictionaryTitle})`,
|
|
||||||
deps.deleteYomitanDictionary(evicted.dictionaryTitle),
|
|
||||||
);
|
|
||||||
delete state.dictionariesByMediaId[String(evictedMediaId)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
writeAutoSyncState(statePath, state);
|
|
||||||
deps.logInfo?.(
|
deps.logInfo?.(
|
||||||
`[dictionary:auto-sync] synced AniList ${generation.mediaId}: ${dictionaryTitle} (${generation.entryCount} entries)`,
|
`[dictionary:auto-sync] synced AniList ${snapshot.mediaId}: ${dictionaryTitle} (${snapshot.entryCount} entries)`,
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -9,12 +9,114 @@ import {
|
|||||||
alignTokensToSourceText,
|
alignTokensToSourceText,
|
||||||
buildSubtitleTokenHoverRanges,
|
buildSubtitleTokenHoverRanges,
|
||||||
computeWordClass,
|
computeWordClass,
|
||||||
|
createSubtitleRenderer,
|
||||||
getFrequencyRankLabelForToken,
|
getFrequencyRankLabelForToken,
|
||||||
getJlptLevelLabelForToken,
|
getJlptLevelLabelForToken,
|
||||||
normalizeSubtitle,
|
normalizeSubtitle,
|
||||||
sanitizeSubtitleHoverTokenColor,
|
sanitizeSubtitleHoverTokenColor,
|
||||||
shouldRenderTokenizedSubtitle,
|
shouldRenderTokenizedSubtitle,
|
||||||
} from './subtitle-render.js';
|
} from './subtitle-render.js';
|
||||||
|
import { createRendererState } from './state.js';
|
||||||
|
|
||||||
|
class FakeTextNode {
|
||||||
|
constructor(public textContent: string) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
class FakeDocumentFragment {
|
||||||
|
childNodes: Array<FakeElement | FakeTextNode> = [];
|
||||||
|
|
||||||
|
appendChild(
|
||||||
|
child: FakeElement | FakeTextNode | FakeDocumentFragment,
|
||||||
|
): FakeElement | FakeTextNode | FakeDocumentFragment {
|
||||||
|
if (child instanceof FakeDocumentFragment) {
|
||||||
|
this.childNodes.push(...child.childNodes);
|
||||||
|
child.childNodes = [];
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.childNodes.push(child);
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class FakeStyleDeclaration {
|
||||||
|
private values = new Map<string, string>();
|
||||||
|
|
||||||
|
setProperty(name: string, value: string) {
|
||||||
|
this.values.set(name, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class FakeElement {
|
||||||
|
childNodes: Array<FakeElement | FakeTextNode> = [];
|
||||||
|
dataset: Record<string, string> = {};
|
||||||
|
style = new FakeStyleDeclaration();
|
||||||
|
className = '';
|
||||||
|
private ownTextContent = '';
|
||||||
|
|
||||||
|
constructor(public tagName: string) {}
|
||||||
|
|
||||||
|
appendChild(
|
||||||
|
child: FakeElement | FakeTextNode | FakeDocumentFragment,
|
||||||
|
): FakeElement | FakeTextNode | FakeDocumentFragment {
|
||||||
|
if (child instanceof FakeDocumentFragment) {
|
||||||
|
this.childNodes.push(...child.childNodes);
|
||||||
|
child.childNodes = [];
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.childNodes.push(child);
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
set textContent(value: string) {
|
||||||
|
this.ownTextContent = value;
|
||||||
|
this.childNodes = [];
|
||||||
|
}
|
||||||
|
|
||||||
|
get textContent(): string {
|
||||||
|
if (this.childNodes.length === 0) {
|
||||||
|
return this.ownTextContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.childNodes
|
||||||
|
.map((child) => (child instanceof FakeTextNode ? child.textContent : child.textContent))
|
||||||
|
.join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
set innerHTML(value: string) {
|
||||||
|
if (value === '') {
|
||||||
|
this.childNodes = [];
|
||||||
|
this.ownTextContent = '';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function installFakeDocument() {
|
||||||
|
const previousDocument = (globalThis as { document?: unknown }).document;
|
||||||
|
|
||||||
|
Object.defineProperty(globalThis, 'document', {
|
||||||
|
configurable: true,
|
||||||
|
value: {
|
||||||
|
createDocumentFragment: () => new FakeDocumentFragment(),
|
||||||
|
createElement: (tagName: string) => new FakeElement(tagName),
|
||||||
|
createTextNode: (text: string) => new FakeTextNode(text),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
Object.defineProperty(globalThis, 'document', {
|
||||||
|
configurable: true,
|
||||||
|
value: previousDocument,
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function collectWordNodes(root: FakeElement): FakeElement[] {
|
||||||
|
return root.childNodes.filter(
|
||||||
|
(child): child is FakeElement => child instanceof FakeElement && child.className.includes('word'),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
function createToken(overrides: Partial<MergedToken>): MergedToken {
|
function createToken(overrides: Partial<MergedToken>): MergedToken {
|
||||||
return {
|
return {
|
||||||
@@ -288,6 +390,16 @@ test('alignTokensToSourceText treats whitespace-only token surfaces as plain tex
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('alignTokensToSourceText preserves unsupported punctuation between matched tokens', () => {
|
||||||
|
const tokens = [createToken({ surface: 'えっ' }), createToken({ surface: 'マジ' })];
|
||||||
|
|
||||||
|
const segments = alignTokensToSourceText(tokens, 'えっ!?マジ');
|
||||||
|
assert.deepEqual(
|
||||||
|
segments.map((segment) => (segment.kind === 'text' ? `text:${segment.text}` : 'token')),
|
||||||
|
['token', 'text:!?', 'token'],
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
test('alignTokensToSourceText avoids duplicate tail when later token surface does not match source', () => {
|
test('alignTokensToSourceText avoids duplicate tail when later token surface does not match source', () => {
|
||||||
const tokens = [
|
const tokens = [
|
||||||
createToken({ surface: '君たちが潰した拠点に' }),
|
createToken({ surface: '君たちが潰した拠点に' }),
|
||||||
@@ -327,6 +439,55 @@ test('buildSubtitleTokenHoverRanges ignores unmatched token surfaces', () => {
|
|||||||
assert.deepEqual(ranges, [{ start: 0, end: 10, tokenIndex: 0 }]);
|
assert.deepEqual(ranges, [{ start: 0, end: 10, tokenIndex: 0 }]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('buildSubtitleTokenHoverRanges skips unsupported punctuation while preserving later offsets', () => {
|
||||||
|
const tokens = [createToken({ surface: 'えっ' }), createToken({ surface: 'マジ' })];
|
||||||
|
|
||||||
|
const ranges = buildSubtitleTokenHoverRanges(tokens, 'えっ!?マジ');
|
||||||
|
assert.deepEqual(ranges, [
|
||||||
|
{ start: 0, end: 2, tokenIndex: 0 },
|
||||||
|
{ start: 4, end: 6, tokenIndex: 1 },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('renderSubtitle preserves unsupported punctuation while keeping it non-interactive', () => {
|
||||||
|
const restoreDocument = installFakeDocument();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const subtitleRoot = new FakeElement('div');
|
||||||
|
const renderer = createSubtitleRenderer({
|
||||||
|
dom: {
|
||||||
|
subtitleRoot,
|
||||||
|
subtitleContainer: new FakeElement('div'),
|
||||||
|
secondarySubRoot: new FakeElement('div'),
|
||||||
|
secondarySubContainer: new FakeElement('div'),
|
||||||
|
},
|
||||||
|
platform: {
|
||||||
|
isMacOSPlatform: false,
|
||||||
|
isModalLayer: false,
|
||||||
|
overlayLayer: 'visible',
|
||||||
|
shouldToggleMouseIgnore: false,
|
||||||
|
},
|
||||||
|
state: createRendererState(),
|
||||||
|
} as never);
|
||||||
|
|
||||||
|
renderer.renderSubtitle({
|
||||||
|
text: 'えっ!?マジ',
|
||||||
|
tokens: [createToken({ surface: 'えっ' }), createToken({ surface: 'マジ' })],
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.equal(subtitleRoot.textContent, 'えっ!?マジ');
|
||||||
|
assert.deepEqual(
|
||||||
|
collectWordNodes(subtitleRoot).map((node) => [node.textContent, node.dataset.tokenIndex]),
|
||||||
|
[
|
||||||
|
['えっ', '0'],
|
||||||
|
['マジ', '1'],
|
||||||
|
],
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
restoreDocument();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
test('normalizeSubtitle collapses explicit line breaks when collapseLineBreaks is enabled', () => {
|
test('normalizeSubtitle collapses explicit line breaks when collapseLineBreaks is enabled', () => {
|
||||||
assert.equal(
|
assert.equal(
|
||||||
normalizeSubtitle('常人が使えば\\Nその圧倒的な力に\\n体が耐えきれず死に至るが…', true, true),
|
normalizeSubtitle('常人が使えば\\Nその圧倒的な力に\\n体が耐えきれず死に至るが…', true, true),
|
||||||
|
|||||||
@@ -241,13 +241,17 @@ function renderWithTokens(
|
|||||||
|
|
||||||
const fragment = document.createDocumentFragment();
|
const fragment = document.createDocumentFragment();
|
||||||
|
|
||||||
if (preserveLineBreaks && sourceText) {
|
if (sourceText) {
|
||||||
const normalizedSource = normalizeSubtitle(sourceText, true, false);
|
const normalizedSource = normalizeSubtitle(sourceText, true, !preserveLineBreaks);
|
||||||
const segments = alignTokensToSourceText(tokens, normalizedSource);
|
const segments = alignTokensToSourceText(tokens, normalizedSource);
|
||||||
|
|
||||||
for (const segment of segments) {
|
for (const segment of segments) {
|
||||||
if (segment.kind === 'text') {
|
if (segment.kind === 'text') {
|
||||||
|
if (preserveLineBreaks) {
|
||||||
renderPlainTextPreserveLineBreaks(fragment, segment.text);
|
renderPlainTextPreserveLineBreaks(fragment, segment.text);
|
||||||
|
} else {
|
||||||
|
fragment.appendChild(document.createTextNode(segment.text));
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user