feat(tokenizer): refine Yomitan grouping and parser tooling

- map segmented Yomitan lines into single logical tokens and improve candidate selection heuristics

- limit frequency lookup to selected token text with POS-based exclusions and add debug logging hook

- add standalone Yomitan parser test script, deterministic utility-script shutdown, and docs/backlog updates
This commit is contained in:
kyasuda
2026-02-16 17:41:24 -08:00
parent 0eb2868805
commit 457e6f0f10
17 changed files with 1667 additions and 293 deletions

View File

@@ -911,6 +911,7 @@ async function tokenizeSubtitle(text: string): Promise<SubtitleData> {
getFrequencyDictionaryEnabled: () =>
getResolvedConfig().subtitleStyle.frequencyDictionary.enabled,
getFrequencyRank: (text) => appState.frequencyRankLookup(text),
getYomitanGroupDebugEnabled: () => appState.overlayDebugVisualizationEnabled,
getMecabTokenizer: () => appState.mecabTokenizer,
}),
);