feat(tokenizer): refine Yomitan grouping and parser tooling

- map segmented Yomitan lines into single logical tokens and improve candidate selection heuristics

- limit frequency lookup to selected token text with POS-based exclusions and add debug logging hook

- add standalone Yomitan parser test script, deterministic utility-script shutdown, and docs/backlog updates
This commit is contained in:
kyasuda
2026-02-16 17:41:24 -08:00
parent 0eb2868805
commit 457e6f0f10
17 changed files with 1667 additions and 293 deletions

View File

@@ -6,6 +6,8 @@
"scripts": {
"get-frequency": "bun run scripts/get_frequency.ts",
"get-frequency:electron": "bun build scripts/get_frequency.ts --format=cjs --target=node --outfile dist/scripts/get_frequency.js --external electron && electron dist/scripts/get_frequency.js",
"test-yomitan-parser": "bun run scripts/test-yomitan-parser.ts",
"test-yomitan-parser:electron": "bun build scripts/test-yomitan-parser.ts --format=cjs --target=node --outfile dist/scripts/test-yomitan-parser.js --external electron && electron dist/scripts/test-yomitan-parser.js",
"build": "tsc && pnpm run build:renderer && cp src/renderer/index.html src/renderer/style.css dist/renderer/ && bash scripts/build-macos-helper.sh",
"build:renderer": "esbuild src/renderer/renderer.ts --bundle --platform=browser --format=esm --target=es2022 --outfile=dist/renderer/renderer.js --sourcemap",
"check:main-lines": "bash scripts/check-main-lines.sh",