feat: source frequency ranks from installed Yomitan dictionaries

This commit is contained in:
2026-02-28 03:47:57 -08:00
parent d24283e82d
commit a9f7ea0204
21 changed files with 448 additions and 55 deletions

View File

@@ -27,7 +27,7 @@ SubMiner is an Electron overlay that sits on top of mpv. It turns your video pla
- **Hover to look up** — Yomitan dictionary popups directly on subtitles - **Hover to look up** — Yomitan dictionary popups directly on subtitles
- **One-key mining** — Creates Anki cards with sentence, audio, screenshot, and translation - **One-key mining** — Creates Anki cards with sentence, audio, screenshot, and translation
- **Instant auto-enrichment** — Optional local AnkiConnect proxy enriches new Yomitan cards immediately - **Instant auto-enrichment** — Optional local AnkiConnect proxy enriches new Yomitan cards immediately
- **N+1 highlighting** — Marks known words from your Anki deck so unknown ones jump out - **Reading annotations** — Combines N+1 targeting, frequency-dictionary highlighting, and JLPT underlining while you read
- **Subtitle tools** — Download from Jimaku, sync with alass/ffsubsync - **Subtitle tools** — Download from Jimaku, sync with alass/ffsubsync
- **Immersion tracking** — SQLite-powered stats on your watch time and mining activity - **Immersion tracking** — SQLite-powered stats on your watch time and mining activity
- **Custom texthooker page** — Built-in custom texthooker page and websocket, no extra setup - **Custom texthooker page** — Built-in custom texthooker page and websocket, no extra setup

View File

@@ -133,7 +133,7 @@
}, // Jlpt colors setting. }, // Jlpt colors setting.
"frequencyDictionary": { "frequencyDictionary": {
"enabled": false, // Enable frequency-dictionary-based highlighting based on token rank. Values: true | false "enabled": false, // Enable frequency-dictionary-based highlighting based on token rank. Values: true | false
"sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, built-in discovery search paths are used. "sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, SubMiner searches installed/default frequency-dictionary locations.
"topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000). "topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000).
"mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded "mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded
"singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`. "singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`.

View File

@@ -757,7 +757,7 @@ See `config.example.jsonc` for detailed configuration options.
| `enableJlpt` | boolean | Enable JLPT level underline styling (`false` by default) | | `enableJlpt` | boolean | Enable JLPT level underline styling (`false` by default) |
| `preserveLineBreaks` | boolean | Preserve line breaks in visible overlay subtitle rendering (`false` by default). Enable to mirror mpv line layout. | | `preserveLineBreaks` | boolean | Preserve line breaks in visible overlay subtitle rendering (`false` by default). Enable to mirror mpv line layout. |
| `frequencyDictionary.enabled` | boolean | Enable frequency highlighting from dictionary lookups (`false` by default) | | `frequencyDictionary.enabled` | boolean | Enable frequency highlighting from dictionary lookups (`false` by default) |
| `frequencyDictionary.sourcePath` | string | Path to a local frequency dictionary root. Leave empty or omit to use the built-in bundled dictionary search paths. | | `frequencyDictionary.sourcePath` | string | Path to a local frequency dictionary root. Leave empty or omit to use installed/default frequency-dictionary search paths. |
| `frequencyDictionary.topX` | number | Only color tokens whose frequency rank is `<= topX` (`1000` by default) | | `frequencyDictionary.topX` | number | Only color tokens whose frequency rank is `<= topX` (`1000` by default) |
| `frequencyDictionary.mode` | string | `"single"` or `"banded"` (`"single"` by default) | | `frequencyDictionary.mode` | string | `"single"` or `"banded"` (`"single"` by default) |
| `frequencyDictionary.singleColor` | string | Color used for all highlighted tokens in single mode | | `frequencyDictionary.singleColor` | string | Color used for all highlighted tokens in single mode |
@@ -774,7 +774,7 @@ Frequency dictionary highlighting uses the same dictionary file format as JLPT b
Lookup behavior: Lookup behavior:
- Set `frequencyDictionary.sourcePath` to a directory containing `term_meta_bank_*.json` for a fully custom source. - Set `frequencyDictionary.sourcePath` to a directory containing `term_meta_bank_*.json` for a fully custom source.
- If `sourcePath` is missing or empty, SubMiner uses bundled defaults from `vendor/jiten_freq_global` (packaged under `<resources>/jiten_freq_global` in distribution builds). - If `sourcePath` is missing or empty, SubMiner searches default install/runtime locations for `frequency-dictionary` directories (for example app resources, user data paths, and current working directory).
- In both cases, only terms with a valid `frequencyRank` are used; everything else falls back to no highlighting. - In both cases, only terms with a valid `frequencyRank` are used; everything else falls back to no highlighting.
In `single` mode all highlights use `singleColor`; in `banded` mode tokens map to five ascending color bands from most common to least common inside the topX window. In `single` mode all highlights use `singleColor`; in `banded` mode tokens map to five ascending color bands from most common to least common inside the topX window.

View File

@@ -7,7 +7,7 @@ titleTemplate: Immersion Mining Workflow for MPV
hero: hero:
name: SubMiner name: SubMiner
text: Immersion Mining for MPV text: Immersion Mining for MPV
tagline: Watch media, mine vocabulary, and build cards without leaving the scene. tagline: Watch media, mine vocabulary, and craft Anki cards without leaving the scene.
image: image:
src: /assets/SubMiner.png src: /assets/SubMiner.png
alt: SubMiner logo alt: SubMiner logo
@@ -35,16 +35,11 @@ features:
alt: Anki card icon alt: Anki card icon
title: Anki Card Enrichment title: Anki Card Enrichment
details: Auto-fills card fields with subtitle sentence, clipping, image, and translation so you can focus on learning. details: Auto-fills card fields with subtitle sentence, clipping, image, and translation so you can focus on learning.
- icon:
src: /assets/dual-layer.svg
alt: Dual layer icon
title: Unified Overlay Stack
details: Primary interactive subtitle layer with a built-in secondary context bar, all in one overlay window.
- icon: - icon:
src: /assets/highlight.svg src: /assets/highlight.svg
alt: Highlight icon alt: Highlight icon
title: N+1 Highlighting title: Reading Annotations
details: Surfaces known words from your deck so unknown targets stand out during immersion sessions. details: Combines N+1 targeting, Jiten frequency highlighting, and JLPT tagging so useful cues stay visible while you read.
- icon: - icon:
src: /assets/tokenization.svg src: /assets/tokenization.svg
alt: Tokenization icon alt: Tokenization icon
@@ -55,16 +50,6 @@ features:
alt: Subtitle download icon alt: Subtitle download icon
title: Subtitle Download & Sync title: Subtitle Download & Sync
details: Pull and synchronize subtitles with Jimaku plus alass/ffsubsync in one cohesive workflow. details: Pull and synchronize subtitles with Jimaku plus alass/ffsubsync in one cohesive workflow.
- icon:
src: /assets/keyboard.svg
alt: Keyboard icon
title: Keyboard-Driven
details: Run lookups, mining actions, clipping, and workflow toggles with one configurable shortcut surface.
- icon:
src: /assets/texthooker.svg
alt: Texthooker icon
title: Texthooker & WebSocket
details: Stream subtitles in real time to browser tools via local WebSocket and keep your stack integrated.
--- ---
<script setup> <script setup>

View File

@@ -26,7 +26,7 @@ The expected files are:
Each bank maps terms to frequency metadata; only entries with a `frequency.displayValue` are considered for JLPT tagging. Each bank maps terms to frequency metadata; only entries with a `frequency.displayValue` are considered for JLPT tagging.
SubMiner also reuses the same `term_meta_bank_*.json` format for frequency-based subtitle highlighting. The default frequency source is now bundled as `vendor/jiten_freq_global`, so users can enable `subtitleStyle.frequencyDictionary` without extra setup. SubMiner also reuses the same `term_meta_bank_*.json` format for frequency-based subtitle highlighting, using installed/default `frequency-dictionary` locations or an explicit `subtitleStyle.frequencyDictionary.sourcePath`.
## Source and update process ## Source and update process

View File

@@ -133,7 +133,7 @@
}, // Jlpt colors setting. }, // Jlpt colors setting.
"frequencyDictionary": { "frequencyDictionary": {
"enabled": false, // Enable frequency-dictionary-based highlighting based on token rank. Values: true | false "enabled": false, // Enable frequency-dictionary-based highlighting based on token rank. Values: true | false
"sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, built-in discovery search paths are used. "sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, SubMiner searches installed/default frequency-dictionary locations.
"topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000). "topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000).
"mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded "mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded
"singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`. "singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`.

View File

@@ -119,10 +119,6 @@
"from": "vendor/yomitan-jlpt-vocab", "from": "vendor/yomitan-jlpt-vocab",
"to": "yomitan-jlpt-vocab" "to": "yomitan-jlpt-vocab"
}, },
{
"from": "vendor/jiten_freq_global",
"to": "jiten_freq_global"
},
{ {
"from": "assets", "from": "assets",
"to": "assets" "to": "assets"

View File

@@ -33,7 +33,7 @@ interface CliOptions {
function parseCliArgs(argv: string[]): CliOptions { function parseCliArgs(argv: string[]): CliOptions {
const args = [...argv]; const args = [...argv];
let inputParts: string[] = []; let inputParts: string[] = [];
let dictionaryPath = path.join(process.cwd(), 'vendor', 'jiten_freq_global'); let dictionaryPath = path.join(process.cwd(), 'vendor', 'frequency-dictionary');
let emitPretty = false; let emitPretty = false;
let emitDiagnostics = false; let emitDiagnostics = false;
let mecabCommand: string | undefined; let mecabCommand: string | undefined;
@@ -394,7 +394,7 @@ function printUsage(): void {
--color-band-5 <#hex> Frequency band-5 color. --color-band-5 <#hex> Frequency band-5 color.
--color-known <#hex> Known-word color (default: #a6da95). --color-known <#hex> Known-word color (default: #a6da95).
--color-n-plus-one <#hex> N+1 target color (default: #c6a0f6). --color-n-plus-one <#hex> N+1 target color (default: #c6a0f6).
--dictionary <path> Frequency dictionary root path (default: ./vendor/jiten_freq_global) --dictionary <path> Frequency dictionary root path (default: ./vendor/frequency-dictionary)
--mecab-command <path> Optional MeCab binary path (default: mecab) --mecab-command <path> Optional MeCab binary path (default: mecab)
--mecab-dictionary <path> Optional MeCab dictionary directory (default: system default) --mecab-dictionary <path> Optional MeCab dictionary directory (default: system default)
-h, --help Show usage. -h, --help Show usage.

View File

@@ -79,3 +79,30 @@ test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a sing
false, false,
); );
}); });
// Checks that a numeric `frequency.value` wins over `displayValue`, and that
// `displayValue` is still used for entries that carry no `value`.
test('createFrequencyDictionaryLookup prefers frequency.value over displayValue', async () => {
  const logs: string[] = [];
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));

  try {
    const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
    fs.writeFileSync(
      bankPath,
      JSON.stringify([
        ['猫', 1, { frequency: { value: 1234, displayValue: 1200 } }],
        ['犬', 2, { frequency: { displayValue: 88 } }],
      ]),
    );

    const lookup = await createFrequencyDictionaryLookup({
      searchPaths: [tempDir],
      log: (message) => {
        logs.push(message);
      },
    });

    assert.equal(lookup('猫'), 1234);
    assert.equal(lookup('犬'), 88);
    assert.equal(
      logs.some((entry) => entry.includes('Frequency dictionary loaded from')),
      true,
    );
  } finally {
    // Remove the fixture directory even when an assertion fails, so repeated
    // test runs do not pile up directories under the OS temp folder.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});

View File

@@ -18,17 +18,14 @@ function normalizeFrequencyTerm(value: string): string {
return value.trim().toLowerCase(); return value.trim().toLowerCase();
} }
function extractFrequencyDisplayValue(meta: unknown): number | null { function parsePositiveFrequencyNumber(value: unknown): number | null {
if (!meta || typeof meta !== 'object') return null; if (typeof value === 'number') {
const frequency = (meta as { frequency?: unknown }).frequency; if (!Number.isFinite(value) || value <= 0) return null;
if (!frequency || typeof frequency !== 'object') return null; return Math.floor(value);
const displayValue = (frequency as { displayValue?: unknown }).displayValue;
if (typeof displayValue === 'number') {
if (!Number.isFinite(displayValue) || displayValue <= 0) return null;
return Math.floor(displayValue);
} }
if (typeof displayValue === 'string') {
const normalized = displayValue.trim().replace(/,/g, ''); if (typeof value === 'string') {
const normalized = value.trim().replace(/,/g, '');
const parsed = Number.parseInt(normalized, 10); const parsed = Number.parseInt(normalized, 10);
if (!Number.isFinite(parsed) || parsed <= 0) return null; if (!Number.isFinite(parsed) || parsed <= 0) return null;
return parsed; return parsed;
@@ -37,6 +34,20 @@ function extractFrequencyDisplayValue(meta: unknown): number | null {
return null; return null;
} }
/**
 * Reads a usable frequency number out of a term-meta entry.
 *
 * The `frequency.value` field is tried first; `frequency.displayValue` is only
 * consulted when `value` does not parse to a positive number.
 *
 * @param meta Raw metadata payload of a term-meta bank entry.
 * @returns The parsed positive number, or null when `meta` / `meta.frequency`
 *   is not an object or neither field parses.
 */
function extractFrequencyDisplayValue(meta: unknown): number | null {
  if (typeof meta !== 'object' || meta === null) return null;

  const { frequency } = meta as { frequency?: unknown };
  if (typeof frequency !== 'object' || frequency === null) return null;

  const fields = frequency as { value?: unknown; displayValue?: unknown };
  return (
    parsePositiveFrequencyNumber(fields.value) ?? parsePositiveFrequencyNumber(fields.displayValue)
  );
}
function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null { function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null {
if (!Array.isArray(entry) || entry.length < 3) { if (!Array.isArray(entry) || entry.length < 3) {
return null; return null;

View File

@@ -169,6 +169,55 @@ test('tokenizeSubtitle applies frequency dictionary ranks', async () => {
assert.equal(result.tokens?.[1]?.frequencyRank, 1200); assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
}); });
// The fake parser window answers the frequency script with one rank entry and
// any other script with a single scanning-parser token for the same term.
test('tokenizeSubtitle loads frequency ranks from Yomitan installed dictionaries', async () => {
  const buildFrequencyRows = () => [
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
  ];
  const buildParseRows = () => [
    {
      source: 'scanning-parser',
      index: 0,
      content: [[{ text: '猫', reading: 'ねこ', headwords: [[{ term: '猫' }]] }]],
    },
  ];
  const fakeParserWindow = {
    isDestroyed: () => false,
    webContents: {
      executeJavaScript: async (script: string) =>
        script.includes('getTermFrequencies') ? buildFrequencyRows() : buildParseRows(),
    },
  } as unknown as Electron.BrowserWindow;

  const result = await tokenizeSubtitle(
    '猫',
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
      getYomitanParserWindow: () => fakeParserWindow,
    }),
  );

  assert.equal(result.tokens?.length, 1);
  assert.equal(result.tokens?.[0]?.frequencyRank, 77);
});
test('tokenizeSubtitle uses only selected Yomitan headword for frequency lookup', async () => { test('tokenizeSubtitle uses only selected Yomitan headword for frequency lookup', async () => {
const result = await tokenizeSubtitle( const result = await tokenizeSubtitle(
'猫です', '猫です',

View File

@@ -10,7 +10,10 @@ import {
JlptLevel, JlptLevel,
} from '../../types'; } from '../../types';
import { selectYomitanParseTokens } from './tokenizer/parser-selection-stage'; import { selectYomitanParseTokens } from './tokenizer/parser-selection-stage';
import { requestYomitanParseResults } from './tokenizer/yomitan-parser-runtime'; import {
requestYomitanParseResults,
requestYomitanTermFrequencies,
} from './tokenizer/yomitan-parser-runtime';
const logger = createLogger('main:tokenizer'); const logger = createLogger('main:tokenizer');
@@ -214,6 +217,64 @@ function logSelectedYomitanGroups(text: string, tokens: MergedToken[]): void {
}); });
} }
/**
 * Turns a raw rank into a positive integer.
 *
 * @param value Candidate rank of unknown type.
 * @returns The rank rounded down (never below 1), or null when `value` is not
 *   a finite number greater than zero.
 */
function normalizePositiveFrequencyRank(value: unknown): number | null {
  if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
    const floored = Math.floor(value);
    // Fractions in (0, 1) floor to 0; clamp them up to rank 1.
    return floored < 1 ? 1 : floored;
  }
  return null;
}
/**
 * Chooses the text used to look a token up in frequency data.
 *
 * Preference order: non-empty headword, then non-empty reading, then the
 * surface form.
 */
function resolveFrequencyLookupText(token: MergedToken): string {
  return token.headword || token.reading || token.surface;
}
/**
 * Copies Yomitan-provided frequency ranks onto matching tokens.
 *
 * When several entries exist for the same trimmed term, the lowest rank is
 * kept. A token matches when its lookup text (see
 * `resolveFrequencyLookupText`), trimmed, equals an entry's term.
 *
 * @param tokens Tokens to annotate; never mutated.
 * @param frequencies Term/frequency pairs to draw ranks from.
 * @returns The original array when there is nothing to apply, otherwise a new
 *   array in which matched tokens are shallow copies carrying `frequencyRank`.
 */
function applyYomitanFrequencyRanks(
  tokens: MergedToken[],
  frequencies: ReadonlyArray<{ term: string; frequency: number }>,
): MergedToken[] {
  if (tokens.length === 0 || frequencies.length === 0) return tokens;

  const bestRankByTerm = new Map<string, number>();
  for (const entry of frequencies) {
    const key = entry.term.trim();
    const rank = normalizePositiveFrequencyRank(entry.frequency);
    if (key.length === 0 || rank === null) continue;

    const known = bestRankByTerm.get(key);
    bestRankByTerm.set(key, known === undefined ? rank : Math.min(known, rank));
  }

  if (bestRankByTerm.size === 0) return tokens;

  return tokens.map((token) => {
    // Empty lookup text can never match: the map holds no empty keys.
    const rank = bestRankByTerm.get(resolveFrequencyLookupText(token).trim());
    return rank === undefined ? token : { ...token, frequencyRank: rank };
  });
}
function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOptions { function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOptions {
return { return {
nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false, nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false,
@@ -246,14 +307,24 @@ async function parseWithYomitanInternalParser(
logSelectedYomitanGroups(text, selectedTokens); logSelectedYomitanGroups(text, selectedTokens);
} }
let tokensWithFrequency = selectedTokens;
if (options.frequencyEnabled) {
const termReadingList = selectedTokens.map((token) => ({
term: resolveFrequencyLookupText(token),
reading: token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null,
}));
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
tokensWithFrequency = applyYomitanFrequencyRanks(selectedTokens, yomitanFrequencies);
}
if (!needsMecabPosEnrichment(options)) { if (!needsMecabPosEnrichment(options)) {
return selectedTokens; return tokensWithFrequency;
} }
try { try {
const mecabTokens = await deps.tokenizeWithMecab(text); const mecabTokens = await deps.tokenizeWithMecab(text);
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync; const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
return await enrichTokensWithMecab(selectedTokens, mecabTokens); return await enrichTokensWithMecab(tokensWithFrequency, mecabTokens);
} catch (err) { } catch (err) {
const error = err as Error; const error = err as Error;
logger.warn( logger.warn(
@@ -262,7 +333,7 @@ async function parseWithYomitanInternalParser(
`tokenCount=${selectedTokens.length}`, `tokenCount=${selectedTokens.length}`,
`textLength=${text.length}`, `textLength=${text.length}`,
); );
return selectedTokens; return tokensWithFrequency;
} }
} }

View File

@@ -95,6 +95,27 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex
assert.deepEqual(lookupCalls, ['猫']); assert.deepEqual(lookupCalls, ['猫']);
}); });
// With no lookup function supplied, a rank already present on the token must survive.
test('annotateTokens preserves existing frequency rank when lookup is unavailable', () => {
  const preRanked = makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 });

  const [annotated] = annotateTokens([preRanked], makeDeps({ getFrequencyRank: undefined }));

  assert.equal(annotated?.frequencyRank, 42);
});
// A rank already present on the token takes priority over what the lookup would return.
test('annotateTokens prefers existing frequency rank over fallback lookup', () => {
  const preRanked = makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 });
  const depsWithFallback = makeDeps({
    getFrequencyRank: () => 9,
  });

  const [annotated] = annotateTokens([preRanked], depsWithFallback);

  assert.equal(annotated?.frequencyRank, 42);
});
test('annotateTokens handles JLPT disabled and eligibility exclusion paths', () => { test('annotateTokens handles JLPT disabled and eligibility exclusion paths', () => {
let disabledLookupCalls = 0; let disabledLookupCalls = 0;
const disabledResult = annotateTokens( const disabledResult = annotateTokens(
@@ -157,3 +178,38 @@ test('annotateTokens N+1 handoff marks expected target when threshold is satisfi
assert.equal(result[1]?.isNPlusOneTarget, true); assert.equal(result[1]?.isNPlusOneTarget, true);
assert.equal(result[2]?.isNPlusOneTarget, false); assert.equal(result[2]?.isNPlusOneTarget, false);
}); });
// One unknown noun plus a known particle and a known auxiliary: with a
// three-word minimum, the noun must not be flagged as an N+1 target.
test('annotateTokens N+1 minimum sentence words counts only eligible word tokens', () => {
  const noun = makeToken({ surface: '猫', headword: '猫', startPos: 0, endPos: 1 });
  const particle = makeToken({
    surface: 'が',
    headword: 'が',
    partOfSpeech: PartOfSpeech.particle,
    pos1: '助詞',
    startPos: 1,
    endPos: 2,
  });
  const auxiliary = makeToken({
    surface: 'です',
    headword: 'です',
    partOfSpeech: PartOfSpeech.bound_auxiliary,
    pos1: '助動詞',
    startPos: 2,
    endPos: 4,
  });

  const [first, second, third] = annotateTokens(
    [noun, particle, auxiliary],
    makeDeps({
      isKnownWord: (text) => text === 'が' || text === 'です',
    }),
    { minSentenceWordsForNPlusOne: 3 },
  );

  assert.equal(first?.isKnown, false);
  assert.equal(second?.isKnown, true);
  assert.equal(third?.isKnown, true);
  assert.equal(first?.isNPlusOneTarget, false);
});

View File

@@ -141,6 +141,11 @@ function applyFrequencyMarking(
return { ...token, frequencyRank: undefined }; return { ...token, frequencyRank: undefined };
} }
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
const rank = Math.max(1, Math.floor(token.frequencyRank));
return { ...token, frequencyRank: rank };
}
const lookupTexts = getFrequencyLookupTextCandidates(token); const lookupTexts = getFrequencyLookupTextCandidates(token);
if (lookupTexts.length === 0) { if (lookupTexts.length === 0) {
return { ...token, frequencyRank: undefined }; return { ...token, frequencyRank: undefined };
@@ -354,6 +359,14 @@ export function annotateTokens(
const frequencyMarkedTokens = const frequencyMarkedTokens =
frequencyEnabled && deps.getFrequencyRank frequencyEnabled && deps.getFrequencyRank
? applyFrequencyMarking(knownMarkedTokens, deps.getFrequencyRank) ? applyFrequencyMarking(knownMarkedTokens, deps.getFrequencyRank)
: frequencyEnabled
? knownMarkedTokens.map((token) => ({
...token,
frequencyRank:
typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)
? Math.max(1, Math.floor(token.frequencyRank))
: undefined,
}))
: knownMarkedTokens.map((token) => ({ : knownMarkedTokens.map((token) => ({
...token, ...token,
frequencyRank: undefined, frequencyRank: undefined,

View File

@@ -1,6 +1,9 @@
import assert from 'node:assert/strict'; import assert from 'node:assert/strict';
import test from 'node:test'; import test from 'node:test';
import { syncYomitanDefaultAnkiServer } from './yomitan-parser-runtime'; import {
requestYomitanTermFrequencies,
syncYomitanDefaultAnkiServer,
} from './yomitan-parser-runtime';
function createDeps(executeJavaScript: (script: string) => Promise<unknown>) { function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
const parserWindow = { const parserWindow = {
@@ -81,3 +84,35 @@ test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
assert.equal(updated, false); assert.equal(updated, false);
assert.equal(executeCount, 0); assert.equal(executeCount, 0);
}); });
// The fake backend returns one valid entry and one with frequency 0; only the
// valid one may come back, and the injected script must use both backend actions.
test('requestYomitanTermFrequencies returns normalized frequency entries', async () => {
  let lastScript = '';
  const deps = createDeps(async (script) => {
    lastScript = script;
    const accepted = {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    };
    const rejected = {
      term: 'invalid',
      dictionary: 'freq-dict',
      frequency: 0,
    };
    return [accepted, rejected];
  });

  const entries = await requestYomitanTermFrequencies([{ term: '猫', reading: 'ねこ' }], deps, {
    error: () => undefined,
  });

  assert.equal(entries.length, 1);
  const [onlyEntry] = entries;
  assert.equal(onlyEntry?.term, '猫');
  assert.equal(onlyEntry?.frequency, 77);
  assert.match(lastScript, /getTermFrequencies/);
  assert.match(lastScript, /optionsGetFull/);
});

View File

@@ -15,6 +15,89 @@ interface YomitanParserRuntimeDeps {
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void; setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
} }
/**
 * One validated frequency record, as produced by `toYomitanTermFrequency` and
 * returned from `requestYomitanTermFrequencies`.
 */
export interface YomitanTermFrequency {
/** Trimmed, non-empty term text. */
term: string;
/** Reading as received, or null when the raw value was not a string. */
reading: string | null;
/** Trimmed, non-empty dictionary name. */
dictionary: string;
/** Positive integer (raw value rounded down, never below 1). */
frequency: number;
/** Display string as received, or null when the raw value was not a string. */
displayValue: string | null;
/** True only when the raw field was exactly `true`. NOTE(review): presumably signals that `displayValue` was parsed numerically upstream — confirm against Yomitan. */
displayValueParsed: boolean;
}
/** A term with an optional reading, used as the query unit for `requestYomitanTermFrequencies`. */
export interface YomitanTermReadingPair {
/** Term text to look up. */
term: string;
/** Reading to accompany the term, or null when none is supplied. */
reading: string | null;
}
/** Type guard: true for any non-null value whose `typeof` is 'object' (arrays included). */
function isObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}
/**
 * Converts a raw value to a positive integer.
 *
 * @returns `value` rounded down and clamped to at least 1, or null when it is
 *   not a finite number greater than zero.
 */
function asPositiveInteger(value: unknown): number | null {
  if (typeof value !== 'number') return null;
  if (!Number.isFinite(value)) return null;
  if (value <= 0) return null;

  const whole = Math.floor(value);
  return whole >= 1 ? whole : 1;
}
/**
 * Validates one raw backend entry and converts it to a `YomitanTermFrequency`.
 *
 * An entry is rejected (null) unless it is an object with a non-blank string
 * `term`, a non-blank string `dictionary`, and a positive numeric `frequency`.
 * `reading` and `displayValue` are kept only when they are strings;
 * `displayValueParsed` is true only for a literal `true`.
 */
function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
  if (!isObject(value)) return null;

  const trimmedOrEmpty = (raw: unknown): string => (typeof raw === 'string' ? raw.trim() : '');
  const stringOrNull = (raw: unknown): string | null => (typeof raw === 'string' ? raw : null);

  const term = trimmedOrEmpty(value.term);
  const dictionary = trimmedOrEmpty(value.dictionary);
  const frequency = asPositiveInteger(value.frequency);
  if (term === '' || dictionary === '' || frequency === null) return null;

  return {
    term,
    reading: stringOrNull(value.reading),
    dictionary,
    frequency,
    displayValue: stringOrNull(value.displayValue),
    displayValueParsed: value.displayValueParsed === true,
  };
}
/**
 * Cleans and de-duplicates a list of term/reading pairs.
 *
 * Terms and readings are trimmed; pairs with a blank or non-string term are
 * dropped; blank or non-string readings become null. Only the first occurrence
 * of each (term, reading) combination is kept, in input order.
 */
function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): YomitanTermReadingPair[] {
  const seenKeys = new Set<string>();
  const result: YomitanTermReadingPair[] = [];

  for (const { term: rawTerm, reading: rawReading } of termReadingList) {
    const term = typeof rawTerm === 'string' ? rawTerm.trim() : '';
    if (term.length === 0) continue;

    const trimmedReading = typeof rawReading === 'string' ? rawReading.trim() : '';
    const reading = trimmedReading.length > 0 ? trimmedReading : null;

    // NUL separates the two parts of the de-duplication key.
    const key = `${term}\u0000${trimmedReading}`;
    if (seenKeys.has(key)) continue;

    seenKeys.add(key);
    result.push({ term, reading });
  }

  return result;
}
async function ensureYomitanParserWindow( async function ensureYomitanParserWindow(
deps: YomitanParserRuntimeDeps, deps: YomitanParserRuntimeDeps,
logger: LoggerLike, logger: LoggerLike,
@@ -154,6 +237,79 @@ export async function requestYomitanParseResults(
} }
} }
/**
 * Asks the Yomitan backend for frequency data on a list of term/reading pairs.
 *
 * The request runs as an injected script inside the Yomitan parser window. The
 * script reads the full options, collects the names of dictionaries marked
 * `enabled === true` in the current profile, and calls `getTermFrequencies`
 * for those dictionaries.
 *
 * @param termReadingList Pairs to query; normalized and de-duplicated first.
 * @param deps Accessors for the Yomitan extension and parser window.
 * @param logger Receives an error message when the injected script fails.
 * @returns Validated frequency entries. An empty array is returned when there
 *   is nothing to query, the extension or parser window is unavailable, no
 *   dictionaries are enabled, the result is not an array, or the request throws.
 */
export async function requestYomitanTermFrequencies(
termReadingList: YomitanTermReadingPair[],
deps: YomitanParserRuntimeDeps,
logger: LoggerLike,
): Promise<YomitanTermFrequency[]> {
// Bail out before touching the parser window when there is nothing to ask or no extension.
const normalizedTermReadingList = normalizeTermReadingList(termReadingList);
const yomitanExt = deps.getYomitanExt();
if (normalizedTermReadingList.length === 0 || !yomitanExt) {
return [];
}
// The window is fetched after the ensure step and re-checked for destruction.
const isReady = await ensureYomitanParserWindow(deps, logger);
const parserWindow = deps.getYomitanParserWindow();
if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
return [];
}
// Script evaluated in the parser window. `invoke` wraps chrome.runtime.sendMessage
// in a promise; the normalized pair list is embedded as a JSON literal.
const script = `
(async () => {
const invoke = (action, params) =>
new Promise((resolve, reject) => {
chrome.runtime.sendMessage({ action, params }, (response) => {
if (chrome.runtime.lastError) {
reject(new Error(chrome.runtime.lastError.message));
return;
}
if (!response || typeof response !== "object") {
reject(new Error("Invalid response from Yomitan backend"));
return;
}
if (response.error) {
reject(new Error(response.error.message || "Yomitan backend error"));
return;
}
resolve(response.result);
});
});
const optionsFull = await invoke("optionsGetFull", undefined);
const profileIndex = optionsFull.profileCurrent;
const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
const dictionaries = Array.isArray(dictionariesRaw)
? dictionariesRaw
.filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
.map((entry) => entry.name)
: [];
if (dictionaries.length === 0) {
return [];
}
return await invoke("getTermFrequencies", {
termReadingList: ${JSON.stringify(normalizedTermReadingList)},
dictionaries
});
})();
`;
// Any failure inside the script surfaces here as a rejection; it is logged and
// treated as "no frequency data" rather than propagated to the caller.
try {
const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
if (!Array.isArray(rawResult)) {
return [];
}
// Drop entries that fail validation instead of failing the whole batch.
return rawResult
.map((entry) => toYomitanTermFrequency(entry))
.filter((entry): entry is YomitanTermFrequency => entry !== null);
} catch (err) {
logger.error('Yomitan term frequency request failed:', (err as Error).message);
return [];
}
}
export async function syncYomitanDefaultAnkiServer( export async function syncYomitanDefaultAnkiServer(
serverUrl: string, serverUrl: string,
deps: YomitanParserRuntimeDeps, deps: YomitanParserRuntimeDeps,

View File

@@ -28,7 +28,7 @@ export function getFrequencyDictionarySearchPaths(
const rawSearchPaths: string[] = []; const rawSearchPaths: string[] = [];
// User-provided path takes precedence over bundled/default roots. // User-provided path takes precedence over bundled/default roots.
// Root list should include `vendor/jiten_freq_global` in callers. // Root list should include default installed frequency-dictionary locations in callers.
if (sourcePath && sourcePath.trim()) { if (sourcePath && sourcePath.trim()) {
rawSearchPaths.push(sourcePath.trim()); rawSearchPaths.push(sourcePath.trim());
rawSearchPaths.push(path.join(sourcePath.trim(), 'frequency-dictionary')); rawSearchPaths.push(path.join(sourcePath.trim(), 'frequency-dictionary'));

View File

@@ -53,9 +53,9 @@ test('frequency dictionary roots main handler returns expected root list', () =>
joinPath: (...parts) => parts.join('/'), joinPath: (...parts) => parts.join('/'),
})(); })();
assert.equal(roots.length, 15); assert.equal(roots.length, 11);
assert.equal(roots[0], '/repo/dist/main/../../vendor/jiten_freq_global'); assert.equal(roots[0], '/repo/dist/main/../../vendor/frequency-dictionary');
assert.equal(roots[14], '/repo'); assert.equal(roots[10], '/repo');
}); });
test('frequency dictionary runtime main deps builder maps search paths/source and log prefix', () => { test('frequency dictionary runtime main deps builder maps search paths/source and log prefix', () => {

View File

@@ -38,13 +38,9 @@ export function createBuildFrequencyDictionaryRootsMainHandler(deps: {
joinPath: (...parts: string[]) => string; joinPath: (...parts: string[]) => string;
}) { }) {
return () => [ return () => [
deps.joinPath(deps.dirname, '..', '..', 'vendor', 'jiten_freq_global'),
deps.joinPath(deps.dirname, '..', '..', 'vendor', 'frequency-dictionary'), deps.joinPath(deps.dirname, '..', '..', 'vendor', 'frequency-dictionary'),
deps.joinPath(deps.appPath, 'vendor', 'jiten_freq_global'),
deps.joinPath(deps.appPath, 'vendor', 'frequency-dictionary'), deps.joinPath(deps.appPath, 'vendor', 'frequency-dictionary'),
deps.joinPath(deps.resourcesPath, 'jiten_freq_global'),
deps.joinPath(deps.resourcesPath, 'frequency-dictionary'), deps.joinPath(deps.resourcesPath, 'frequency-dictionary'),
deps.joinPath(deps.resourcesPath, 'app.asar', 'vendor', 'jiten_freq_global'),
deps.joinPath(deps.resourcesPath, 'app.asar', 'vendor', 'frequency-dictionary'), deps.joinPath(deps.resourcesPath, 'app.asar', 'vendor', 'frequency-dictionary'),
deps.userDataPath, deps.userDataPath,
deps.appUserDataPath, deps.appUserDataPath,

View File

@@ -1 +0,0 @@
{"title":"Jiten","format":3,"revision":"Jiten 26-02-16","isUpdatable":true,"indexUrl":"https://api.jiten.moe/api/frequency-list/index","downloadUrl":"https://api.jiten.moe/api/frequency-list/download","sequenced":false,"frequencyMode":"rank-based","author":"Jiten","url":"https://jiten.moe","description":"Dictionary based on frequency data of all media from jiten.moe"}

File diff suppressed because one or more lines are too long