mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-28 06:22:45 -08:00
feat: source frequency ranks from installed Yomitan dictionaries
This commit is contained in:
@@ -27,7 +27,7 @@ SubMiner is an Electron overlay that sits on top of mpv. It turns your video pla
|
||||
- **Hover to look up** — Yomitan dictionary popups directly on subtitles
|
||||
- **One-key mining** — Creates Anki cards with sentence, audio, screenshot, and translation
|
||||
- **Instant auto-enrichment** — Optional local AnkiConnect proxy enriches new Yomitan cards immediately
|
||||
- **N+1 highlighting** — Marks known words from your Anki deck so unknown ones jump out
|
||||
- **Reading annotations** — Combines N+1 targeting, frequency-dictionary highlighting, and JLPT underlining while you read
|
||||
- **Subtitle tools** — Download from Jimaku, sync with alass/ffsubsync
|
||||
- **Immersion tracking** — SQLite-powered stats on your watch time and mining activity
|
||||
- **Custom texthooker page** — Built-in custom texthooker page and WebSocket, no extra setup
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
}, // Jlpt colors setting.
|
||||
"frequencyDictionary": {
|
||||
"enabled": false, // Enable frequency-dictionary-based highlighting based on token rank. Values: true | false
|
||||
"sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, built-in discovery search paths are used.
|
||||
"sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, SubMiner searches installed/default frequency-dictionary locations.
|
||||
"topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000).
|
||||
"mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded
|
||||
"singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`.
|
||||
|
||||
@@ -757,7 +757,7 @@ See `config.example.jsonc` for detailed configuration options.
|
||||
| `enableJlpt` | boolean | Enable JLPT level underline styling (`false` by default) |
|
||||
| `preserveLineBreaks` | boolean | Preserve line breaks in visible overlay subtitle rendering (`false` by default). Enable to mirror mpv line layout. |
|
||||
| `frequencyDictionary.enabled` | boolean | Enable frequency highlighting from dictionary lookups (`false` by default) |
|
||||
| `frequencyDictionary.sourcePath` | string | Path to a local frequency dictionary root. Leave empty or omit to use the built-in bundled dictionary search paths. |
|
||||
| `frequencyDictionary.sourcePath` | string | Path to a local frequency dictionary root. Leave empty or omit to use installed/default frequency-dictionary search paths. |
|
||||
| `frequencyDictionary.topX` | number | Only color tokens whose frequency rank is `<= topX` (`1000` by default) |
|
||||
| `frequencyDictionary.mode` | string | `"single"` or `"banded"` (`"single"` by default) |
|
||||
| `frequencyDictionary.singleColor` | string | Color used for all highlighted tokens in single mode |
|
||||
@@ -774,7 +774,7 @@ Frequency dictionary highlighting uses the same dictionary file format as JLPT b
|
||||
Lookup behavior:
|
||||
|
||||
- Set `frequencyDictionary.sourcePath` to a directory containing `term_meta_bank_*.json` for a fully custom source.
|
||||
- If `sourcePath` is missing or empty, SubMiner uses bundled defaults from `vendor/jiten_freq_global` (packaged under `<resources>/jiten_freq_global` in distribution builds).
|
||||
- If `sourcePath` is missing or empty, SubMiner searches default install/runtime locations for `frequency-dictionary` directories (for example app resources, user data paths, and current working directory).
|
||||
- In both cases, only terms with a valid `frequencyRank` are used; everything else falls back to no highlighting.
|
||||
|
||||
In `single` mode all highlights use `singleColor`; in `banded` mode tokens map to five ascending color bands from most common to least common inside the topX window.
|
||||
|
||||
@@ -7,7 +7,7 @@ titleTemplate: Immersion Mining Workflow for MPV
|
||||
hero:
|
||||
name: SubMiner
|
||||
text: Immersion Mining for MPV
|
||||
tagline: Watch media, mine vocabulary, and build cards without leaving the scene.
|
||||
tagline: Watch media, mine vocabulary, and craft Anki cards without leaving the scene.
|
||||
image:
|
||||
src: /assets/SubMiner.png
|
||||
alt: SubMiner logo
|
||||
@@ -35,16 +35,11 @@ features:
|
||||
alt: Anki card icon
|
||||
title: Anki Card Enrichment
|
||||
details: Auto-fills card fields with subtitle sentence, clipping, image, and translation so you can focus on learning.
|
||||
- icon:
|
||||
src: /assets/dual-layer.svg
|
||||
alt: Dual layer icon
|
||||
title: Unified Overlay Stack
|
||||
details: Primary interactive subtitle layer with a built-in secondary context bar, all in one overlay window.
|
||||
- icon:
|
||||
src: /assets/highlight.svg
|
||||
alt: Highlight icon
|
||||
title: N+1 Highlighting
|
||||
details: Surfaces known words from your deck so unknown targets stand out during immersion sessions.
|
||||
title: Reading Annotations
|
||||
details: Combines N+1 targeting, Jiten frequency highlighting, and JLPT tagging so useful cues stay visible while you read.
|
||||
- icon:
|
||||
src: /assets/tokenization.svg
|
||||
alt: Tokenization icon
|
||||
@@ -55,16 +50,6 @@ features:
|
||||
alt: Subtitle download icon
|
||||
title: Subtitle Download & Sync
|
||||
details: Pull and synchronize subtitles with Jimaku plus alass/ffsubsync in one cohesive workflow.
|
||||
- icon:
|
||||
src: /assets/keyboard.svg
|
||||
alt: Keyboard icon
|
||||
title: Keyboard-Driven
|
||||
details: Run lookups, mining actions, clipping, and workflow toggles with one configurable shortcut surface.
|
||||
- icon:
|
||||
src: /assets/texthooker.svg
|
||||
alt: Texthooker icon
|
||||
title: Texthooker & WebSocket
|
||||
details: Stream subtitles in real time to browser tools via local WebSocket and keep your stack integrated.
|
||||
---
|
||||
|
||||
<script setup>
|
||||
|
||||
@@ -26,7 +26,7 @@ The expected files are:
|
||||
|
||||
Each bank maps terms to frequency metadata; only entries with a `frequency.displayValue` are considered for JLPT tagging.
|
||||
|
||||
SubMiner also reuses the same `term_meta_bank_*.json` format for frequency-based subtitle highlighting. The default frequency source is now bundled as `vendor/jiten_freq_global`, so users can enable `subtitleStyle.frequencyDictionary` without extra setup.
|
||||
SubMiner also reuses the same `term_meta_bank_*.json` format for frequency-based subtitle highlighting, using installed/default `frequency-dictionary` locations or an explicit `subtitleStyle.frequencyDictionary.sourcePath`.
|
||||
|
||||
## Source and update process
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
}, // Jlpt colors setting.
|
||||
"frequencyDictionary": {
|
||||
"enabled": false, // Enable frequency-dictionary-based highlighting based on token rank. Values: true | false
|
||||
"sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, built-in discovery search paths are used.
|
||||
"sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, SubMiner searches installed/default frequency-dictionary locations.
|
||||
"topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000).
|
||||
"mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded
|
||||
"singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`.
|
||||
|
||||
@@ -119,10 +119,6 @@
|
||||
"from": "vendor/yomitan-jlpt-vocab",
|
||||
"to": "yomitan-jlpt-vocab"
|
||||
},
|
||||
{
|
||||
"from": "vendor/jiten_freq_global",
|
||||
"to": "jiten_freq_global"
|
||||
},
|
||||
{
|
||||
"from": "assets",
|
||||
"to": "assets"
|
||||
|
||||
@@ -33,7 +33,7 @@ interface CliOptions {
|
||||
function parseCliArgs(argv: string[]): CliOptions {
|
||||
const args = [...argv];
|
||||
let inputParts: string[] = [];
|
||||
let dictionaryPath = path.join(process.cwd(), 'vendor', 'jiten_freq_global');
|
||||
let dictionaryPath = path.join(process.cwd(), 'vendor', 'frequency-dictionary');
|
||||
let emitPretty = false;
|
||||
let emitDiagnostics = false;
|
||||
let mecabCommand: string | undefined;
|
||||
@@ -394,7 +394,7 @@ function printUsage(): void {
|
||||
--color-band-5 <#hex> Frequency band-5 color.
|
||||
--color-known <#hex> Known-word color (default: #a6da95).
|
||||
--color-n-plus-one <#hex> N+1 target color (default: #c6a0f6).
|
||||
--dictionary <path> Frequency dictionary root path (default: ./vendor/jiten_freq_global)
|
||||
--dictionary <path> Frequency dictionary root path (default: ./vendor/frequency-dictionary)
|
||||
--mecab-command <path> Optional MeCab binary path (default: mecab)
|
||||
--mecab-dictionary <path> Optional MeCab dictionary directory (default: system default)
|
||||
-h, --help Show usage.
|
||||
|
||||
@@ -79,3 +79,30 @@ test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a sing
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
// Verifies that a bank entry's numeric `frequency.value` wins over `displayValue`,
// and that `displayValue` is still used when `value` is absent.
test('createFrequencyDictionaryLookup prefers frequency.value over displayValue', async () => {
  const logs: string[] = [];
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-'));

  try {
    const bankPath = path.join(tempDir, 'term_meta_bank_1.json');
    fs.writeFileSync(
      bankPath,
      JSON.stringify([
        ['猫', 1, { frequency: { value: 1234, displayValue: 1200 } }],
        ['犬', 2, { frequency: { displayValue: 88 } }],
      ]),
    );

    const lookup = await createFrequencyDictionaryLookup({
      searchPaths: [tempDir],
      log: (message) => {
        logs.push(message);
      },
    });

    assert.equal(lookup('猫'), 1234);
    assert.equal(lookup('犬'), 88);
    assert.equal(
      logs.some((entry) => entry.includes('Frequency dictionary loaded from')),
      true,
    );
  } finally {
    // Remove the fixture directory even when an assertion fails so repeated
    // runs do not pile up temp directories.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
@@ -18,17 +18,14 @@ function normalizeFrequencyTerm(value: string): string {
|
||||
return value.trim().toLowerCase();
|
||||
}
|
||||
|
||||
function extractFrequencyDisplayValue(meta: unknown): number | null {
|
||||
if (!meta || typeof meta !== 'object') return null;
|
||||
const frequency = (meta as { frequency?: unknown }).frequency;
|
||||
if (!frequency || typeof frequency !== 'object') return null;
|
||||
const displayValue = (frequency as { displayValue?: unknown }).displayValue;
|
||||
if (typeof displayValue === 'number') {
|
||||
if (!Number.isFinite(displayValue) || displayValue <= 0) return null;
|
||||
return Math.floor(displayValue);
|
||||
function parsePositiveFrequencyNumber(value: unknown): number | null {
|
||||
if (typeof value === 'number') {
|
||||
if (!Number.isFinite(value) || value <= 0) return null;
|
||||
return Math.floor(value);
|
||||
}
|
||||
if (typeof displayValue === 'string') {
|
||||
const normalized = displayValue.trim().replace(/,/g, '');
|
||||
|
||||
if (typeof value === 'string') {
|
||||
const normalized = value.trim().replace(/,/g, '');
|
||||
const parsed = Number.parseInt(normalized, 10);
|
||||
if (!Number.isFinite(parsed) || parsed <= 0) return null;
|
||||
return parsed;
|
||||
@@ -37,6 +34,20 @@ function extractFrequencyDisplayValue(meta: unknown): number | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Reads a frequency rank out of a term-meta payload.
 *
 * The payload must be an object carrying an object-valued `frequency` field.
 * `frequency.value` is tried first; `frequency.displayValue` is only consulted
 * when `value` does not parse to a rank.
 *
 * @param meta - Untyped metadata payload from a dictionary bank entry.
 * @returns The parsed rank, or `null` when neither field yields one.
 */
function extractFrequencyDisplayValue(meta: unknown): number | null {
  if (typeof meta !== 'object' || meta === null) {
    return null;
  }

  const { frequency } = meta as { frequency?: unknown };
  if (typeof frequency !== 'object' || frequency === null) {
    return null;
  }

  const fields = frequency as { value?: unknown; displayValue?: unknown };
  const fromValue = parsePositiveFrequencyNumber(fields.value);
  return fromValue !== null ? fromValue : parsePositiveFrequencyNumber(fields.displayValue);
}
|
||||
|
||||
function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null {
|
||||
if (!Array.isArray(entry) || entry.length < 3) {
|
||||
return null;
|
||||
|
||||
@@ -169,6 +169,55 @@ test('tokenizeSubtitle applies frequency dictionary ranks', async () => {
|
||||
assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
|
||||
});
|
||||
|
||||
// Exercises the path where frequency ranks come from the Yomitan backend
// (via the parser window) instead of a dictionary lookup function.
test('tokenizeSubtitle loads frequency ranks from Yomitan installed dictionaries', async () => {
  // Reply served when the injected script asks for term frequencies.
  const buildFrequencyReply = () => [
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
  ];

  // Reply served for every other script, i.e. the parse request.
  const buildParseReply = () => [
    {
      source: 'scanning-parser',
      index: 0,
      content: [
        [
          {
            text: '猫',
            reading: 'ねこ',
            headwords: [[{ term: '猫' }]],
          },
        ],
      ],
    },
  ];

  const runScript = async (script: string) => {
    if (script.includes('getTermFrequencies')) {
      return buildFrequencyReply();
    }
    return buildParseReply();
  };

  const buildParserWindow = () =>
    ({
      isDestroyed: () => false,
      webContents: {
        executeJavaScript: runScript,
      },
    }) as unknown as Electron.BrowserWindow;

  const result = await tokenizeSubtitle(
    '猫',
    makeDeps({
      getFrequencyDictionaryEnabled: () => true,
      getYomitanExt: () => ({ id: 'dummy-ext' }) as any,
      getYomitanParserWindow: buildParserWindow,
    }),
  );

  const tokens = result.tokens;
  assert.equal(tokens?.length, 1);
  assert.equal(tokens?.[0]?.frequencyRank, 77);
});
|
||||
|
||||
test('tokenizeSubtitle uses only selected Yomitan headword for frequency lookup', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'猫です',
|
||||
|
||||
@@ -10,7 +10,10 @@ import {
|
||||
JlptLevel,
|
||||
} from '../../types';
|
||||
import { selectYomitanParseTokens } from './tokenizer/parser-selection-stage';
|
||||
import { requestYomitanParseResults } from './tokenizer/yomitan-parser-runtime';
|
||||
import {
|
||||
requestYomitanParseResults,
|
||||
requestYomitanTermFrequencies,
|
||||
} from './tokenizer/yomitan-parser-runtime';
|
||||
|
||||
const logger = createLogger('main:tokenizer');
|
||||
|
||||
@@ -214,6 +217,64 @@ function logSelectedYomitanGroups(text: string, tokens: MergedToken[]): void {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Coerces an untyped value into a usable frequency rank.
 *
 * @param value - Candidate rank of unknown type.
 * @returns The value floored and clamped to at least 1 when it is a finite
 *   number greater than zero; otherwise `null`.
 */
function normalizePositiveFrequencyRank(value: unknown): number | null {
  if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
    // Fractions in (0, 1) floor to 0, so clamp them up to rank 1.
    return Math.max(1, Math.floor(value));
  }
  return null;
}
|
||||
|
||||
/**
 * Chooses the text used to look a token up in frequency data.
 *
 * Preference order: non-empty `headword`, then non-empty `reading`, then the
 * token's `surface` (returned as-is, even when empty).
 */
function resolveFrequencyLookupText(token: MergedToken): string {
  for (const candidate of [token.headword, token.reading]) {
    if (candidate && candidate.length > 0) {
      return candidate;
    }
  }
  return token.surface;
}
|
||||
|
||||
/**
 * Stamps `frequencyRank` onto tokens using frequency entries returned by Yomitan.
 *
 * Entries are grouped by trimmed term; when a term appears more than once the
 * smallest valid rank is kept. Tokens are matched through their trimmed lookup
 * text. Tokens without a match are passed through untouched.
 *
 * @param tokens - Tokens to annotate; the input array is never mutated.
 * @param frequencies - Term/frequency pairs to draw ranks from.
 * @returns The original `tokens` array when there is nothing to apply,
 *   otherwise a new array in which matched tokens are shallow copies.
 */
function applyYomitanFrequencyRanks(
  tokens: MergedToken[],
  frequencies: ReadonlyArray<{ term: string; frequency: number }>,
): MergedToken[] {
  if (tokens.length === 0 || frequencies.length === 0) {
    return tokens;
  }

  const bestRanks = new Map<string, number>();
  for (let i = 0; i < frequencies.length; i += 1) {
    const entry = frequencies[i];
    const key = entry.term.trim();
    const candidate = normalizePositiveFrequencyRank(entry.frequency);
    if (key.length === 0 || candidate === null) {
      continue;
    }
    const previous = bestRanks.get(key);
    // Lower rank means more frequent, so keep the minimum per term.
    if (previous === undefined || candidate < previous) {
      bestRanks.set(key, candidate);
    }
  }

  if (bestRanks.size === 0) {
    return tokens;
  }

  return tokens.map((token) => {
    const key = resolveFrequencyLookupText(token).trim();
    const rank = key ? bestRanks.get(key) : undefined;
    return rank === undefined ? token : { ...token, frequencyRank: rank };
  });
}
|
||||
|
||||
function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOptions {
|
||||
return {
|
||||
nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false,
|
||||
@@ -246,14 +307,24 @@ async function parseWithYomitanInternalParser(
|
||||
logSelectedYomitanGroups(text, selectedTokens);
|
||||
}
|
||||
|
||||
let tokensWithFrequency = selectedTokens;
|
||||
if (options.frequencyEnabled) {
|
||||
const termReadingList = selectedTokens.map((token) => ({
|
||||
term: resolveFrequencyLookupText(token),
|
||||
reading: token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null,
|
||||
}));
|
||||
const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger);
|
||||
tokensWithFrequency = applyYomitanFrequencyRanks(selectedTokens, yomitanFrequencies);
|
||||
}
|
||||
|
||||
if (!needsMecabPosEnrichment(options)) {
|
||||
return selectedTokens;
|
||||
return tokensWithFrequency;
|
||||
}
|
||||
|
||||
try {
|
||||
const mecabTokens = await deps.tokenizeWithMecab(text);
|
||||
const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync;
|
||||
return await enrichTokensWithMecab(selectedTokens, mecabTokens);
|
||||
return await enrichTokensWithMecab(tokensWithFrequency, mecabTokens);
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
logger.warn(
|
||||
@@ -262,7 +333,7 @@ async function parseWithYomitanInternalParser(
|
||||
`tokenCount=${selectedTokens.length}`,
|
||||
`textLength=${text.length}`,
|
||||
);
|
||||
return selectedTokens;
|
||||
return tokensWithFrequency;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -95,6 +95,27 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex
|
||||
assert.deepEqual(lookupCalls, ['猫']);
|
||||
});
|
||||
|
||||
// A rank already present on the token must survive when no lookup function is supplied.
test('annotateTokens preserves existing frequency rank when lookup is unavailable', () => {
  const preRanked = makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 });
  const depsWithoutLookup = makeDeps({ getFrequencyRank: undefined });

  const annotated = annotateTokens([preRanked], depsWithoutLookup);

  assert.equal(annotated[0]?.frequencyRank, 42);
});
|
||||
|
||||
// A rank already present on the token must win over whatever the lookup would return.
test('annotateTokens prefers existing frequency rank over fallback lookup', () => {
  const preRanked = makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 });
  const depsWithCompetingLookup = makeDeps({
    getFrequencyRank: () => 9,
  });

  const annotated = annotateTokens([preRanked], depsWithCompetingLookup);

  assert.equal(annotated[0]?.frequencyRank, 42);
});
|
||||
|
||||
test('annotateTokens handles JLPT disabled and eligibility exclusion paths', () => {
|
||||
let disabledLookupCalls = 0;
|
||||
const disabledResult = annotateTokens(
|
||||
@@ -157,3 +178,38 @@ test('annotateTokens N+1 handoff marks expected target when threshold is satisfi
|
||||
assert.equal(result[1]?.isNPlusOneTarget, true);
|
||||
assert.equal(result[2]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
// With a three-word minimum, a sentence whose only other tokens are a particle
// and a bound auxiliary must not produce an N+1 target.
test('annotateTokens N+1 minimum sentence words counts only eligible word tokens', () => {
  const noun = makeToken({ surface: '猫', headword: '猫', startPos: 0, endPos: 1 });
  const particle = makeToken({
    surface: 'が',
    headword: 'が',
    partOfSpeech: PartOfSpeech.particle,
    pos1: '助詞',
    startPos: 1,
    endPos: 2,
  });
  const boundAuxiliary = makeToken({
    surface: 'です',
    headword: 'です',
    partOfSpeech: PartOfSpeech.bound_auxiliary,
    pos1: '助動詞',
    startPos: 2,
    endPos: 4,
  });
  const knownTexts = new Set<string>(['が', 'です']);

  const annotated = annotateTokens(
    [noun, particle, boundAuxiliary],
    makeDeps({
      isKnownWord: (text) => knownTexts.has(text),
    }),
    { minSentenceWordsForNPlusOne: 3 },
  );

  assert.equal(annotated[0]?.isKnown, false);
  assert.equal(annotated[1]?.isKnown, true);
  assert.equal(annotated[2]?.isKnown, true);
  assert.equal(annotated[0]?.isNPlusOneTarget, false);
});
|
||||
|
||||
@@ -141,6 +141,11 @@ function applyFrequencyMarking(
|
||||
return { ...token, frequencyRank: undefined };
|
||||
}
|
||||
|
||||
if (typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)) {
|
||||
const rank = Math.max(1, Math.floor(token.frequencyRank));
|
||||
return { ...token, frequencyRank: rank };
|
||||
}
|
||||
|
||||
const lookupTexts = getFrequencyLookupTextCandidates(token);
|
||||
if (lookupTexts.length === 0) {
|
||||
return { ...token, frequencyRank: undefined };
|
||||
@@ -354,6 +359,14 @@ export function annotateTokens(
|
||||
const frequencyMarkedTokens =
|
||||
frequencyEnabled && deps.getFrequencyRank
|
||||
? applyFrequencyMarking(knownMarkedTokens, deps.getFrequencyRank)
|
||||
: frequencyEnabled
|
||||
? knownMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
frequencyRank:
|
||||
typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank)
|
||||
? Math.max(1, Math.floor(token.frequencyRank))
|
||||
: undefined,
|
||||
}))
|
||||
: knownMarkedTokens.map((token) => ({
|
||||
...token,
|
||||
frequencyRank: undefined,
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
import { syncYomitanDefaultAnkiServer } from './yomitan-parser-runtime';
|
||||
import {
|
||||
requestYomitanTermFrequencies,
|
||||
syncYomitanDefaultAnkiServer,
|
||||
} from './yomitan-parser-runtime';
|
||||
|
||||
function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
|
||||
const parserWindow = {
|
||||
@@ -81,3 +84,35 @@ test('syncYomitanDefaultAnkiServer no-ops for empty target url', async () => {
|
||||
assert.equal(updated, false);
|
||||
assert.equal(executeCount, 0);
|
||||
});
|
||||
|
||||
// The runtime should drop invalid backend entries and inject a script that
// talks to both the options and the frequency backend actions.
test('requestYomitanTermFrequencies returns normalized frequency entries', async () => {
  let capturedScript = '';
  const backendReply = [
    {
      term: '猫',
      reading: 'ねこ',
      dictionary: 'freq-dict',
      frequency: 77,
      displayValue: '77',
      displayValueParsed: true,
    },
    {
      // Zero is not a valid rank, so this entry is expected to be filtered out.
      term: 'invalid',
      dictionary: 'freq-dict',
      frequency: 0,
    },
  ];
  const deps = createDeps(async (script) => {
    capturedScript = script;
    return backendReply;
  });
  const silentLogger = {
    error: () => undefined,
  };

  const entries = await requestYomitanTermFrequencies(
    [{ term: '猫', reading: 'ねこ' }],
    deps,
    silentLogger,
  );

  assert.equal(entries.length, 1);
  assert.equal(entries[0]?.term, '猫');
  assert.equal(entries[0]?.frequency, 77);
  for (const expectedAction of [/getTermFrequencies/, /optionsGetFull/]) {
    assert.match(capturedScript, expectedAction);
  }
});
|
||||
|
||||
@@ -15,6 +15,89 @@ interface YomitanParserRuntimeDeps {
|
||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||
}
|
||||
|
||||
export interface YomitanTermFrequency {
|
||||
term: string;
|
||||
reading: string | null;
|
||||
dictionary: string;
|
||||
frequency: number;
|
||||
displayValue: string | null;
|
||||
displayValueParsed: boolean;
|
||||
}
|
||||
|
||||
export interface YomitanTermReadingPair {
|
||||
term: string;
|
||||
reading: string | null;
|
||||
}
|
||||
|
||||
/** Type guard: true for any non-null value whose `typeof` is `'object'` (arrays included). */
function isObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}
|
||||
|
||||
/**
 * Converts an untyped value to a positive integer.
 *
 * @returns `null` unless `value` is a finite number above zero; otherwise the
 *   value floored, with anything that floors below 1 raised to 1.
 */
function asPositiveInteger(value: unknown): number | null {
  if (typeof value !== 'number') {
    return null;
  }
  if (!Number.isFinite(value)) {
    return null;
  }
  if (value <= 0) {
    return null;
  }
  const floored = Math.floor(value);
  return floored < 1 ? 1 : floored;
}
|
||||
|
||||
/**
 * Validates one raw backend entry and converts it to a `YomitanTermFrequency`.
 *
 * An entry is rejected (`null`) when it is not an object, when `term` or
 * `dictionary` is missing or blank after trimming, or when `frequency` is not
 * a positive number. `reading` and `displayValue` are kept only when they are
 * strings; `displayValueParsed` is true only for a literal `true`.
 */
function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null {
  if (!isObject(value)) {
    return null;
  }

  const trimmedOrEmpty = (candidate: unknown): string =>
    typeof candidate === 'string' ? candidate.trim() : '';
  const stringOrNull = (candidate: unknown): string | null =>
    typeof candidate === 'string' ? candidate : null;

  const term = trimmedOrEmpty(value.term);
  const dictionary = trimmedOrEmpty(value.dictionary);
  const frequency = asPositiveInteger(value.frequency);
  if (term === '' || dictionary === '' || frequency === null) {
    return null;
  }

  return {
    term,
    reading: stringOrNull(value.reading),
    dictionary,
    frequency,
    displayValue: stringOrNull(value.displayValue),
    displayValueParsed: value.displayValueParsed === true,
  };
}
|
||||
|
||||
/**
 * Cleans up a list of term/reading pairs before it is sent to Yomitan.
 *
 * Terms are trimmed and pairs with a blank or non-string term are dropped.
 * Readings are trimmed, with blank or non-string readings becoming `null`.
 * Duplicate (term, reading) pairs are removed, keeping first-seen order.
 */
function normalizeTermReadingList(termReadingList: YomitanTermReadingPair[]): YomitanTermReadingPair[] {
  const unique = new Map<string, YomitanTermReadingPair>();

  for (const entry of termReadingList) {
    const term = typeof entry.term === 'string' ? entry.term.trim() : '';
    if (term.length === 0) {
      continue;
    }

    const trimmedReading = typeof entry.reading === 'string' ? entry.reading.trim() : '';
    const reading = trimmedReading.length > 0 ? trimmedReading : null;

    // NUL separates the two parts of the dedupe key.
    const dedupeKey = [term, reading ?? ''].join('\u0000');
    if (!unique.has(dedupeKey)) {
      unique.set(dedupeKey, { term, reading });
    }
  }

  return [...unique.values()];
}
|
||||
|
||||
async function ensureYomitanParserWindow(
|
||||
deps: YomitanParserRuntimeDeps,
|
||||
logger: LoggerLike,
|
||||
@@ -154,6 +237,79 @@ export async function requestYomitanParseResults(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetches frequency entries for the given term/reading pairs from the Yomitan
 * backend by running a script inside the Yomitan parser window.
 *
 * The injected script reads the full Yomitan options, collects the names of
 * the dictionaries enabled in the current profile, and forwards them with the
 * normalized term list to the backend's `getTermFrequencies` action.
 *
 * @param termReadingList - Pairs to look up; normalized and de-duplicated before use.
 * @param deps - Accessors for the Yomitan extension and parser window.
 * @param logger - Receives an error message when the request fails.
 * @returns Validated frequency entries. An empty array is returned when there
 *   is nothing to look up, the extension or parser window is unavailable, the
 *   backend reply is not an array, or the request throws. This function does
 *   not reject on backend failure.
 */
export async function requestYomitanTermFrequencies(
  termReadingList: YomitanTermReadingPair[],
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<YomitanTermFrequency[]> {
  const normalizedTermReadingList = normalizeTermReadingList(termReadingList);
  const yomitanExt = deps.getYomitanExt();
  if (normalizedTermReadingList.length === 0 || !yomitanExt) {
    return [];
  }

  const isReady = await ensureYomitanParserWindow(deps, logger);
  const parserWindow = deps.getYomitanParserWindow();
  if (!isReady || !parserWindow || parserWindow.isDestroyed()) {
    return [];
  }

  // The term list is embedded as a JSON literal; the dictionary list is
  // resolved inside the page from the current profile.
  const script = `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      const optionsFull = await invoke("optionsGetFull", undefined);
      const profileIndex = optionsFull.profileCurrent;
      const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? [];
      const dictionaries = Array.isArray(dictionariesRaw)
        ? dictionariesRaw
            .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string")
            .map((entry) => entry.name)
        : [];

      if (dictionaries.length === 0) {
        return [];
      }

      return await invoke("getTermFrequencies", {
        termReadingList: ${JSON.stringify(normalizedTermReadingList)},
        dictionaries
      });
    })();
  `;

  try {
    const rawResult = await parserWindow.webContents.executeJavaScript(script, true);
    if (!Array.isArray(rawResult)) {
      return [];
    }

    // Drop any entry that fails validation rather than failing the whole batch.
    return rawResult
      .map((entry) => toYomitanTermFrequency(entry))
      .filter((entry): entry is YomitanTermFrequency => entry !== null);
  } catch (err) {
    // A thrown value is not guaranteed to be an Error; narrow it so the log
    // never prints `undefined` as the failure reason.
    const message = err instanceof Error ? err.message : String(err);
    logger.error('Yomitan term frequency request failed:', message);
    return [];
  }
}
|
||||
|
||||
export async function syncYomitanDefaultAnkiServer(
|
||||
serverUrl: string,
|
||||
deps: YomitanParserRuntimeDeps,
|
||||
|
||||
@@ -28,7 +28,7 @@ export function getFrequencyDictionarySearchPaths(
|
||||
|
||||
const rawSearchPaths: string[] = [];
|
||||
// User-provided path takes precedence over bundled/default roots.
|
||||
// Root list should include `vendor/jiten_freq_global` in callers.
|
||||
// Root list should include default installed frequency-dictionary locations in callers.
|
||||
if (sourcePath && sourcePath.trim()) {
|
||||
rawSearchPaths.push(sourcePath.trim());
|
||||
rawSearchPaths.push(path.join(sourcePath.trim(), 'frequency-dictionary'));
|
||||
|
||||
@@ -53,9 +53,9 @@ test('frequency dictionary roots main handler returns expected root list', () =>
|
||||
joinPath: (...parts) => parts.join('/'),
|
||||
})();
|
||||
|
||||
assert.equal(roots.length, 15);
|
||||
assert.equal(roots[0], '/repo/dist/main/../../vendor/jiten_freq_global');
|
||||
assert.equal(roots[14], '/repo');
|
||||
assert.equal(roots.length, 11);
|
||||
assert.equal(roots[0], '/repo/dist/main/../../vendor/frequency-dictionary');
|
||||
assert.equal(roots[10], '/repo');
|
||||
});
|
||||
|
||||
test('frequency dictionary runtime main deps builder maps search paths/source and log prefix', () => {
|
||||
|
||||
@@ -38,13 +38,9 @@ export function createBuildFrequencyDictionaryRootsMainHandler(deps: {
|
||||
joinPath: (...parts: string[]) => string;
|
||||
}) {
|
||||
return () => [
|
||||
deps.joinPath(deps.dirname, '..', '..', 'vendor', 'jiten_freq_global'),
|
||||
deps.joinPath(deps.dirname, '..', '..', 'vendor', 'frequency-dictionary'),
|
||||
deps.joinPath(deps.appPath, 'vendor', 'jiten_freq_global'),
|
||||
deps.joinPath(deps.appPath, 'vendor', 'frequency-dictionary'),
|
||||
deps.joinPath(deps.resourcesPath, 'jiten_freq_global'),
|
||||
deps.joinPath(deps.resourcesPath, 'frequency-dictionary'),
|
||||
deps.joinPath(deps.resourcesPath, 'app.asar', 'vendor', 'jiten_freq_global'),
|
||||
deps.joinPath(deps.resourcesPath, 'app.asar', 'vendor', 'frequency-dictionary'),
|
||||
deps.userDataPath,
|
||||
deps.appUserDataPath,
|
||||
|
||||
1
vendor/jiten_freq_global/index.json
vendored
1
vendor/jiten_freq_global/index.json
vendored
@@ -1 +0,0 @@
|
||||
{"title":"Jiten","format":3,"revision":"Jiten 26-02-16","isUpdatable":true,"indexUrl":"https://api.jiten.moe/api/frequency-list/index","downloadUrl":"https://api.jiten.moe/api/frequency-list/download","sequenced":false,"frequencyMode":"rank-based","author":"Jiten","url":"https://jiten.moe","description":"Dictionary based on frequency data of all media from jiten.moe"}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user