diff --git a/config.example.jsonc b/config.example.jsonc index a6429bb..fb67f02 100644 --- a/config.example.jsonc +++ b/config.example.jsonc @@ -136,6 +136,7 @@ "sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, SubMiner searches installed/default frequency-dictionary locations. "topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000). "mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. Values: single | banded + "matchMode": "headword", // Frequency lookup text selection mode. Values: headword | surface "singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`. "bandedColors": [ "#ed8796", diff --git a/docs/configuration.md b/docs/configuration.md index 09500d4..eb3c10e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -601,6 +601,8 @@ See `config.example.jsonc` for detailed configuration options and more examples. **Supported commands:** Any valid mpv JSON IPC command array (`["cycle", "pause"]`, `["seek", 5]`, `["script-binding", "..."]`, etc.) +For subtitle-position and subtitle-track proxy commands (`sub-pos`, `sid`, `secondary-sid`), SubMiner also shows an mpv OSD notification after the command runs. + **See `config.example.jsonc`** for more keybinding examples and configuration options. ### Runtime Option Palette @@ -760,6 +762,7 @@ See `config.example.jsonc` for detailed configuration options. | `frequencyDictionary.sourcePath` | string | Path to a local frequency dictionary root. Leave empty or omit to use installed/default frequency-dictionary search paths. 
| | `frequencyDictionary.topX` | number | Only color tokens whose frequency rank is `<= topX` (`1000` by default) | | `frequencyDictionary.mode` | string | `"single"` or `"banded"` (`"single"` by default) | +| `frequencyDictionary.matchMode` | string | `"headword"` or `"surface"` (`"headword"` by default) | | `frequencyDictionary.singleColor` | string | Color used for all highlighted tokens in single mode | | `frequencyDictionary.bandedColors` | string[] | Array of five hex colors used for ranked bands in banded mode | | `nPlusOneColor` | string | Existing n+1 highlight color (default: `#c6a0f6`) | @@ -776,6 +779,7 @@ Lookup behavior: - Set `frequencyDictionary.sourcePath` to a directory containing `term_meta_bank_*.json` for a fully custom source. - If `sourcePath` is missing or empty, SubMiner searches default install/runtime locations for `frequency-dictionary` directories (for example app resources, user data paths, and current working directory). - In both cases, only terms with a valid `frequencyRank` are used; everything else falls back to no highlighting. +- `frequencyDictionary.matchMode` controls which token text is used for frequency lookups: `headword` (dictionary form) or `surface` (visible subtitle text). In `single` mode all highlights use `singleColor`; in `banded` mode tokens map to five ascending color bands from most common to least common inside the topX window. diff --git a/docs/public/config.example.jsonc b/docs/public/config.example.jsonc index b1d60f9..42475bc 100644 --- a/docs/public/config.example.jsonc +++ b/docs/public/config.example.jsonc @@ -136,6 +136,7 @@ "sourcePath": "", // Optional absolute path to a frequency dictionary directory. If empty, SubMiner searches installed/default frequency-dictionary locations. "topX": 1000, // Only color tokens with frequency rank <= topX (default: 1000). "mode": "single", // single: use one color for all matching tokens. banded: use color ramp by frequency band. 
Values: single | banded + "matchMode": "headword", // Frequency lookup text selection mode. Values: headword | surface "singleColor": "#f5a97f", // Color used when frequencyDictionary.mode is `single`. "bandedColors": [ "#ed8796", diff --git a/src/config/definitions/defaults-subtitle.ts b/src/config/definitions/defaults-subtitle.ts index 1f959ae..89220da 100644 --- a/src/config/definitions/defaults-subtitle.ts +++ b/src/config/definitions/defaults-subtitle.ts @@ -33,6 +33,7 @@ export const SUBTITLE_DEFAULT_CONFIG: Pick = { sourcePath: '', topX: 1000, mode: 'single', + matchMode: 'headword', singleColor: '#f5a97f', bandedColors: ['#ed8796', '#f5a97f', '#f9e2af', '#a6e3a1', '#8aadf4'], }, diff --git a/src/config/definitions/options-subtitle.ts b/src/config/definitions/options-subtitle.ts index 04a3362..1e428d1 100644 --- a/src/config/definitions/options-subtitle.ts +++ b/src/config/definitions/options-subtitle.ts @@ -61,6 +61,14 @@ export function buildSubtitleConfigOptionRegistry( description: 'single: use one color for all matching tokens. banded: use color ramp by frequency band.', }, + { + path: 'subtitleStyle.frequencyDictionary.matchMode', + kind: 'enum', + enumValues: ['headword', 'surface'], + defaultValue: defaultConfig.subtitleStyle.frequencyDictionary.matchMode, + description: + 'headword: frequency lookup uses dictionary form. 
surface: lookup uses subtitle-visible token text.', + }, { path: 'subtitleStyle.frequencyDictionary.singleColor', kind: 'string', diff --git a/src/config/resolve/subtitle-domains.ts b/src/config/resolve/subtitle-domains.ts index fae006c..39e7349 100644 --- a/src/config/resolve/subtitle-domains.ts +++ b/src/config/resolve/subtitle-domains.ts @@ -102,9 +102,18 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { const fallbackSubtitleStyleHoverTokenColor = resolved.subtitleStyle.hoverTokenColor; const fallbackSubtitleStyleHoverTokenBackgroundColor = resolved.subtitleStyle.hoverTokenBackgroundColor; + const fallbackFrequencyDictionary = { + ...resolved.subtitleStyle.frequencyDictionary, + }; resolved.subtitleStyle = { ...resolved.subtitleStyle, ...(src.subtitleStyle as ResolvedConfig['subtitleStyle']), + frequencyDictionary: { + ...resolved.subtitleStyle.frequencyDictionary, + ...(isObject((src.subtitleStyle as { frequencyDictionary?: unknown }).frequencyDictionary) + ? 
((src.subtitleStyle as { frequencyDictionary?: unknown }).frequencyDictionary as ResolvedConfig['subtitleStyle']['frequencyDictionary']) + : {}), + }, secondary: { ...resolved.subtitleStyle.secondary, ...(isObject(src.subtitleStyle.secondary) @@ -186,6 +195,7 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { if (frequencyEnabled !== undefined) { resolved.subtitleStyle.frequencyDictionary.enabled = frequencyEnabled; } else if ((frequencyDictionary as { enabled?: unknown }).enabled !== undefined) { + resolved.subtitleStyle.frequencyDictionary.enabled = fallbackFrequencyDictionary.enabled; warn( 'subtitleStyle.frequencyDictionary.enabled', (frequencyDictionary as { enabled?: unknown }).enabled, @@ -198,6 +208,7 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { if (sourcePath !== undefined) { resolved.subtitleStyle.frequencyDictionary.sourcePath = sourcePath; } else if ((frequencyDictionary as { sourcePath?: unknown }).sourcePath !== undefined) { + resolved.subtitleStyle.frequencyDictionary.sourcePath = fallbackFrequencyDictionary.sourcePath; warn( 'subtitleStyle.frequencyDictionary.sourcePath', (frequencyDictionary as { sourcePath?: unknown }).sourcePath, @@ -210,6 +221,7 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { if (topX !== undefined && Number.isInteger(topX) && topX > 0) { resolved.subtitleStyle.frequencyDictionary.topX = Math.floor(topX); } else if ((frequencyDictionary as { topX?: unknown }).topX !== undefined) { + resolved.subtitleStyle.frequencyDictionary.topX = fallbackFrequencyDictionary.topX; warn( 'subtitleStyle.frequencyDictionary.topX', (frequencyDictionary as { topX?: unknown }).topX, @@ -222,6 +234,7 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { if (frequencyMode === 'single' || frequencyMode === 'banded') { resolved.subtitleStyle.frequencyDictionary.mode = frequencyMode; } else if (frequencyMode !== undefined) { + 
resolved.subtitleStyle.frequencyDictionary.mode = fallbackFrequencyDictionary.mode; warn( 'subtitleStyle.frequencyDictionary.mode', frequencyDictionary.mode, @@ -230,10 +243,24 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { ); } + const frequencyMatchMode = (frequencyDictionary as { matchMode?: unknown }).matchMode; + if (frequencyMatchMode === 'headword' || frequencyMatchMode === 'surface') { + resolved.subtitleStyle.frequencyDictionary.matchMode = frequencyMatchMode; + } else if (frequencyMatchMode !== undefined) { + resolved.subtitleStyle.frequencyDictionary.matchMode = fallbackFrequencyDictionary.matchMode; + warn( + 'subtitleStyle.frequencyDictionary.matchMode', + frequencyMatchMode, + resolved.subtitleStyle.frequencyDictionary.matchMode, + "Expected 'headword' or 'surface'.", + ); + } + const singleColor = asColor((frequencyDictionary as { singleColor?: unknown }).singleColor); if (singleColor !== undefined) { resolved.subtitleStyle.frequencyDictionary.singleColor = singleColor; } else if ((frequencyDictionary as { singleColor?: unknown }).singleColor !== undefined) { + resolved.subtitleStyle.frequencyDictionary.singleColor = fallbackFrequencyDictionary.singleColor; warn( 'subtitleStyle.frequencyDictionary.singleColor', (frequencyDictionary as { singleColor?: unknown }).singleColor, @@ -248,6 +275,8 @@ export function applySubtitleDomainConfig(context: ResolveContext): void { if (bandedColors !== undefined) { resolved.subtitleStyle.frequencyDictionary.bandedColors = bandedColors; } else if ((frequencyDictionary as { bandedColors?: unknown }).bandedColors !== undefined) { + resolved.subtitleStyle.frequencyDictionary.bandedColors = + fallbackFrequencyDictionary.bandedColors; warn( 'subtitleStyle.frequencyDictionary.bandedColors', (frequencyDictionary as { bandedColors?: unknown }).bandedColors, diff --git a/src/config/resolve/subtitle-style.test.ts b/src/config/resolve/subtitle-style.test.ts index 43c7a3d..c077cfc 100644 --- 
a/src/config/resolve/subtitle-style.test.ts +++ b/src/config/resolve/subtitle-style.test.ts @@ -27,3 +27,32 @@ test('subtitleStyle preserveLineBreaks falls back while merge is preserved', () ), ); }); + +test('subtitleStyle frequencyDictionary.matchMode accepts valid values and warns on invalid', () => { + const valid = createResolveContext({ + subtitleStyle: { + frequencyDictionary: { + matchMode: 'surface', + }, + }, + }); + applySubtitleDomainConfig(valid.context); + assert.equal(valid.context.resolved.subtitleStyle.frequencyDictionary.matchMode, 'surface'); + + const invalid = createResolveContext({ + subtitleStyle: { + frequencyDictionary: { + matchMode: 'reading' as unknown as 'headword' | 'surface', + }, + }, + }); + applySubtitleDomainConfig(invalid.context); + assert.equal(invalid.context.resolved.subtitleStyle.frequencyDictionary.matchMode, 'headword'); + assert.ok( + invalid.warnings.some( + (warning) => + warning.path === 'subtitleStyle.frequencyDictionary.matchMode' && + warning.message === "Expected 'headword' or 'surface'.", + ), + ); +}); diff --git a/src/core/services/frequency-dictionary.test.ts b/src/core/services/frequency-dictionary.test.ts index 4c8f82d..502e061 100644 --- a/src/core/services/frequency-dictionary.test.ts +++ b/src/core/services/frequency-dictionary.test.ts @@ -80,7 +80,7 @@ test('createFrequencyDictionaryLookup aggregates duplicate-term logs into a sing ); }); -test('createFrequencyDictionaryLookup prefers frequency.value over displayValue', async () => { +test('createFrequencyDictionaryLookup prefers frequency.displayValue over value when both exist', async () => { const logs: string[] = []; const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-')); const bankPath = path.join(tempDir, 'term_meta_bank_1.json'); @@ -88,6 +88,7 @@ test('createFrequencyDictionaryLookup prefers frequency.value over displayValue' bankPath, JSON.stringify([ ['猫', 1, { frequency: { value: 1234, displayValue: 1200 } }], + 
['鍛える', 2, { frequency: { value: 46961, displayValue: 2847 } }], ['犬', 2, { frequency: { displayValue: 88 } }], ]), ); @@ -99,10 +100,31 @@ test('createFrequencyDictionaryLookup prefers frequency.value over displayValue' }, }); - assert.equal(lookup('猫'), 1234); + assert.equal(lookup('猫'), 1200); + assert.equal(lookup('鍛える'), 2847); assert.equal(lookup('犬'), 88); assert.equal( logs.some((entry) => entry.includes('Frequency dictionary loaded from')), true, ); }); + +test('createFrequencyDictionaryLookup parses composite displayValue by primary rank', async () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-frequency-dict-')); + const bankPath = path.join(tempDir, 'term_meta_bank_1.json'); + fs.writeFileSync( + bankPath, + JSON.stringify([ + ['鍛える', 1, { frequency: { displayValue: '3272,52377' } }], + ['高み', 2, { frequency: { displayValue: '9933,108961' } }], + ]), + ); + + const lookup = await createFrequencyDictionaryLookup({ + searchPaths: [tempDir], + log: () => undefined, + }); + + assert.equal(lookup('鍛える'), 3272); + assert.equal(lookup('高み'), 9933); +}); diff --git a/src/core/services/frequency-dictionary.ts b/src/core/services/frequency-dictionary.ts index efddf4a..5f9a9a2 100644 --- a/src/core/services/frequency-dictionary.ts +++ b/src/core/services/frequency-dictionary.ts @@ -18,6 +18,41 @@ function normalizeFrequencyTerm(value: string): string { return value.trim().toLowerCase(); } +/** + * Parses the leading positive integer of a frequency display string. + * A comma-separated prefix is joined only when it is a well-formed thousands + * grouping (1-3 leading digits followed by 3-digit groups, e.g. "12,345"). + * Any other comma list (e.g. "3272,52377" or "1234,567") is treated as a + * composite value and only its first number is used. + * Returns null when no positive integer can be read. + */ +function parsePositiveFrequencyString(value: string): number | null { + const trimmed = value.trim(); + if (!trimmed) { + return null; + } + + const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0]; + if (!numericPrefix) { + return null; + } + + const chunks = numericPrefix.split(','); + const leadingChunk = chunks[0] ?? ''; + /* A leading chunk of 4+ digits (e.g. "1234,567") cannot be a thousands grouping. */ + const isThousandsGrouped = + chunks.length > 1 && + /^\d{1,3}$/.test(leadingChunk) && + chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk)); + const normalizedNumber = isThousandsGrouped ? chunks.join('') : leadingChunk;
+ const parsed = Number.parseInt(normalizedNumber, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + return null; + } + + return parsed; +} + function parsePositiveFrequencyNumber(value: unknown): number | null { if (typeof value === 'number') { if (!Number.isFinite(value) || value <= 0) return null; @@ -25,10 +60,7 @@ function parsePositiveFrequencyNumber(value: unknown): number | null { } if (typeof value === 'string') { - const normalized = value.trim().replace(/,/g, ''); - const parsed = Number.parseInt(normalized, 10); - if (!Number.isFinite(parsed) || parsed <= 0) return null; - return parsed; + return parsePositiveFrequencyString(value); } return null; @@ -38,14 +70,14 @@ function extractFrequencyDisplayValue(meta: unknown): number | null { if (!meta || typeof meta !== 'object') return null; const frequency = (meta as { frequency?: unknown }).frequency; if (!frequency || typeof frequency !== 'object') return null; - const rawValue = (frequency as { value?: unknown }).value; - const parsedValue = parsePositiveFrequencyNumber(rawValue); - if (parsedValue !== null) { - return parsedValue; + const displayValue = (frequency as { displayValue?: unknown }).displayValue; + const parsedDisplayValue = parsePositiveFrequencyNumber(displayValue); + if (parsedDisplayValue !== null) { + return parsedDisplayValue; } - const displayValue = (frequency as { displayValue?: unknown }).displayValue; - return parsePositiveFrequencyNumber(displayValue); + const rawValue = (frequency as { value?: unknown }).value; + return parsePositiveFrequencyNumber(rawValue); } function asFrequencyDictionaryEntry(entry: unknown): FrequencyDictionaryEntry | null { diff --git a/src/core/services/tokenizer.test.ts b/src/core/services/tokenizer.test.ts index 3cca382..c3a0989 100644 --- a/src/core/services/tokenizer.test.ts +++ b/src/core/services/tokenizer.test.ts @@ -218,6 +218,119 @@ test('tokenizeSubtitle loads frequency ranks from Yomitan installed dictionaries 
assert.equal(result.tokens?.[0]?.frequencyRank, 77); }); +test('tokenizeSubtitle queries headword frequencies without forcing surface reading', async () => { + const result = await tokenizeSubtitle( + '鍛えた', + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + getYomitanExt: () => ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => + ({ + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + if (!script.includes('"term":"鍛える","reading":null')) { + return []; + } + return [ + { + term: '鍛える', + reading: 'きたえる', + dictionary: 'freq-dict', + frequency: 46961, + displayValue: '2847,46961', + displayValueParsed: true, + }, + ]; + } + + return [ + { + source: 'scanning-parser', + index: 0, + content: [ + [ + { + text: '鍛えた', + reading: 'きた', + headwords: [[{ term: '鍛える' }]], + }, + ], + ], + }, + ]; + }, + }, + }) as unknown as Electron.BrowserWindow, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.headword, '鍛える'); + assert.equal(result.tokens?.[0]?.reading, 'きた'); + assert.equal(result.tokens?.[0]?.frequencyRank, 2847); +}); + +test('tokenizeSubtitle prefers Yomitan frequency from highest-priority dictionary', async () => { + const result = await tokenizeSubtitle( + '猫', + makeDeps({ + getFrequencyDictionaryEnabled: () => true, + getYomitanExt: () => ({ id: 'dummy-ext' }) as any, + getYomitanParserWindow: () => + ({ + isDestroyed: () => false, + webContents: { + executeJavaScript: async (script: string) => { + if (script.includes('getTermFrequencies')) { + return [ + { + term: '猫', + reading: 'ねこ', + dictionary: 'low-priority', + dictionaryPriority: 2, + frequency: 5, + displayValue: '5', + displayValueParsed: true, + }, + { + term: '猫', + reading: 'ねこ', + dictionary: 'high-priority', + dictionaryPriority: 0, + frequency: 100, + displayValue: '100', + displayValueParsed: true, + }, + ]; + } + + return [ + { + source: 
'scanning-parser', + index: 0, + content: [ + [ + { + text: '猫', + reading: 'ねこ', + headwords: [[{ term: '猫' }]], + }, + ], + ], + }, + ]; + }, + }, + }) as unknown as Electron.BrowserWindow, + }), + ); + + assert.equal(result.tokens?.length, 1); + assert.equal(result.tokens?.[0]?.frequencyRank, 100); +}); + test('tokenizeSubtitle uses only selected Yomitan headword for frequency lookup', async () => { const result = await tokenizeSubtitle( '猫です', @@ -1693,6 +1806,20 @@ test('tokenizeSubtitle checks known words by surface when configured', async () assert.equal(result.tokens?.[0]?.isKnown, true); }); +test('tokenizeSubtitle uses frequency surface match mode when configured', async () => { + const result = await tokenizeSubtitle( + '鍛えた', + makeDepsFromYomitanTokens([{ surface: '鍛えた', reading: 'きたえた', headword: '鍛える' }], { + getFrequencyDictionaryEnabled: () => true, + getFrequencyDictionaryMatchMode: () => 'surface', + getFrequencyRank: (text) => (text === '鍛えた' ? 2847 : null), + }), + ); + + assert.equal(result.text, '鍛えた'); + assert.equal(result.tokens?.[0]?.frequencyRank, 2847); +}); + test('createTokenizerDepsRuntime checks MeCab availability before first tokenizeWithMecab call', async () => { let available = false; let checkCalls = 0; diff --git a/src/core/services/tokenizer.ts b/src/core/services/tokenizer.ts index 53a14f6..764cbaa 100644 --- a/src/core/services/tokenizer.ts +++ b/src/core/services/tokenizer.ts @@ -2,6 +2,7 @@ import type { BrowserWindow, Extension } from 'electron'; import { mergeTokens } from '../../token-merger'; import { createLogger } from '../../logger'; import { + FrequencyDictionaryMatchMode, MergedToken, NPlusOneMatchMode, SubtitleData, @@ -36,6 +37,7 @@ export interface TokenizerServiceDeps { getNPlusOneEnabled?: () => boolean; getJlptEnabled?: () => boolean; getFrequencyDictionaryEnabled?: () => boolean; + getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode; getFrequencyRank?: FrequencyDictionaryLookup; 
getMinSentenceWordsForNPlusOne?: () => number; getYomitanGroupDebugEnabled?: () => boolean; @@ -63,6 +65,7 @@ export interface TokenizerDepsRuntimeOptions { getNPlusOneEnabled?: () => boolean; getJlptEnabled?: () => boolean; getFrequencyDictionaryEnabled?: () => boolean; + getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode; getFrequencyRank?: FrequencyDictionaryLookup; getMinSentenceWordsForNPlusOne?: () => number; getYomitanGroupDebugEnabled?: () => boolean; @@ -73,6 +76,7 @@ interface TokenizerAnnotationOptions { nPlusOneEnabled: boolean; jlptEnabled: boolean; frequencyEnabled: boolean; + frequencyMatchMode: FrequencyDictionaryMatchMode; minSentenceWordsForNPlusOne: number | undefined; } @@ -139,7 +143,6 @@ async function applyAnnotationStage( isKnownWord: getKnownWordLookup(deps, options), knownWordMatchMode: deps.getKnownWordMatchMode(), getJlptLevel: deps.getJlptLevel, - getFrequencyRank: deps.getFrequencyRank, }, options, ); @@ -164,6 +167,8 @@ export function createTokenizerDepsRuntime( getNPlusOneEnabled: options.getNPlusOneEnabled, getJlptEnabled: options.getJlptEnabled, getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled, + getFrequencyDictionaryMatchMode: + options.getFrequencyDictionaryMatchMode ?? (() => 'headword'), getFrequencyRank: options.getFrequencyRank, getMinSentenceWordsForNPlusOne: options.getMinSentenceWordsForNPlusOne ?? (() => 3), getYomitanGroupDebugEnabled: options.getYomitanGroupDebugEnabled ?? 
(() => false), @@ -224,7 +229,24 @@ function normalizePositiveFrequencyRank(value: unknown): number | null { return Math.max(1, Math.floor(value)); } -function resolveFrequencyLookupText(token: MergedToken): string { +function normalizeFrequencyLookupText(rawText: string): string { + return rawText.trim().toLowerCase(); +} + +function resolveFrequencyLookupText( + token: MergedToken, + matchMode: FrequencyDictionaryMatchMode, +): string { + if (matchMode === 'surface') { + if (token.surface && token.surface.length > 0) { + return token.surface; + } + if (token.headword && token.headword.length > 0) { + return token.headword; + } + return token.reading; + } + if (token.headword && token.headword.length > 0) { return token.headword; } @@ -234,43 +256,128 @@ function resolveFrequencyLookupText(token: MergedToken): string { return token.surface; } -function applyYomitanFrequencyRanks( +function buildYomitanFrequencyTermReadingList( tokens: MergedToken[], - frequencies: ReadonlyArray<{ term: string; frequency: number }>, -): MergedToken[] { - if (tokens.length === 0 || frequencies.length === 0) { - return tokens; - } + matchMode: FrequencyDictionaryMatchMode, +): Array<{ term: string; reading: string | null }> { + return tokens + .map((token) => { + const term = resolveFrequencyLookupText(token, matchMode).trim(); + if (!term) { + return null; + } + const readingRaw = + token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null; + const reading = matchMode === 'headword' ? 
null : readingRaw; + return { term, reading }; + }) + .filter((pair): pair is { term: string; reading: string | null } => pair !== null); +} - const rankByTerm = new Map(); +/** Builds a term -> rank map from Yomitan results; per term the entry with the lowest dictionaryPriority wins (missing priority sorts last), ties broken by the lowest rank. */ function buildYomitanFrequencyRankMap( + frequencies: ReadonlyArray<{ term: string; frequency: number; dictionaryPriority?: number }>, +): Map<string, number> { + const rankByTerm = new Map<string, { rank: number; dictionaryPriority: number }>(); for (const frequency of frequencies) { const normalizedTerm = frequency.term.trim(); const rank = normalizePositiveFrequencyRank(frequency.frequency); if (!normalizedTerm || rank === null) { continue; } + const dictionaryPriority = + typeof frequency.dictionaryPriority === 'number' && Number.isFinite(frequency.dictionaryPriority) + ? Math.max(0, Math.floor(frequency.dictionaryPriority)) + : Number.MAX_SAFE_INTEGER; const current = rankByTerm.get(normalizedTerm); - if (current === undefined || rank < current) { - rankByTerm.set(normalizedTerm, rank); + if ( + current === undefined || + dictionaryPriority < current.dictionaryPriority || + (dictionaryPriority === current.dictionaryPriority && rank < current.rank) + ) { + rankByTerm.set(normalizedTerm, { rank, dictionaryPriority }); } } - if (rankByTerm.size === 0) { + const collapsedRankByTerm = new Map<string, number>(); + for (const [term, entry] of rankByTerm.entries()) { + collapsedRankByTerm.set(term, entry.rank); + } + + return collapsedRankByTerm; +} + +/** Looks up a rank through the local dictionary callback, memoizing results (including misses) in the supplied cache; a throwing lookup counts as a miss. */ function getLocalFrequencyRank( + lookupText: string, + getFrequencyRank: FrequencyDictionaryLookup, + cache: Map<string, number | null>, +): number | null { + const normalizedText = normalizeFrequencyLookupText(lookupText); + if (!normalizedText) { + return null; + } + + if (cache.has(normalizedText)) { + return cache.get(normalizedText) ?? 
null; + } + + let rank: number | null; + try { + rank = getFrequencyRank(normalizedText); + } catch { + rank = null; + } + rank = normalizePositiveFrequencyRank(rank); + cache.set(normalizedText, rank); + return rank; +} + +/** Assigns frequencyRank per token: keeps a valid existing rank, else the Yomitan rank for the lookup text, else the local dictionary lookup; otherwise undefined. */ function applyFrequencyRanks( + tokens: MergedToken[], + matchMode: FrequencyDictionaryMatchMode, + yomitanRankByTerm: Map<string, number>, + getFrequencyRank: FrequencyDictionaryLookup | undefined, +): MergedToken[] { + if (tokens.length === 0) { return tokens; } + const localLookupCache = new Map<string, number | null>(); return tokens.map((token) => { - const lookupText = resolveFrequencyLookupText(token).trim(); + const existingRank = normalizePositiveFrequencyRank(token.frequencyRank); + if (existingRank !== null) { + return { + ...token, + frequencyRank: existingRank, + }; + } + + const lookupText = resolveFrequencyLookupText(token, matchMode).trim(); if (!lookupText) { - return token; + return { + ...token, + frequencyRank: undefined, + }; } - const rank = rankByTerm.get(lookupText); - if (rank === undefined) { - return token; + + const yomitanRank = yomitanRankByTerm.get(lookupText); + if (yomitanRank !== undefined) { + return { + ...token, + frequencyRank: yomitanRank, + }; } + + if (!getFrequencyRank) { + return { + ...token, + frequencyRank: undefined, + }; + } + + const localRank = getLocalFrequencyRank(lookupText, getFrequencyRank, localLookupCache); return { ...token, - frequencyRank: rank, + frequencyRank: localRank ?? undefined, }; }); } @@ -280,6 +387,7 @@ function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOp nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false, jlptEnabled: deps.getJlptEnabled?.() !== false, frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false, + frequencyMatchMode: deps.getFrequencyDictionaryMatchMode?.() ?? 
'headword', minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(), }; } @@ -307,34 +415,44 @@ async function parseWithYomitanInternalParser( logSelectedYomitanGroups(text, selectedTokens); } - let tokensWithFrequency = selectedTokens; + let yomitanRankByTerm = new Map(); if (options.frequencyEnabled) { - const termReadingList = selectedTokens.map((token) => ({ - term: resolveFrequencyLookupText(token), - reading: token.reading && token.reading.trim().length > 0 ? token.reading.trim() : null, - })); - const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger); - tokensWithFrequency = applyYomitanFrequencyRanks(selectedTokens, yomitanFrequencies); - } - - if (!needsMecabPosEnrichment(options)) { - return tokensWithFrequency; - } - - try { - const mecabTokens = await deps.tokenizeWithMecab(text); - const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? enrichTokensWithMecabAsync; - return await enrichTokensWithMecab(tokensWithFrequency, mecabTokens); - } catch (err) { - const error = err as Error; - logger.warn( - 'Failed to enrich Yomitan tokens with MeCab POS:', - error.message, - `tokenCount=${selectedTokens.length}`, - `textLength=${text.length}`, + const frequencyMatchMode = options.frequencyMatchMode; + const termReadingList = buildYomitanFrequencyTermReadingList( + selectedTokens, + frequencyMatchMode, ); - return tokensWithFrequency; + const yomitanFrequencies = await requestYomitanTermFrequencies(termReadingList, deps, logger); + yomitanRankByTerm = buildYomitanFrequencyRankMap(yomitanFrequencies); } + + let enrichedTokens = selectedTokens; + if (needsMecabPosEnrichment(options)) { + try { + const mecabTokens = await deps.tokenizeWithMecab(text); + const enrichTokensWithMecab = deps.enrichTokensWithMecab ?? 
enrichTokensWithMecabAsync; + enrichedTokens = await enrichTokensWithMecab(enrichedTokens, mecabTokens); + } catch (err) { + const error = err as Error; + logger.warn( + 'Failed to enrich Yomitan tokens with MeCab POS:', + error.message, + `tokenCount=${selectedTokens.length}`, + `textLength=${text.length}`, + ); + } + } + + if (options.frequencyEnabled) { + return applyFrequencyRanks( + enrichedTokens, + options.frequencyMatchMode, + yomitanRankByTerm, + deps.getFrequencyRank, + ); + } + + return enrichedTokens; } export async function tokenizeSubtitle( diff --git a/src/core/services/tokenizer/annotation-stage.test.ts b/src/core/services/tokenizer/annotation-stage.test.ts index e9176e1..fd4541b 100644 --- a/src/core/services/tokenizer/annotation-stage.test.ts +++ b/src/core/services/tokenizer/annotation-stage.test.ts @@ -51,15 +51,15 @@ test('annotateTokens known-word match mode uses headword vs surface', () => { }); test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 exclusions', () => { - const lookupCalls: string[] = []; const tokens = [ - makeToken({ surface: 'は', headword: 'は', partOfSpeech: PartOfSpeech.particle }), + makeToken({ surface: 'は', headword: 'は', partOfSpeech: PartOfSpeech.particle, frequencyRank: 3 }), makeToken({ surface: 'です', headword: 'です', partOfSpeech: PartOfSpeech.bound_auxiliary, startPos: 1, endPos: 3, + frequencyRank: 4, }), makeToken({ surface: 'の', @@ -68,6 +68,7 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex pos1: '助詞', startPos: 3, endPos: 4, + frequencyRank: 5, }), makeToken({ surface: '猫', @@ -75,45 +76,36 @@ test('annotateTokens excludes frequency for particle/bound_auxiliary and pos1 ex partOfSpeech: PartOfSpeech.noun, startPos: 4, endPos: 5, + frequencyRank: 11, }), ]; - const result = annotateTokens( - tokens, - makeDeps({ - getFrequencyRank: (text) => { - lookupCalls.push(text); - return text === '猫' ? 
11 : 999; - }, - }), - ); + const result = annotateTokens(tokens, makeDeps()); assert.equal(result[0]?.frequencyRank, undefined); assert.equal(result[1]?.frequencyRank, undefined); assert.equal(result[2]?.frequencyRank, undefined); assert.equal(result[3]?.frequencyRank, 11); - assert.deepEqual(lookupCalls, ['猫']); }); -test('annotateTokens preserves existing frequency rank when lookup is unavailable', () => { +test('annotateTokens preserves existing frequency rank when frequency is enabled', () => { const tokens = [makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 })]; - const result = annotateTokens(tokens, makeDeps({ getFrequencyRank: undefined })); + const result = annotateTokens(tokens, makeDeps()); assert.equal(result[0]?.frequencyRank, 42); }); -test('annotateTokens prefers existing frequency rank over fallback lookup', () => { +test('annotateTokens drops invalid frequency rank values', () => { + const tokens = [makeToken({ surface: '猫', headword: '猫', frequencyRank: Number.NaN })]; + const result = annotateTokens(tokens, makeDeps()); + assert.equal(result[0]?.frequencyRank, undefined); +}); + +test('annotateTokens clears frequency rank when frequency is disabled', () => { const tokens = [makeToken({ surface: '猫', headword: '猫', frequencyRank: 42 })]; - - const result = annotateTokens( - tokens, - makeDeps({ - getFrequencyRank: () => 9, - }), - ); - - assert.equal(result[0]?.frequencyRank, 42); + const result = annotateTokens(tokens, makeDeps(), { frequencyEnabled: false }); + assert.equal(result[0]?.frequencyRank, undefined); }); test('annotateTokens handles JLPT disabled and eligibility exclusion paths', () => { diff --git a/src/core/services/tokenizer/annotation-stage.ts b/src/core/services/tokenizer/annotation-stage.ts index dde4e23..af409e4 100644 --- a/src/core/services/tokenizer/annotation-stage.ts +++ b/src/core/services/tokenizer/annotation-stage.ts @@ -1,6 +1,5 @@ import { markNPlusOneTargets } from '../../../token-merger'; import { - 
FrequencyDictionaryLookup, JlptLevel, MergedToken, NPlusOneMatchMode, @@ -12,22 +11,16 @@ const KATAKANA_TO_HIRAGANA_OFFSET = 0x60; const KATAKANA_CODEPOINT_START = 0x30a1; const KATAKANA_CODEPOINT_END = 0x30f6; const JLPT_LEVEL_LOOKUP_CACHE_LIMIT = 2048; -const FREQUENCY_RANK_LOOKUP_CACHE_LIMIT = 2048; const jlptLevelLookupCaches = new WeakMap< (text: string) => JlptLevel | null, Map >(); -const frequencyRankLookupCaches = new WeakMap< - FrequencyDictionaryLookup, - Map ->(); export interface AnnotationStageDeps { isKnownWord: (text: string) => boolean; knownWordMatchMode: NPlusOneMatchMode; getJlptLevel: (text: string) => JlptLevel | null; - getFrequencyRank?: FrequencyDictionaryLookup; } export interface AnnotationStageOptions { @@ -60,67 +53,6 @@ function applyKnownWordMarking( }); } -function normalizeFrequencyLookupText(rawText: string): string { - return rawText.trim().toLowerCase(); -} - -function getCachedFrequencyRank( - lookupText: string, - getFrequencyRank: FrequencyDictionaryLookup, -): number | null { - const normalizedText = normalizeFrequencyLookupText(lookupText); - if (!normalizedText) { - return null; - } - - let cache = frequencyRankLookupCaches.get(getFrequencyRank); - if (!cache) { - cache = new Map(); - frequencyRankLookupCaches.set(getFrequencyRank, cache); - } - - if (cache.has(normalizedText)) { - return cache.get(normalizedText) ?? 
null; - } - - let rank: number | null; - try { - rank = getFrequencyRank(normalizedText); - } catch { - rank = null; - } - if (rank !== null) { - if (!Number.isFinite(rank) || rank <= 0) { - rank = null; - } - } - - cache.set(normalizedText, rank); - while (cache.size > FREQUENCY_RANK_LOOKUP_CACHE_LIMIT) { - const firstKey = cache.keys().next().value; - if (firstKey !== undefined) { - cache.delete(firstKey); - } - } - - return rank; -} - -function resolveFrequencyLookupText(token: MergedToken): string { - if (token.headword && token.headword.length > 0) { - return token.headword; - } - if (token.reading && token.reading.length > 0) { - return token.reading; - } - return token.surface; -} - -function getFrequencyLookupTextCandidates(token: MergedToken): string[] { - const lookupText = resolveFrequencyLookupText(token).trim(); - return lookupText ? [lookupText] : []; -} - function isFrequencyExcludedByPos(token: MergedToken): boolean { if ( token.partOfSpeech === PartOfSpeech.particle || @@ -134,7 +66,6 @@ function isFrequencyExcludedByPos(token: MergedToken): boolean { function applyFrequencyMarking( tokens: MergedToken[], - getFrequencyRank: FrequencyDictionaryLookup, ): MergedToken[] { return tokens.map((token) => { if (isFrequencyExcludedByPos(token)) { @@ -146,25 +77,9 @@ function applyFrequencyMarking( return { ...token, frequencyRank: rank }; } - const lookupTexts = getFrequencyLookupTextCandidates(token); - if (lookupTexts.length === 0) { - return { ...token, frequencyRank: undefined }; - } - - let bestRank: number | null = null; - for (const lookupText of lookupTexts) { - const rank = getCachedFrequencyRank(lookupText, getFrequencyRank); - if (rank === null) { - continue; - } - if (bestRank === null || rank < bestRank) { - bestRank = rank; - } - } - return { ...token, - frequencyRank: bestRank ?? 
undefined, + frequencyRank: undefined, }; }); } @@ -357,16 +272,8 @@ export function annotateTokens( const frequencyEnabled = options.frequencyEnabled !== false; const frequencyMarkedTokens = - frequencyEnabled && deps.getFrequencyRank - ? applyFrequencyMarking(knownMarkedTokens, deps.getFrequencyRank) - : frequencyEnabled - ? knownMarkedTokens.map((token) => ({ - ...token, - frequencyRank: - typeof token.frequencyRank === 'number' && Number.isFinite(token.frequencyRank) - ? Math.max(1, Math.floor(token.frequencyRank)) - : undefined, - })) + frequencyEnabled + ? applyFrequencyMarking(knownMarkedTokens) : knownMarkedTokens.map((token) => ({ ...token, frequencyRank: undefined, diff --git a/src/core/services/tokenizer/yomitan-parser-runtime.test.ts b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts index 4877578..978b76b 100644 --- a/src/core/services/tokenizer/yomitan-parser-runtime.test.ts +++ b/src/core/services/tokenizer/yomitan-parser-runtime.test.ts @@ -94,10 +94,20 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async term: '猫', reading: 'ねこ', dictionary: 'freq-dict', + dictionaryPriority: 0, frequency: 77, displayValue: '77', displayValueParsed: true, }, + { + term: '鍛える', + reading: 'きたえる', + dictionary: 'freq-dict', + dictionaryPriority: 1, + frequency: 46961, + displayValue: '2847,46961', + displayValueParsed: true, + }, { term: 'invalid', dictionary: 'freq-dict', @@ -110,9 +120,12 @@ test('requestYomitanTermFrequencies returns normalized frequency entries', async error: () => undefined, }); - assert.equal(result.length, 1); + assert.equal(result.length, 2); assert.equal(result[0]?.term, '猫'); assert.equal(result[0]?.frequency, 77); + assert.equal(result[0]?.dictionaryPriority, 0); + assert.equal(result[1]?.term, '鍛える'); + assert.equal(result[1]?.frequency, 2847); assert.match(scriptValue, /getTermFrequencies/); assert.match(scriptValue, /optionsGetFull/); }); diff --git 
a/src/core/services/tokenizer/yomitan-parser-runtime.ts b/src/core/services/tokenizer/yomitan-parser-runtime.ts index 3d96e94..7d552dc 100644 --- a/src/core/services/tokenizer/yomitan-parser-runtime.ts +++ b/src/core/services/tokenizer/yomitan-parser-runtime.ts @@ -19,6 +19,7 @@ export interface YomitanTermFrequency { term: string; reading: string | null; dictionary: string; + dictionaryPriority: number; frequency: number; displayValue: string | null; displayValueParsed: boolean; @@ -40,6 +41,32 @@ function asPositiveInteger(value: unknown): number | null { return Math.max(1, Math.floor(value)); } +function parsePositiveFrequencyString(value: string): number | null { + const trimmed = value.trim(); + if (!trimmed) { + return null; + } + + const numericPrefix = trimmed.match(/^\d[\d,]*/)?.[0]; + if (!numericPrefix) { + return null; + } + + const chunks = numericPrefix.split(','); + const normalizedNumber = + chunks.length <= 1 + ? chunks[0] ?? '' + : chunks.slice(1).every((chunk) => /^\d{3}$/.test(chunk)) + ? chunks.join('') + : (chunks[0] ?? ''); + const parsed = Number.parseInt(normalizedNumber, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + return null; + } + + return parsed; +} + function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null { if (!isObject(value)) { return null; @@ -47,10 +74,24 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null { const term = typeof value.term === 'string' ? value.term.trim() : ''; const dictionary = typeof value.dictionary === 'string' ? value.dictionary.trim() : ''; - const frequency = asPositiveInteger(value.frequency); + const rawFrequency = asPositiveInteger(value.frequency); + const displayValueRaw = + value.displayValue === null + ? null + : typeof value.displayValue === 'string' + ? value.displayValue + : null; + const parsedDisplayFrequency = + displayValueRaw !== null ? 
parsePositiveFrequencyString(displayValueRaw) : null; + const frequency = parsedDisplayFrequency ?? rawFrequency; if (!term || !dictionary || frequency === null) { return null; } + const dictionaryPriorityRaw = (value as { dictionaryPriority?: unknown }).dictionaryPriority; + const dictionaryPriority = + typeof dictionaryPriorityRaw === 'number' && Number.isFinite(dictionaryPriorityRaw) + ? Math.max(0, Math.floor(dictionaryPriorityRaw)) + : Number.MAX_SAFE_INTEGER; const reading = value.reading === null @@ -58,18 +99,14 @@ function toYomitanTermFrequency(value: unknown): YomitanTermFrequency | null { : typeof value.reading === 'string' ? value.reading : null; - const displayValue = - value.displayValue === null - ? null - : typeof value.displayValue === 'string' - ? value.displayValue - : null; + const displayValue = displayValueRaw; const displayValueParsed = value.displayValueParsed === true; return { term, reading, dictionary, + dictionaryPriority, frequency, displayValue, displayValueParsed, @@ -278,20 +315,43 @@ export async function requestYomitanTermFrequencies( const optionsFull = await invoke("optionsGetFull", undefined); const profileIndex = optionsFull.profileCurrent; const dictionariesRaw = optionsFull.profiles?.[profileIndex]?.options?.dictionaries ?? []; - const dictionaries = Array.isArray(dictionariesRaw) + const dictionaryEntries = Array.isArray(dictionariesRaw) ? dictionariesRaw .filter((entry) => entry && typeof entry === "object" && entry.enabled === true && typeof entry.name === "string") - .map((entry) => entry.name) + .map((entry, index) => ({ + name: entry.name, + id: typeof entry.id === "number" && Number.isFinite(entry.id) ? 
Math.floor(entry.id) : index + })) + .sort((a, b) => a.id - b.id) : []; + const dictionaries = dictionaryEntries.map((entry) => entry.name); + const dictionaryPriorityByName = dictionaryEntries.reduce((acc, entry, index) => { + acc[entry.name] = index; + return acc; + }, {}); if (dictionaries.length === 0) { return []; } - return await invoke("getTermFrequencies", { + const rawFrequencies = await invoke("getTermFrequencies", { termReadingList: ${JSON.stringify(normalizedTermReadingList)}, dictionaries }); + + if (!Array.isArray(rawFrequencies)) { + return []; + } + + return rawFrequencies + .filter((entry) => entry && typeof entry === "object") + .map((entry) => ({ + ...entry, + dictionaryPriority: + typeof entry.dictionary === "string" && dictionaryPriorityByName[entry.dictionary] !== undefined + ? dictionaryPriorityByName[entry.dictionary] + : Number.MAX_SAFE_INTEGER + })); })(); `; diff --git a/src/main.ts b/src/main.ts index 3671421..88419c0 100644 --- a/src/main.ts +++ b/src/main.ts @@ -2303,6 +2303,8 @@ const { getJlptEnabled: () => getResolvedConfig().subtitleStyle.enableJlpt, getFrequencyDictionaryEnabled: () => getResolvedConfig().subtitleStyle.frequencyDictionary.enabled, + getFrequencyDictionaryMatchMode: () => + getResolvedConfig().subtitleStyle.frequencyDictionary.matchMode, getFrequencyRank: (text) => appState.frequencyRankLookup(text), getYomitanGroupDebugEnabled: () => appState.overlayDebugVisualizationEnabled, getMecabTokenizer: () => appState.mecabTokenizer, diff --git a/src/main/runtime/composers/mpv-runtime-composer.test.ts b/src/main/runtime/composers/mpv-runtime-composer.test.ts index 22a9f47..10ab215 100644 --- a/src/main/runtime/composers/mpv-runtime-composer.test.ts +++ b/src/main/runtime/composers/mpv-runtime-composer.test.ts @@ -128,6 +128,7 @@ test('composeMpvRuntimeHandlers returns callable handlers and forwards to inject getJlptLevel: () => null, getJlptEnabled: () => true, getFrequencyDictionaryEnabled: () => true, + 
getFrequencyDictionaryMatchMode: () => 'headword', getFrequencyRank: () => null, getYomitanGroupDebugEnabled: () => false, getMecabTokenizer: () => null, diff --git a/src/main/runtime/subtitle-tokenization-main-deps.test.ts b/src/main/runtime/subtitle-tokenization-main-deps.test.ts index 0efe713..d951012 100644 --- a/src/main/runtime/subtitle-tokenization-main-deps.test.ts +++ b/src/main/runtime/subtitle-tokenization-main-deps.test.ts @@ -35,6 +35,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () => getJlptLevel: () => 'N2', getJlptEnabled: () => true, getFrequencyDictionaryEnabled: () => true, + getFrequencyDictionaryMatchMode: () => 'surface', getFrequencyRank: () => 5, getYomitanGroupDebugEnabled: () => false, getMecabTokenizer: () => null, @@ -47,6 +48,7 @@ test('tokenizer deps builder records known-word lookups and maps readers', () => deps.setYomitanParserInitPromise(null); assert.equal(deps.getNPlusOneEnabled?.(), true); assert.equal(deps.getMinSentenceWordsForNPlusOne?.(), 3); + assert.equal(deps.getFrequencyDictionaryMatchMode?.(), 'surface'); assert.deepEqual(calls, ['lookup:true', 'lookup:false', 'set-window', 'set-ready', 'set-init']); }); diff --git a/src/main/runtime/subtitle-tokenization-main-deps.ts b/src/main/runtime/subtitle-tokenization-main-deps.ts index 8ef9fa6..7511c38 100644 --- a/src/main/runtime/subtitle-tokenization-main-deps.ts +++ b/src/main/runtime/subtitle-tokenization-main-deps.ts @@ -5,6 +5,9 @@ type TokenizerMainDeps = TokenizerDepsRuntimeOptions & { getFrequencyDictionaryEnabled: NonNullable< TokenizerDepsRuntimeOptions['getFrequencyDictionaryEnabled'] >; + getFrequencyDictionaryMatchMode: NonNullable< + TokenizerDepsRuntimeOptions['getFrequencyDictionaryMatchMode'] + >; getFrequencyRank: NonNullable; getMinSentenceWordsForNPlusOne: NonNullable< TokenizerDepsRuntimeOptions['getMinSentenceWordsForNPlusOne'] @@ -41,6 +44,7 @@ export function createBuildTokenizerDepsMainHandler(deps: TokenizerMainDeps) 
{ getJlptLevel: (text: string) => deps.getJlptLevel(text), getJlptEnabled: () => deps.getJlptEnabled(), getFrequencyDictionaryEnabled: () => deps.getFrequencyDictionaryEnabled(), + getFrequencyDictionaryMatchMode: () => deps.getFrequencyDictionaryMatchMode(), getFrequencyRank: (text: string) => deps.getFrequencyRank(text), getYomitanGroupDebugEnabled: () => deps.getYomitanGroupDebugEnabled(), getMecabTokenizer: () => deps.getMecabTokenizer(), diff --git a/src/renderer/subtitle-render.test.ts b/src/renderer/subtitle-render.test.ts index 8783987..a8f9009 100644 --- a/src/renderer/subtitle-render.test.ts +++ b/src/renderer/subtitle-render.test.ts @@ -79,7 +79,7 @@ test('computeWordClass preserves known and n+1 classes while adding JLPT classes assert.equal(computeWordClass(nPlusOneJlpt), 'word word-n-plus-one word-jlpt-n2'); }); -test('computeWordClass does not add frequency class to known or N+1 terms', () => { +test('computeWordClass keeps known/N+1 color classes exclusive over frequency classes', () => { const known = createToken({ isKnown: true, frequencyRank: 10, @@ -231,7 +231,7 @@ test('getFrequencyRankLabelForToken returns rank only for frequency-colored toke const outOfRangeToken = createToken({ surface: '圏外', frequencyRank: 1000 }); assert.equal(getFrequencyRankLabelForToken(frequencyToken, settings), '20'); - assert.equal(getFrequencyRankLabelForToken(knownToken, settings), null); + assert.equal(getFrequencyRankLabelForToken(knownToken, settings), '20'); assert.equal(getFrequencyRankLabelForToken(outOfRangeToken, settings), null); }); diff --git a/src/renderer/subtitle-render.ts b/src/renderer/subtitle-render.ts index 2025b21..2438e05 100644 --- a/src/renderer/subtitle-render.ts +++ b/src/renderer/subtitle-render.ts @@ -184,7 +184,7 @@ export function getFrequencyRankLabelForToken( token: MergedToken, frequencySettings?: Partial, ): string | null { - if (token.isKnown || token.isNPlusOneTarget) { + if (token.isNPlusOneTarget) { return null; } diff --git 
a/src/types.ts b/src/types.ts index 71350cb..4915f2c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -177,6 +177,7 @@ export type RuntimeOptionValueType = 'boolean' | 'enum'; export type RuntimeOptionValue = boolean | string; export type NPlusOneMatchMode = 'headword' | 'surface'; +export type FrequencyDictionaryMatchMode = 'headword' | 'surface'; export interface RuntimeOptionState { id: RuntimeOptionId; @@ -312,6 +313,7 @@ export interface SubtitleStyleConfig { sourcePath?: string; topX?: number; mode?: FrequencyDictionaryMode; + matchMode?: FrequencyDictionaryMatchMode; singleColor?: string; bandedColors?: [string, string, string, string, string]; }; @@ -536,6 +538,7 @@ export interface ResolvedConfig { sourcePath: string; topX: number; mode: FrequencyDictionaryMode; + matchMode: FrequencyDictionaryMatchMode; singleColor: string; bandedColors: [string, string, string, string, string]; };