feat(subtitles): highlight character-name tokens

This commit is contained in:
2026-03-06 16:38:19 -08:00
parent c548044c61
commit 82bec02a36
26 changed files with 703 additions and 43 deletions

View File

@@ -42,6 +42,7 @@ function makeDeps(overrides: Partial<AppReadyRuntimeDeps> = {}) {
},
texthookerOnlyMode: false,
shouldAutoInitializeOverlayRuntimeFromConfig: () => true,
setVisibleOverlayVisible: (visible) => calls.push(`setVisibleOverlayVisible:${visible}`),
initializeOverlayRuntime: () => calls.push('initializeOverlayRuntime'),
handleInitialArgs: () => calls.push('handleInitialArgs'),
logDebug: (message) => calls.push(`debug:${message}`),
@@ -57,7 +58,11 @@ test('runAppReadyRuntime starts websocket in auto mode when plugin missing', asy
});
await runAppReadyRuntime(deps);
assert.ok(calls.includes('startSubtitleWebsocket:9001'));
assert.ok(calls.includes('setVisibleOverlayVisible:true'));
assert.ok(calls.includes('initializeOverlayRuntime'));
assert.ok(
calls.indexOf('setVisibleOverlayVisible:true') < calls.indexOf('initializeOverlayRuntime'),
);
assert.ok(calls.includes('startBackgroundWarmups'));
assert.ok(
calls.includes(

View File

@@ -116,6 +116,7 @@ export interface AppReadyRuntimeDeps {
startBackgroundWarmups: () => void;
texthookerOnlyMode: boolean;
shouldAutoInitializeOverlayRuntimeFromConfig: () => boolean;
setVisibleOverlayVisible: (visible: boolean) => void;
initializeOverlayRuntime: () => void;
handleInitialArgs: () => void;
logDebug?: (message: string) => void;
@@ -226,6 +227,7 @@ export async function runAppReadyRuntime(deps: AppReadyRuntimeDeps): Promise<voi
if (deps.texthookerOnlyMode) {
deps.log('Texthooker-only mode enabled; skipping overlay window.');
} else if (deps.shouldAutoInitializeOverlayRuntimeFromConfig()) {
deps.setVisibleOverlayVisible(true);
deps.initializeOverlayRuntime();
} else {
deps.log('Overlay runtime deferred: waiting for explicit overlay command.');

View File

@@ -24,6 +24,7 @@ interface YomitanTokenInput {
surface: string;
reading?: string;
headword?: string;
isNameMatch?: boolean;
}
function makeDepsFromYomitanTokens(
@@ -53,6 +54,7 @@ function makeDepsFromYomitanTokens(
headword: token.headword ?? token.surface,
startPos,
endPos,
isNameMatch: token.isNameMatch ?? false,
};
});
},
@@ -115,6 +117,20 @@ test('tokenizeSubtitle assigns JLPT level to parsed Yomitan tokens', async () =>
assert.equal(result.tokens?.[0]?.jlptLevel, 'N5');
});
test('tokenizeSubtitle preserves Yomitan name-match metadata on tokens', async () => {
const result = await tokenizeSubtitle(
'アクアです',
makeDepsFromYomitanTokens([
{ surface: 'アクア', reading: 'あくあ', headword: 'アクア', isNameMatch: true },
{ surface: 'です', reading: 'です', headword: 'です' },
]),
);
assert.equal(result.tokens?.length, 2);
assert.equal((result.tokens?.[0] as { isNameMatch?: boolean } | undefined)?.isNameMatch, true);
assert.equal((result.tokens?.[1] as { isNameMatch?: boolean } | undefined)?.isNameMatch, false);
});
test('tokenizeSubtitle caches JLPT lookups across repeated tokens', async () => {
let lookupCalls = 0;
const result = await tokenizeSubtitle(

View File

@@ -44,6 +44,7 @@ export interface TokenizerServiceDeps {
getJlptLevel: (text: string) => JlptLevel | null;
getNPlusOneEnabled?: () => boolean;
getJlptEnabled?: () => boolean;
getNameMatchEnabled?: () => boolean;
getFrequencyDictionaryEnabled?: () => boolean;
getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode;
getFrequencyRank?: FrequencyDictionaryLookup;
@@ -73,6 +74,7 @@ export interface TokenizerDepsRuntimeOptions {
getJlptLevel: (text: string) => JlptLevel | null;
getNPlusOneEnabled?: () => boolean;
getJlptEnabled?: () => boolean;
getNameMatchEnabled?: () => boolean;
getFrequencyDictionaryEnabled?: () => boolean;
getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode;
getFrequencyRank?: FrequencyDictionaryLookup;
@@ -85,6 +87,7 @@ export interface TokenizerDepsRuntimeOptions {
interface TokenizerAnnotationOptions {
nPlusOneEnabled: boolean;
jlptEnabled: boolean;
nameMatchEnabled: boolean;
frequencyEnabled: boolean;
frequencyMatchMode: FrequencyDictionaryMatchMode;
minSentenceWordsForNPlusOne: number | undefined;
@@ -190,6 +193,7 @@ export function createTokenizerDepsRuntime(
getJlptLevel: options.getJlptLevel,
getNPlusOneEnabled: options.getNPlusOneEnabled,
getJlptEnabled: options.getJlptEnabled,
getNameMatchEnabled: options.getNameMatchEnabled,
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
getFrequencyDictionaryMatchMode: options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
getFrequencyRank: options.getFrequencyRank,
@@ -301,6 +305,7 @@ function normalizeSelectedYomitanTokens(tokens: MergedToken[]): MergedToken[] {
isMerged: token.isMerged ?? true,
isKnown: token.isKnown ?? false,
isNPlusOneTarget: token.isNPlusOneTarget ?? false,
isNameMatch: token.isNameMatch ?? false,
reading: normalizeYomitanMergedReading(token),
}));
}
@@ -460,6 +465,7 @@ function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOp
return {
nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false,
jlptEnabled: deps.getJlptEnabled?.() !== false,
nameMatchEnabled: deps.getNameMatchEnabled?.() !== false,
frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false,
frequencyMatchMode: deps.getFrequencyDictionaryMatchMode?.() ?? 'headword',
minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(),
@@ -473,7 +479,9 @@ async function parseWithYomitanInternalParser(
deps: TokenizerServiceDeps,
options: TokenizerAnnotationOptions,
): Promise<MergedToken[] | null> {
const selectedTokens = await requestYomitanScanTokens(text, deps, logger);
const selectedTokens = await requestYomitanScanTokens(text, deps, logger, {
includeNameMatchMetadata: options.nameMatchEnabled,
});
if (!selectedTokens || selectedTokens.length === 0) {
return null;
}
@@ -489,6 +497,7 @@ async function parseWithYomitanInternalParser(
isMerged: true,
isKnown: false,
isNPlusOneTarget: false,
isNameMatch: token.isNameMatch ?? false,
}),
),
);

View File

@@ -3,6 +3,7 @@ import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import test from 'node:test';
import * as vm from 'node:vm';
import {
getYomitanDictionaryInfo,
importYomitanDictionaryFromZip,
@@ -39,6 +40,40 @@ function createDeps(
};
}
async function runInjectedYomitanScript(
script: string,
handler: (action: string, params: unknown) => unknown,
): Promise<unknown> {
return await vm.runInNewContext(script, {
chrome: {
runtime: {
lastError: null,
sendMessage: (
payload: { action?: string; params?: unknown },
callback: (response: { result?: unknown; error?: { message?: string } }) => void,
) => {
try {
callback({ result: handler(payload.action ?? '', payload.params) });
} catch (error) {
callback({ error: { message: (error as Error).message } });
}
},
},
},
Array,
Error,
JSON,
Map,
Math,
Number,
Object,
Promise,
RegExp,
Set,
String,
});
}
test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
let scriptValue = '';
const deps = createDeps(async (script) => {
@@ -450,6 +485,164 @@ test('requestYomitanScanTokens uses left-to-right termsFind scanning instead of
assert.match(scannerScript ?? '', /deinflect:\s*true/);
});
test('requestYomitanScanTokens marks tokens backed by SubMiner character dictionary entries', async () => {
const deps = createDeps(async (script) => {
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profiles: [
{
options: {
scanning: { length: 40 },
},
},
],
};
}
return [
{
surface: 'アクア',
reading: 'あくあ',
headword: 'アクア',
startPos: 0,
endPos: 3,
isNameMatch: true,
},
{
surface: 'です',
reading: 'です',
headword: 'です',
startPos: 3,
endPos: 5,
isNameMatch: false,
},
];
});
const result = await requestYomitanScanTokens('アクアです', deps, {
error: () => undefined,
});
assert.equal(result?.length, 2);
assert.equal((result?.[0] as { isNameMatch?: boolean } | undefined)?.isNameMatch, true);
assert.equal((result?.[1] as { isNameMatch?: boolean } | undefined)?.isNameMatch, false);
});
test('requestYomitanScanTokens skips name-match work when disabled', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
scannerScript = script;
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profiles: [
{
options: {
scanning: { length: 40 },
},
},
],
};
}
return [
{
surface: 'アクア',
reading: 'あくあ',
headword: 'アクア',
startPos: 0,
endPos: 3,
},
];
});
const result = await requestYomitanScanTokens(
'アクア',
deps,
{ error: () => undefined },
{ includeNameMatchMetadata: false },
);
assert.equal(result?.length, 1);
assert.equal((result?.[0] as { isNameMatch?: boolean } | undefined)?.isNameMatch, undefined);
assert.match(scannerScript, /const includeNameMatchMetadata = false;/);
});
test('requestYomitanScanTokens marks grouped entries when SubMiner dictionary alias only exists on definitions', async () => {
let scannerScript = '';
const deps = createDeps(async (script) => {
if (script.includes('termsFind')) {
scannerScript = script;
return [];
}
if (script.includes('optionsGetFull')) {
return {
profileCurrent: 0,
profiles: [
{
options: {
scanning: { length: 40 },
},
},
],
};
}
return null;
});
await requestYomitanScanTokens(
'カズマ',
deps,
{ error: () => undefined },
{ includeNameMatchMetadata: true },
);
assert.match(scannerScript, /getPreferredHeadword/);
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
if (action === 'termsFind') {
const text = (params as { text?: string } | undefined)?.text;
if (text === 'カズマ') {
return {
originalTextLength: 3,
dictionaryEntries: [
{
dictionaryAlias: '',
headwords: [
{
term: 'カズマ',
reading: 'かずま',
sources: [{ originalText: 'カズマ', isPrimary: true, matchType: 'exact' }],
},
],
definitions: [
{ dictionary: 'JMdict', dictionaryAlias: 'JMdict' },
{
dictionary: 'SubMiner Character Dictionary (AniList 130298)',
dictionaryAlias: 'SubMiner Character Dictionary (AniList 130298)',
},
],
},
],
};
}
return { originalTextLength: 0, dictionaryEntries: [] };
}
throw new Error(`unexpected action: ${action}`);
});
assert.equal(Array.isArray(result), true);
assert.equal((result as { length?: number } | null)?.length, 1);
assert.equal((result as Array<{ surface?: string }>)[0]?.surface, 'カズマ');
assert.equal((result as Array<{ headword?: string }>)[0]?.headword, 'カズマ');
assert.equal((result as Array<{ startPos?: number }>)[0]?.startPos, 0);
assert.equal((result as Array<{ endPos?: number }>)[0]?.endPos, 3);
assert.equal((result as Array<{ isNameMatch?: boolean }>)[0]?.isNameMatch, true);
});
test('getYomitanDictionaryInfo requests dictionary info via backend action', async () => {
let scriptValue = '';
const deps = createDeps(async (script) => {

View File

@@ -45,6 +45,7 @@ export interface YomitanScanToken {
headword: string;
startPos: number;
endPos: number;
isNameMatch?: boolean;
}
interface YomitanProfileMetadata {
@@ -75,7 +76,8 @@ function isScanTokenArray(value: unknown): value is YomitanScanToken[] {
typeof entry.reading === 'string' &&
typeof entry.headword === 'string' &&
typeof entry.startPos === 'number' &&
typeof entry.endPos === 'number',
typeof entry.endPos === 'number' &&
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean'),
)
);
}
@@ -772,24 +774,92 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
return segments;
}
function getPreferredHeadword(dictionaryEntries, token) {
function appendDictionaryNames(target, value) {
if (!value || typeof value !== 'object') {
return;
}
const candidates = [
value.dictionary,
value.dictionaryName,
value.name,
value.title,
value.dictionaryTitle,
value.dictionaryAlias
];
for (const candidate of candidates) {
if (typeof candidate === 'string' && candidate.trim().length > 0) {
target.push(candidate.trim());
}
}
}
function getDictionaryEntryNames(entry) {
const names = [];
appendDictionaryNames(names, entry);
for (const definition of entry?.definitions || []) {
appendDictionaryNames(names, definition);
}
for (const frequency of entry?.frequencies || []) {
appendDictionaryNames(names, frequency);
}
for (const pronunciation of entry?.pronunciations || []) {
appendDictionaryNames(names, pronunciation);
}
return names;
}
function isNameDictionaryEntry(entry) {
if (!includeNameMatchMetadata || !entry || typeof entry !== 'object') {
return false;
}
return getDictionaryEntryNames(entry).some((name) => name.startsWith("SubMiner Character Dictionary"));
}
function hasExactPrimarySource(headword, token) {
for (const src of headword.sources || []) {
if (src.originalText !== token) { continue; }
if (!src.isPrimary) { continue; }
if (src.matchType !== 'exact') { continue; }
return true;
}
return false;
}
let matchedNameDictionary = false;
if (includeNameMatchMetadata) {
for (const dictionaryEntry of dictionaryEntries || []) {
if (!isNameDictionaryEntry(dictionaryEntry)) { continue; }
for (const headword of dictionaryEntry.headwords || []) {
if (!hasExactPrimarySource(headword, token)) { continue; }
matchedNameDictionary = true;
break;
}
if (matchedNameDictionary) { break; }
}
}
for (const dictionaryEntry of dictionaryEntries || []) {
for (const headword of dictionaryEntry.headwords || []) {
const validSources = [];
for (const src of headword.sources || []) {
if (src.originalText !== token) { continue; }
if (!src.isPrimary) { continue; }
if (src.matchType !== 'exact') { continue; }
validSources.push(src);
}
if (validSources.length > 0) { return {term: headword.term, reading: headword.reading}; }
if (!hasExactPrimarySource(headword, token)) { continue; }
return {
term: headword.term,
reading: headword.reading,
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntry)
};
}
}
const fallback = dictionaryEntries?.[0]?.headwords?.[0];
return fallback ? {term: fallback.term, reading: fallback.reading} : null;
return fallback
? {
term: fallback.term,
reading: fallback.reading,
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntries?.[0])
}
: null;
}
`;
function buildYomitanScanningScript(text: string, profileIndex: number, scanLength: number): string {
function buildYomitanScanningScript(
text: string,
profileIndex: number,
scanLength: number,
includeNameMatchMetadata: boolean,
): string {
return `
(async () => {
const invoke = (action, params) =>
@@ -811,6 +881,7 @@ function buildYomitanScanningScript(text: string, profileIndex: number, scanLeng
});
});
${YOMITAN_SCANNING_HELPERS}
const includeNameMatchMetadata = ${includeNameMatchMetadata ? 'true' : 'false'};
const text = ${JSON.stringify(text)};
const details = {matchType: "exact", deinflect: true};
const tokens = [];
@@ -834,6 +905,7 @@ ${YOMITAN_SCANNING_HELPERS}
headword: preferredHeadword.term,
startPos: i,
endPos: i + originalTextLength,
isNameMatch: includeNameMatchMetadata && preferredHeadword.isNameMatch === true,
});
i += originalTextLength;
continue;
@@ -944,6 +1016,9 @@ export async function requestYomitanScanTokens(
text: string,
deps: YomitanParserRuntimeDeps,
logger: LoggerLike,
options?: {
includeNameMatchMetadata?: boolean;
},
): Promise<YomitanScanToken[] | null> {
const yomitanExt = deps.getYomitanExt();
if (!text || !yomitanExt) {
@@ -962,7 +1037,12 @@ export async function requestYomitanScanTokens(
try {
const rawResult = await parserWindow.webContents.executeJavaScript(
buildYomitanScanningScript(text, profileIndex, scanLength),
buildYomitanScanningScript(
text,
profileIndex,
scanLength,
options?.includeNameMatchMetadata === true,
),
true,
);
if (isScanTokenArray(rawResult)) {