mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-04-12 04:19:25 -07:00
feat(subtitles): highlight character-name tokens
This commit is contained in:
@@ -42,6 +42,7 @@ function makeDeps(overrides: Partial<AppReadyRuntimeDeps> = {}) {
|
||||
},
|
||||
texthookerOnlyMode: false,
|
||||
shouldAutoInitializeOverlayRuntimeFromConfig: () => true,
|
||||
setVisibleOverlayVisible: (visible) => calls.push(`setVisibleOverlayVisible:${visible}`),
|
||||
initializeOverlayRuntime: () => calls.push('initializeOverlayRuntime'),
|
||||
handleInitialArgs: () => calls.push('handleInitialArgs'),
|
||||
logDebug: (message) => calls.push(`debug:${message}`),
|
||||
@@ -57,7 +58,11 @@ test('runAppReadyRuntime starts websocket in auto mode when plugin missing', asy
|
||||
});
|
||||
await runAppReadyRuntime(deps);
|
||||
assert.ok(calls.includes('startSubtitleWebsocket:9001'));
|
||||
assert.ok(calls.includes('setVisibleOverlayVisible:true'));
|
||||
assert.ok(calls.includes('initializeOverlayRuntime'));
|
||||
assert.ok(
|
||||
calls.indexOf('setVisibleOverlayVisible:true') < calls.indexOf('initializeOverlayRuntime'),
|
||||
);
|
||||
assert.ok(calls.includes('startBackgroundWarmups'));
|
||||
assert.ok(
|
||||
calls.includes(
|
||||
|
||||
@@ -116,6 +116,7 @@ export interface AppReadyRuntimeDeps {
|
||||
startBackgroundWarmups: () => void;
|
||||
texthookerOnlyMode: boolean;
|
||||
shouldAutoInitializeOverlayRuntimeFromConfig: () => boolean;
|
||||
setVisibleOverlayVisible: (visible: boolean) => void;
|
||||
initializeOverlayRuntime: () => void;
|
||||
handleInitialArgs: () => void;
|
||||
logDebug?: (message: string) => void;
|
||||
@@ -226,6 +227,7 @@ export async function runAppReadyRuntime(deps: AppReadyRuntimeDeps): Promise<voi
|
||||
if (deps.texthookerOnlyMode) {
|
||||
deps.log('Texthooker-only mode enabled; skipping overlay window.');
|
||||
} else if (deps.shouldAutoInitializeOverlayRuntimeFromConfig()) {
|
||||
deps.setVisibleOverlayVisible(true);
|
||||
deps.initializeOverlayRuntime();
|
||||
} else {
|
||||
deps.log('Overlay runtime deferred: waiting for explicit overlay command.');
|
||||
|
||||
@@ -24,6 +24,7 @@ interface YomitanTokenInput {
|
||||
surface: string;
|
||||
reading?: string;
|
||||
headword?: string;
|
||||
isNameMatch?: boolean;
|
||||
}
|
||||
|
||||
function makeDepsFromYomitanTokens(
|
||||
@@ -53,6 +54,7 @@ function makeDepsFromYomitanTokens(
|
||||
headword: token.headword ?? token.surface,
|
||||
startPos,
|
||||
endPos,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
};
|
||||
});
|
||||
},
|
||||
@@ -115,6 +117,20 @@ test('tokenizeSubtitle assigns JLPT level to parsed Yomitan tokens', async () =>
|
||||
assert.equal(result.tokens?.[0]?.jlptLevel, 'N5');
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle preserves Yomitan name-match metadata on tokens', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'アクアです',
|
||||
makeDepsFromYomitanTokens([
|
||||
{ surface: 'アクア', reading: 'あくあ', headword: 'アクア', isNameMatch: true },
|
||||
{ surface: 'です', reading: 'です', headword: 'です' },
|
||||
]),
|
||||
);
|
||||
|
||||
assert.equal(result.tokens?.length, 2);
|
||||
assert.equal((result.tokens?.[0] as { isNameMatch?: boolean } | undefined)?.isNameMatch, true);
|
||||
assert.equal((result.tokens?.[1] as { isNameMatch?: boolean } | undefined)?.isNameMatch, false);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle caches JLPT lookups across repeated tokens', async () => {
|
||||
let lookupCalls = 0;
|
||||
const result = await tokenizeSubtitle(
|
||||
|
||||
@@ -44,6 +44,7 @@ export interface TokenizerServiceDeps {
|
||||
getJlptLevel: (text: string) => JlptLevel | null;
|
||||
getNPlusOneEnabled?: () => boolean;
|
||||
getJlptEnabled?: () => boolean;
|
||||
getNameMatchEnabled?: () => boolean;
|
||||
getFrequencyDictionaryEnabled?: () => boolean;
|
||||
getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode;
|
||||
getFrequencyRank?: FrequencyDictionaryLookup;
|
||||
@@ -73,6 +74,7 @@ export interface TokenizerDepsRuntimeOptions {
|
||||
getJlptLevel: (text: string) => JlptLevel | null;
|
||||
getNPlusOneEnabled?: () => boolean;
|
||||
getJlptEnabled?: () => boolean;
|
||||
getNameMatchEnabled?: () => boolean;
|
||||
getFrequencyDictionaryEnabled?: () => boolean;
|
||||
getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode;
|
||||
getFrequencyRank?: FrequencyDictionaryLookup;
|
||||
@@ -85,6 +87,7 @@ export interface TokenizerDepsRuntimeOptions {
|
||||
interface TokenizerAnnotationOptions {
|
||||
nPlusOneEnabled: boolean;
|
||||
jlptEnabled: boolean;
|
||||
nameMatchEnabled: boolean;
|
||||
frequencyEnabled: boolean;
|
||||
frequencyMatchMode: FrequencyDictionaryMatchMode;
|
||||
minSentenceWordsForNPlusOne: number | undefined;
|
||||
@@ -190,6 +193,7 @@ export function createTokenizerDepsRuntime(
|
||||
getJlptLevel: options.getJlptLevel,
|
||||
getNPlusOneEnabled: options.getNPlusOneEnabled,
|
||||
getJlptEnabled: options.getJlptEnabled,
|
||||
getNameMatchEnabled: options.getNameMatchEnabled,
|
||||
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
|
||||
getFrequencyDictionaryMatchMode: options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
|
||||
getFrequencyRank: options.getFrequencyRank,
|
||||
@@ -301,6 +305,7 @@ function normalizeSelectedYomitanTokens(tokens: MergedToken[]): MergedToken[] {
|
||||
isMerged: token.isMerged ?? true,
|
||||
isKnown: token.isKnown ?? false,
|
||||
isNPlusOneTarget: token.isNPlusOneTarget ?? false,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
reading: normalizeYomitanMergedReading(token),
|
||||
}));
|
||||
}
|
||||
@@ -460,6 +465,7 @@ function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOp
|
||||
return {
|
||||
nPlusOneEnabled: deps.getNPlusOneEnabled?.() !== false,
|
||||
jlptEnabled: deps.getJlptEnabled?.() !== false,
|
||||
nameMatchEnabled: deps.getNameMatchEnabled?.() !== false,
|
||||
frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false,
|
||||
frequencyMatchMode: deps.getFrequencyDictionaryMatchMode?.() ?? 'headword',
|
||||
minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(),
|
||||
@@ -473,7 +479,9 @@ async function parseWithYomitanInternalParser(
|
||||
deps: TokenizerServiceDeps,
|
||||
options: TokenizerAnnotationOptions,
|
||||
): Promise<MergedToken[] | null> {
|
||||
const selectedTokens = await requestYomitanScanTokens(text, deps, logger);
|
||||
const selectedTokens = await requestYomitanScanTokens(text, deps, logger, {
|
||||
includeNameMatchMetadata: options.nameMatchEnabled,
|
||||
});
|
||||
if (!selectedTokens || selectedTokens.length === 0) {
|
||||
return null;
|
||||
}
|
||||
@@ -489,6 +497,7 @@ async function parseWithYomitanInternalParser(
|
||||
isMerged: true,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
}),
|
||||
),
|
||||
);
|
||||
|
||||
@@ -3,6 +3,7 @@ import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import test from 'node:test';
|
||||
import * as vm from 'node:vm';
|
||||
import {
|
||||
getYomitanDictionaryInfo,
|
||||
importYomitanDictionaryFromZip,
|
||||
@@ -39,6 +40,40 @@ function createDeps(
|
||||
};
|
||||
}
|
||||
|
||||
async function runInjectedYomitanScript(
|
||||
script: string,
|
||||
handler: (action: string, params: unknown) => unknown,
|
||||
): Promise<unknown> {
|
||||
return await vm.runInNewContext(script, {
|
||||
chrome: {
|
||||
runtime: {
|
||||
lastError: null,
|
||||
sendMessage: (
|
||||
payload: { action?: string; params?: unknown },
|
||||
callback: (response: { result?: unknown; error?: { message?: string } }) => void,
|
||||
) => {
|
||||
try {
|
||||
callback({ result: handler(payload.action ?? '', payload.params) });
|
||||
} catch (error) {
|
||||
callback({ error: { message: (error as Error).message } });
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
Array,
|
||||
Error,
|
||||
JSON,
|
||||
Map,
|
||||
Math,
|
||||
Number,
|
||||
Object,
|
||||
Promise,
|
||||
RegExp,
|
||||
Set,
|
||||
String,
|
||||
});
|
||||
}
|
||||
|
||||
test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
|
||||
let scriptValue = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
@@ -450,6 +485,164 @@ test('requestYomitanScanTokens uses left-to-right termsFind scanning instead of
|
||||
assert.match(scannerScript ?? '', /deinflect:\s*true/);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens marks tokens backed by SubMiner character dictionary entries', async () => {
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('optionsGetFull')) {
|
||||
return {
|
||||
profileCurrent: 0,
|
||||
profiles: [
|
||||
{
|
||||
options: {
|
||||
scanning: { length: 40 },
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
return [
|
||||
{
|
||||
surface: 'アクア',
|
||||
reading: 'あくあ',
|
||||
headword: 'アクア',
|
||||
startPos: 0,
|
||||
endPos: 3,
|
||||
isNameMatch: true,
|
||||
},
|
||||
{
|
||||
surface: 'です',
|
||||
reading: 'です',
|
||||
headword: 'です',
|
||||
startPos: 3,
|
||||
endPos: 5,
|
||||
isNameMatch: false,
|
||||
},
|
||||
];
|
||||
});
|
||||
|
||||
const result = await requestYomitanScanTokens('アクアです', deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
assert.equal(result?.length, 2);
|
||||
assert.equal((result?.[0] as { isNameMatch?: boolean } | undefined)?.isNameMatch, true);
|
||||
assert.equal((result?.[1] as { isNameMatch?: boolean } | undefined)?.isNameMatch, false);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens skips name-match work when disabled', async () => {
|
||||
let scannerScript = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('termsFind')) {
|
||||
scannerScript = script;
|
||||
}
|
||||
if (script.includes('optionsGetFull')) {
|
||||
return {
|
||||
profileCurrent: 0,
|
||||
profiles: [
|
||||
{
|
||||
options: {
|
||||
scanning: { length: 40 },
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
return [
|
||||
{
|
||||
surface: 'アクア',
|
||||
reading: 'あくあ',
|
||||
headword: 'アクア',
|
||||
startPos: 0,
|
||||
endPos: 3,
|
||||
},
|
||||
];
|
||||
});
|
||||
|
||||
const result = await requestYomitanScanTokens(
|
||||
'アクア',
|
||||
deps,
|
||||
{ error: () => undefined },
|
||||
{ includeNameMatchMetadata: false },
|
||||
);
|
||||
|
||||
assert.equal(result?.length, 1);
|
||||
assert.equal((result?.[0] as { isNameMatch?: boolean } | undefined)?.isNameMatch, undefined);
|
||||
assert.match(scannerScript, /const includeNameMatchMetadata = false;/);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens marks grouped entries when SubMiner dictionary alias only exists on definitions', async () => {
|
||||
let scannerScript = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('termsFind')) {
|
||||
scannerScript = script;
|
||||
return [];
|
||||
}
|
||||
if (script.includes('optionsGetFull')) {
|
||||
return {
|
||||
profileCurrent: 0,
|
||||
profiles: [
|
||||
{
|
||||
options: {
|
||||
scanning: { length: 40 },
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
await requestYomitanScanTokens(
|
||||
'カズマ',
|
||||
deps,
|
||||
{ error: () => undefined },
|
||||
{ includeNameMatchMetadata: true },
|
||||
);
|
||||
|
||||
assert.match(scannerScript, /getPreferredHeadword/);
|
||||
|
||||
const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
|
||||
if (action === 'termsFind') {
|
||||
const text = (params as { text?: string } | undefined)?.text;
|
||||
if (text === 'カズマ') {
|
||||
return {
|
||||
originalTextLength: 3,
|
||||
dictionaryEntries: [
|
||||
{
|
||||
dictionaryAlias: '',
|
||||
headwords: [
|
||||
{
|
||||
term: 'カズマ',
|
||||
reading: 'かずま',
|
||||
sources: [{ originalText: 'カズマ', isPrimary: true, matchType: 'exact' }],
|
||||
},
|
||||
],
|
||||
definitions: [
|
||||
{ dictionary: 'JMdict', dictionaryAlias: 'JMdict' },
|
||||
{
|
||||
dictionary: 'SubMiner Character Dictionary (AniList 130298)',
|
||||
dictionaryAlias: 'SubMiner Character Dictionary (AniList 130298)',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
return { originalTextLength: 0, dictionaryEntries: [] };
|
||||
}
|
||||
throw new Error(`unexpected action: ${action}`);
|
||||
});
|
||||
|
||||
assert.equal(Array.isArray(result), true);
|
||||
assert.equal((result as { length?: number } | null)?.length, 1);
|
||||
assert.equal((result as Array<{ surface?: string }>)[0]?.surface, 'カズマ');
|
||||
assert.equal((result as Array<{ headword?: string }>)[0]?.headword, 'カズマ');
|
||||
assert.equal((result as Array<{ startPos?: number }>)[0]?.startPos, 0);
|
||||
assert.equal((result as Array<{ endPos?: number }>)[0]?.endPos, 3);
|
||||
assert.equal((result as Array<{ isNameMatch?: boolean }>)[0]?.isNameMatch, true);
|
||||
});
|
||||
|
||||
test('getYomitanDictionaryInfo requests dictionary info via backend action', async () => {
|
||||
let scriptValue = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
|
||||
@@ -45,6 +45,7 @@ export interface YomitanScanToken {
|
||||
headword: string;
|
||||
startPos: number;
|
||||
endPos: number;
|
||||
isNameMatch?: boolean;
|
||||
}
|
||||
|
||||
interface YomitanProfileMetadata {
|
||||
@@ -75,7 +76,8 @@ function isScanTokenArray(value: unknown): value is YomitanScanToken[] {
|
||||
typeof entry.reading === 'string' &&
|
||||
typeof entry.headword === 'string' &&
|
||||
typeof entry.startPos === 'number' &&
|
||||
typeof entry.endPos === 'number',
|
||||
typeof entry.endPos === 'number' &&
|
||||
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean'),
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -772,24 +774,92 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
|
||||
return segments;
|
||||
}
|
||||
function getPreferredHeadword(dictionaryEntries, token) {
|
||||
function appendDictionaryNames(target, value) {
|
||||
if (!value || typeof value !== 'object') {
|
||||
return;
|
||||
}
|
||||
const candidates = [
|
||||
value.dictionary,
|
||||
value.dictionaryName,
|
||||
value.name,
|
||||
value.title,
|
||||
value.dictionaryTitle,
|
||||
value.dictionaryAlias
|
||||
];
|
||||
for (const candidate of candidates) {
|
||||
if (typeof candidate === 'string' && candidate.trim().length > 0) {
|
||||
target.push(candidate.trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
function getDictionaryEntryNames(entry) {
|
||||
const names = [];
|
||||
appendDictionaryNames(names, entry);
|
||||
for (const definition of entry?.definitions || []) {
|
||||
appendDictionaryNames(names, definition);
|
||||
}
|
||||
for (const frequency of entry?.frequencies || []) {
|
||||
appendDictionaryNames(names, frequency);
|
||||
}
|
||||
for (const pronunciation of entry?.pronunciations || []) {
|
||||
appendDictionaryNames(names, pronunciation);
|
||||
}
|
||||
return names;
|
||||
}
|
||||
function isNameDictionaryEntry(entry) {
|
||||
if (!includeNameMatchMetadata || !entry || typeof entry !== 'object') {
|
||||
return false;
|
||||
}
|
||||
return getDictionaryEntryNames(entry).some((name) => name.startsWith("SubMiner Character Dictionary"));
|
||||
}
|
||||
function hasExactPrimarySource(headword, token) {
|
||||
for (const src of headword.sources || []) {
|
||||
if (src.originalText !== token) { continue; }
|
||||
if (!src.isPrimary) { continue; }
|
||||
if (src.matchType !== 'exact') { continue; }
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
let matchedNameDictionary = false;
|
||||
if (includeNameMatchMetadata) {
|
||||
for (const dictionaryEntry of dictionaryEntries || []) {
|
||||
if (!isNameDictionaryEntry(dictionaryEntry)) { continue; }
|
||||
for (const headword of dictionaryEntry.headwords || []) {
|
||||
if (!hasExactPrimarySource(headword, token)) { continue; }
|
||||
matchedNameDictionary = true;
|
||||
break;
|
||||
}
|
||||
if (matchedNameDictionary) { break; }
|
||||
}
|
||||
}
|
||||
for (const dictionaryEntry of dictionaryEntries || []) {
|
||||
for (const headword of dictionaryEntry.headwords || []) {
|
||||
const validSources = [];
|
||||
for (const src of headword.sources || []) {
|
||||
if (src.originalText !== token) { continue; }
|
||||
if (!src.isPrimary) { continue; }
|
||||
if (src.matchType !== 'exact') { continue; }
|
||||
validSources.push(src);
|
||||
}
|
||||
if (validSources.length > 0) { return {term: headword.term, reading: headword.reading}; }
|
||||
if (!hasExactPrimarySource(headword, token)) { continue; }
|
||||
return {
|
||||
term: headword.term,
|
||||
reading: headword.reading,
|
||||
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntry)
|
||||
};
|
||||
}
|
||||
}
|
||||
const fallback = dictionaryEntries?.[0]?.headwords?.[0];
|
||||
return fallback ? {term: fallback.term, reading: fallback.reading} : null;
|
||||
return fallback
|
||||
? {
|
||||
term: fallback.term,
|
||||
reading: fallback.reading,
|
||||
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntries?.[0])
|
||||
}
|
||||
: null;
|
||||
}
|
||||
`;
|
||||
|
||||
function buildYomitanScanningScript(text: string, profileIndex: number, scanLength: number): string {
|
||||
function buildYomitanScanningScript(
|
||||
text: string,
|
||||
profileIndex: number,
|
||||
scanLength: number,
|
||||
includeNameMatchMetadata: boolean,
|
||||
): string {
|
||||
return `
|
||||
(async () => {
|
||||
const invoke = (action, params) =>
|
||||
@@ -811,6 +881,7 @@ function buildYomitanScanningScript(text: string, profileIndex: number, scanLeng
|
||||
});
|
||||
});
|
||||
${YOMITAN_SCANNING_HELPERS}
|
||||
const includeNameMatchMetadata = ${includeNameMatchMetadata ? 'true' : 'false'};
|
||||
const text = ${JSON.stringify(text)};
|
||||
const details = {matchType: "exact", deinflect: true};
|
||||
const tokens = [];
|
||||
@@ -834,6 +905,7 @@ ${YOMITAN_SCANNING_HELPERS}
|
||||
headword: preferredHeadword.term,
|
||||
startPos: i,
|
||||
endPos: i + originalTextLength,
|
||||
isNameMatch: includeNameMatchMetadata && preferredHeadword.isNameMatch === true,
|
||||
});
|
||||
i += originalTextLength;
|
||||
continue;
|
||||
@@ -944,6 +1016,9 @@ export async function requestYomitanScanTokens(
|
||||
text: string,
|
||||
deps: YomitanParserRuntimeDeps,
|
||||
logger: LoggerLike,
|
||||
options?: {
|
||||
includeNameMatchMetadata?: boolean;
|
||||
},
|
||||
): Promise<YomitanScanToken[] | null> {
|
||||
const yomitanExt = deps.getYomitanExt();
|
||||
if (!text || !yomitanExt) {
|
||||
@@ -962,7 +1037,12 @@ export async function requestYomitanScanTokens(
|
||||
|
||||
try {
|
||||
const rawResult = await parserWindow.webContents.executeJavaScript(
|
||||
buildYomitanScanningScript(text, profileIndex, scanLength),
|
||||
buildYomitanScanningScript(
|
||||
text,
|
||||
profileIndex,
|
||||
scanLength,
|
||||
options?.includeNameMatchMetadata === true,
|
||||
),
|
||||
true,
|
||||
);
|
||||
if (isScanTokenArray(rawResult)) {
|
||||
|
||||
Reference in New Issue
Block a user