mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-04-12 04:19:25 -07:00
Enhance AniList character dictionary sync and subtitle features (#15)
This commit is contained in:
@@ -252,12 +252,12 @@ test('annotateTokens applies configured pos1 exclusions to both frequency and N+
|
||||
test('annotateTokens allows previously default-excluded pos1 when removed from effective set', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'は',
|
||||
headword: 'は',
|
||||
surface: 'まで',
|
||||
headword: 'まで',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '助詞',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
endPos: 2,
|
||||
frequencyRank: 8,
|
||||
}),
|
||||
];
|
||||
@@ -314,6 +314,52 @@ test('annotateTokens excludes likely kana SFX tokens from frequency when POS tag
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes single hiragana and katakana tokens from frequency when POS tags are missing', () => {
  // Builds a one-character token with empty POS tags; only the text, rank and offset vary.
  const untagged = (surface: string, reading: string, frequencyRank: number, startPos: number) =>
    makeToken({
      surface,
      reading,
      headword: surface,
      pos1: '',
      pos2: '',
      partOfSpeech: PartOfSpeech.other,
      frequencyRank,
      startPos,
      endPos: startPos + 1,
    });

  const tokens = [untagged('た', 'た', 21, 0), untagged('ア', 'ア', 22, 1), untagged('山', 'やま', 23, 2)];

  const [hiragana, katakana, kanji] = annotateTokens(tokens, makeDeps(), {
    minSentenceWordsForNPlusOne: 1,
  });

  // The two lone kana lose their rank; the kanji token keeps it.
  assert.equal(hiragana?.frequencyRank, undefined);
  assert.equal(katakana?.frequencyRank, undefined);
  assert.equal(kanji?.frequencyRank, 23);
});
|
||||
|
||||
test('annotateTokens keeps frequency when mecab tags classify token as content-bearing', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
|
||||
@@ -103,6 +103,10 @@ function isFrequencyExcludedByPos(
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
pos2Exclusions: ReadonlySet<string>,
|
||||
): boolean {
|
||||
if (isSingleKanaFrequencyNoiseToken(token.surface)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const normalizedPos1 = normalizePos1Tag(token.pos1);
|
||||
const hasPos1 = normalizedPos1.length > 0;
|
||||
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {
|
||||
@@ -231,6 +235,7 @@ function isKanaChar(char: string): boolean {
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
code === 0x30fc ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
@@ -362,6 +367,20 @@ function isLikelyFrequencyNoiseToken(token: MergedToken): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Reports whether `text`, after trimming surrounding whitespace, consists of
 * exactly one character that `isKanaChar` accepts.
 *
 * @param text - Token surface to inspect; non-string and blank values yield `false`.
 * @returns `true` only for a single kana character.
 */
function isSingleKanaFrequencyNoiseToken(text: string | undefined): boolean {
  const trimmed = typeof text === 'string' ? text.trim() : '';
  if (trimmed.length === 0) {
    return false;
  }

  // Array.from splits by code point, so a surrogate pair counts as one character.
  const [first, ...rest] = Array.from(trimmed);
  return rest.length === 0 && first !== undefined && isKanaChar(first);
}
|
||||
|
||||
function isJlptEligibleToken(token: MergedToken): boolean {
|
||||
if (token.pos1 && shouldIgnoreJlptForMecabPos1(token.pos1)) {
|
||||
return false;
|
||||
|
||||
@@ -127,3 +127,88 @@ test('drops scanning parser tokens which have no dictionary headword', () => {
|
||||
],
|
||||
);
|
||||
});
|
||||
|
||||
test('prefers the longest dictionary headword across merged segments', () => {
  // One scanned line whose second segment carries the full-length headword.
  const mergedLine = [
    { text: 'バニ', reading: 'ばに', headword: 'バニ' },
    { text: 'ール', reading: 'ーる', headword: 'バニール' },
  ];
  const parseResults = [makeParseItem('scanning-parser', [mergedLine])];

  const tokens = selectYomitanParseTokens(parseResults, () => false, 'headword');
  const summary = tokens?.map(({ surface, reading, headword }) => ({ surface, reading, headword }));

  assert.deepEqual(summary, [
    {
      surface: 'バニール',
      reading: 'ばにーる',
      headword: 'バニール',
    },
  ]);
});
|
||||
|
||||
test('keeps the first headword when later segments are standalone words', () => {
  // Neither segment's headword is longer than its own text.
  const mergedLine = [
    { text: '猫', reading: 'ねこ', headword: '猫' },
    { text: 'です', reading: 'です', headword: 'です' },
  ];
  const parseResults = [makeParseItem('scanning-parser', [mergedLine])];

  const tokens = selectYomitanParseTokens(parseResults, () => false, 'headword');
  const summary = tokens?.map(({ surface, reading, headword }) => ({ surface, reading, headword }));

  assert.deepEqual(summary, [
    {
      surface: '猫です',
      reading: 'ねこです',
      headword: '猫',
    },
  ]);
});
|
||||
|
||||
test('merges trailing katakana continuation without headword into previous token', () => {
  // The middle line has no headword and completes the previous line's headword 'カズマ'.
  const scannedLines = [
    [{ text: 'カズ', reading: 'かず', headword: 'カズマ' }],
    [{ text: 'マ', reading: 'ま' }],
    [{ text: '魔王軍', reading: 'まおうぐん', headword: '魔王軍' }],
  ];
  const parseResults = [makeParseItem('scanning-parser', scannedLines)];

  const tokens = selectYomitanParseTokens(parseResults, () => false, 'headword');
  const summary = tokens?.map(({ surface, reading, headword }) => ({ surface, reading, headword }));

  assert.deepEqual(summary, [
    {
      surface: 'カズマ',
      reading: 'かずま',
      headword: 'カズマ',
    },
    {
      surface: '魔王軍',
      reading: 'まおうぐん',
      headword: '魔王軍',
    },
  ]);
});
|
||||
|
||||
@@ -49,6 +49,7 @@ function isKanaChar(char: string): boolean {
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
code === 0x30fc ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
@@ -111,6 +112,51 @@ function extractYomitanHeadword(segment: YomitanParseSegment): string {
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Chooses the headword for a token assembled from several segments.
 *
 * @param firstHeadword - First non-empty headword seen among the segments, or `''`.
 * @param expandedHeadwords - Candidate headwords, in segment order.
 * @param surface - Combined surface text of the merged token.
 * @returns With candidates present: the one equal to `surface`, otherwise the
 *   longest (the earliest wins a length tie). Without candidates:
 *   `firstHeadword`, or `''` when it is empty.
 */
function selectMergedHeadword(
  firstHeadword: string,
  expandedHeadwords: string[],
  surface: string,
): string {
  if (expandedHeadwords.length === 0) {
    return firstHeadword ? firstHeadword : '';
  }

  // An empty surface is never treated as an exact match.
  if (surface !== '' && expandedHeadwords.includes(surface)) {
    return surface;
  }

  // A strict comparison keeps the earliest candidate among equally long ones.
  let longest = '';
  for (const candidate of expandedHeadwords) {
    if (candidate.length > longest.length) {
      longest = candidate;
    }
  }
  return longest;
}
|
||||
|
||||
/**
 * Reports whether `text` is non-empty and every character in it passes `isKanaChar`.
 */
function isKanaOnlyText(text: string): boolean {
  if (text.length === 0) {
    return false;
  }

  // Iterating the string directly walks it by code point, like Array.from.
  for (const char of text) {
    if (!isKanaChar(char)) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Decides whether a kana-only fragment should be appended to the preceding token.
 *
 * The fragment qualifies only when the previous token's headword is longer than
 * its surface and begins with that surface followed by the fragment.
 *
 * @param previousToken - The most recently emitted token, if any.
 * @param continuationSurface - Surface text of the candidate fragment.
 * @returns `true` (narrowing `previousToken` to `MergedToken`) when the merge applies.
 */
function shouldMergeKanaContinuation(
  previousToken: MergedToken | undefined,
  continuationSurface: string,
): previousToken is MergedToken {
  if (!previousToken) {
    return false;
  }
  if (!continuationSurface || !isKanaOnlyText(continuationSurface)) {
    return false;
  }

  const { headword, surface } = previousToken;
  // Nothing is left to complete unless the headword is longer than the surface so far.
  if (!headword || headword.length <= surface.length) {
    return false;
  }

  return headword.startsWith(surface + continuationSurface);
}
|
||||
|
||||
export function mapYomitanParseResultItemToMergedTokens(
|
||||
parseResult: YomitanParseResultItem,
|
||||
isKnownWord: (text: string) => boolean,
|
||||
@@ -140,7 +186,8 @@ export function mapYomitanParseResultItemToMergedTokens(
|
||||
|
||||
let combinedSurface = '';
|
||||
let combinedReading = '';
|
||||
let combinedHeadword = '';
|
||||
let firstHeadword = '';
|
||||
const expandedHeadwords: string[] = [];
|
||||
|
||||
for (const segment of line) {
|
||||
const segmentText = segment.text;
|
||||
@@ -152,8 +199,14 @@ export function mapYomitanParseResultItemToMergedTokens(
|
||||
if (typeof segment.reading === 'string') {
|
||||
combinedReading += segment.reading;
|
||||
}
|
||||
if (!combinedHeadword) {
|
||||
combinedHeadword = extractYomitanHeadword(segment);
|
||||
const segmentHeadword = extractYomitanHeadword(segment);
|
||||
if (segmentHeadword) {
|
||||
if (!firstHeadword) {
|
||||
firstHeadword = segmentHeadword;
|
||||
}
|
||||
if (segmentHeadword.length > segmentText.length) {
|
||||
expandedHeadwords.push(segmentHeadword);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,7 +217,20 @@ export function mapYomitanParseResultItemToMergedTokens(
|
||||
const start = charOffset;
|
||||
const end = start + combinedSurface.length;
|
||||
charOffset = end;
|
||||
const combinedHeadword = selectMergedHeadword(
|
||||
firstHeadword,
|
||||
expandedHeadwords,
|
||||
combinedSurface,
|
||||
);
|
||||
if (!combinedHeadword) {
|
||||
const previousToken = tokens[tokens.length - 1];
|
||||
if (shouldMergeKanaContinuation(previousToken, combinedSurface)) {
|
||||
previousToken.surface += combinedSurface;
|
||||
previousToken.reading += combinedReading;
|
||||
previousToken.endPos = end;
|
||||
continue;
|
||||
}
|
||||
|
||||
// No dictionary-backed headword for this merged unit; skip it entirely so
|
||||
// downstream keyboard/frequency/JLPT flows only operate on lookup-backed tokens.
|
||||
continue;
|
||||
|
||||
@@ -1,12 +1,26 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import test from 'node:test';
|
||||
import * as vm from 'node:vm';
|
||||
import {
|
||||
requestYomitanParseResults,
|
||||
getYomitanDictionaryInfo,
|
||||
importYomitanDictionaryFromZip,
|
||||
deleteYomitanDictionaryByTitle,
|
||||
removeYomitanDictionarySettings,
|
||||
requestYomitanScanTokens,
|
||||
requestYomitanTermFrequencies,
|
||||
syncYomitanDefaultAnkiServer,
|
||||
upsertYomitanDictionarySettings,
|
||||
} from './yomitan-parser-runtime';
|
||||
|
||||
function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
|
||||
function createDeps(
|
||||
executeJavaScript: (script: string) => Promise<unknown>,
|
||||
options?: {
|
||||
createYomitanExtensionWindow?: (pageName: string) => Promise<unknown>;
|
||||
},
|
||||
) {
|
||||
const parserWindow = {
|
||||
isDestroyed: () => false,
|
||||
webContents: {
|
||||
@@ -22,9 +36,44 @@ function createDeps(executeJavaScript: (script: string) => Promise<unknown>) {
|
||||
setYomitanParserReadyPromise: () => undefined,
|
||||
getYomitanParserInitPromise: () => null,
|
||||
setYomitanParserInitPromise: () => undefined,
|
||||
createYomitanExtensionWindow: options?.createYomitanExtensionWindow as never,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Evaluates `script` in a fresh `vm` context that exposes a stubbed
 * `chrome.runtime.sendMessage` plus a fixed set of built-in globals.
 *
 * @param script - Source text to evaluate.
 * @param handler - Receives each message's `action` (defaulting to `''`) and
 *   `params`; its return value is delivered as `{ result }`, and anything it
 *   throws is delivered as `{ error: { message } }`.
 * @returns The awaited completion value of the script.
 */
async function runInjectedYomitanScript(
  script: string,
  handler: (action: string, params: unknown) => unknown,
): Promise<unknown> {
  type StubResponse = { result?: unknown; error?: { message?: string } };

  const sendMessage = (
    payload: { action?: string; params?: unknown },
    callback: (response: StubResponse) => void,
  ) => {
    // The callback stays inside the try so a throw from it is also reported as an error response.
    try {
      callback({ result: handler(payload.action ?? '', payload.params) });
    } catch (error) {
      callback({ error: { message: (error as Error).message } });
    }
  };

  const sandbox = {
    chrome: { runtime: { lastError: null, sendMessage } },
    Array,
    Error,
    JSON,
    Map,
    Math,
    Number,
    Object,
    Promise,
    RegExp,
    Set,
    String,
  };

  return await vm.runInNewContext(script, sandbox);
}
|
||||
|
||||
test('syncYomitanDefaultAnkiServer updates default profile server when script reports update', async () => {
|
||||
let scriptValue = '';
|
||||
const deps = createDeps(async (script) => {
|
||||
@@ -389,7 +438,7 @@ test('requestYomitanTermFrequencies caches repeated term+reading lookups', async
|
||||
assert.equal(frequencyCalls, 1);
|
||||
});
|
||||
|
||||
test('requestYomitanParseResults disables Yomitan MeCab parser path', async () => {
|
||||
test('requestYomitanScanTokens uses left-to-right termsFind scanning instead of parseText', async () => {
|
||||
const scripts: string[] = [];
|
||||
const deps = createDeps(async (script) => {
|
||||
scripts.push(script);
|
||||
@@ -405,15 +454,517 @@ test('requestYomitanParseResults disables Yomitan MeCab parser path', async () =
|
||||
],
|
||||
};
|
||||
}
|
||||
return [];
|
||||
return [
|
||||
{
|
||||
surface: 'カズマ',
|
||||
reading: 'かずま',
|
||||
headword: 'カズマ',
|
||||
startPos: 0,
|
||||
endPos: 3,
|
||||
},
|
||||
];
|
||||
});
|
||||
|
||||
const result = await requestYomitanParseResults('猫です', deps, {
|
||||
const result = await requestYomitanScanTokens('カズマ', deps, {
|
||||
error: () => undefined,
|
||||
});
|
||||
|
||||
assert.deepEqual(result, []);
|
||||
const parseScript = scripts.find((script) => script.includes('parseText'));
|
||||
assert.ok(parseScript, 'expected parseText request script');
|
||||
assert.match(parseScript ?? '', /useMecabParser:\s*false/);
|
||||
assert.deepEqual(result, [
|
||||
{
|
||||
surface: 'カズマ',
|
||||
reading: 'かずま',
|
||||
headword: 'カズマ',
|
||||
startPos: 0,
|
||||
endPos: 3,
|
||||
},
|
||||
]);
|
||||
const scannerScript = scripts.find((script) => script.includes('termsFind'));
|
||||
assert.ok(scannerScript, 'expected termsFind scanning request script');
|
||||
assert.doesNotMatch(scannerScript ?? '', /parseText/);
|
||||
assert.match(scannerScript ?? '', /matchType:\s*"exact"/);
|
||||
assert.match(scannerScript ?? '', /deinflect:\s*true/);
|
||||
});
|
||||
|
||||
test('requestYomitanScanTokens marks tokens backed by SubMiner character dictionary entries', async () => {
  type NameMatchView = { isNameMatch?: boolean } | undefined;

  // Fresh objects per call, so nothing is shared between script executions.
  const buildOptionsFull = () => ({
    profileCurrent: 0,
    profiles: [
      {
        options: {
          scanning: { length: 40 },
        },
      },
    ],
  });
  const buildScanTokens = () => [
    {
      surface: 'アクア',
      reading: 'あくあ',
      headword: 'アクア',
      startPos: 0,
      endPos: 3,
      isNameMatch: true,
    },
    {
      surface: 'です',
      reading: 'です',
      headword: 'です',
      startPos: 3,
      endPos: 5,
      isNameMatch: false,
    },
  ];

  const deps = createDeps(async (script) =>
    script.includes('optionsGetFull') ? buildOptionsFull() : buildScanTokens(),
  );

  const result = await requestYomitanScanTokens('アクアです', deps, {
    error: () => undefined,
  });

  assert.equal(result?.length, 2);
  const nameToken = result?.[0] as NameMatchView;
  const plainToken = result?.[1] as NameMatchView;
  assert.equal(nameToken?.isNameMatch, true);
  assert.equal(plainToken?.isNameMatch, false);
});
|
||||
|
||||
test('requestYomitanScanTokens skips name-match work when disabled', async () => {
  let scannerScript = '';

  const deps = createDeps(async (script) => {
    // Capture the scanner script, then fall through to the normal replies.
    if (script.includes('termsFind')) {
      scannerScript = script;
    }

    if (script.includes('optionsGetFull')) {
      const profile = { options: { scanning: { length: 40 } } };
      return { profileCurrent: 0, profiles: [profile] };
    }

    const token = {
      surface: 'アクア',
      reading: 'あくあ',
      headword: 'アクア',
      startPos: 0,
      endPos: 3,
    };
    return [token];
  });

  const logger = { error: () => undefined };
  const result = await requestYomitanScanTokens('アクア', deps, logger, {
    includeNameMatchMetadata: false,
  });

  assert.equal(result?.length, 1);
  const onlyToken = result?.[0] as { isNameMatch?: boolean } | undefined;
  assert.equal(onlyToken?.isNameMatch, undefined);
  assert.match(scannerScript, /const includeNameMatchMetadata = false;/);
});
|
||||
|
||||
test('requestYomitanScanTokens marks grouped entries when SubMiner dictionary alias only exists on definitions', async () => {
  type ScannedToken = {
    surface?: string;
    headword?: string;
    startPos?: number;
    endPos?: number;
    isNameMatch?: boolean;
  };

  const characterDictionary = 'SubMiner Character Dictionary (AniList 130298)';
  let scannerScript = '';

  // Phase 1: capture the scanner script that would be injected into the parser window.
  const deps = createDeps(async (script) => {
    if (script.includes('termsFind')) {
      scannerScript = script;
      return [];
    }
    if (script.includes('optionsGetFull')) {
      const profile = { options: { scanning: { length: 40 } } };
      return { profileCurrent: 0, profiles: [profile] };
    }
    return null;
  });

  const logger = { error: () => undefined };
  await requestYomitanScanTokens('カズマ', deps, logger, { includeNameMatchMetadata: true });

  assert.match(scannerScript, /getPreferredHeadword/);

  // Phase 2: run the captured script against a stub backend. The entry's own
  // alias is empty; only one of its definitions names the character dictionary.
  const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
    if (action !== 'termsFind') {
      throw new Error(`unexpected action: ${action}`);
    }

    const text = (params as { text?: string } | undefined)?.text;
    if (text !== 'カズマ') {
      return { originalTextLength: 0, dictionaryEntries: [] };
    }

    const headword = {
      term: 'カズマ',
      reading: 'かずま',
      sources: [{ originalText: 'カズマ', isPrimary: true, matchType: 'exact' }],
    };
    const definitions = [
      { dictionary: 'JMdict', dictionaryAlias: 'JMdict' },
      { dictionary: characterDictionary, dictionaryAlias: characterDictionary },
    ];
    return {
      originalTextLength: 3,
      dictionaryEntries: [{ dictionaryAlias: '', headwords: [headword], definitions }],
    };
  });

  assert.equal(Array.isArray(result), true);
  const tokens = result as ScannedToken[] | null;
  assert.equal(tokens?.length, 1);

  const first = tokens?.[0];
  assert.equal(first?.surface, 'カズマ');
  assert.equal(first?.headword, 'カズマ');
  assert.equal(first?.startPos, 0);
  assert.equal(first?.endPos, 3);
  assert.equal(first?.isNameMatch, true);
});
|
||||
|
||||
test('requestYomitanScanTokens skips fallback fragments without exact primary source matches', async () => {
  // [text prefix, originalTextLength, term, reading, source originalText]; the first matching prefix wins.
  const lookups: ReadonlyArray<readonly [string, number, string, string, string]> = [
    ['だが ', 2, 'だが', 'だが', 'だが'],
    ['それでも', 4, 'それでも', 'それでも', 'それでも'],
    ['届かぬ', 3, '届く', 'とどく', '届かぬ'],
    ['高み', 2, '高み', 'たかみ', '高み'],
    // Reports a two-character match although its only source covers just 'が'.
    ['があった', 2, 'があ', '', 'が'],
    ['あった', 3, 'ある', 'ある', 'あった'],
  ];

  const termsFind = (text: string) => {
    const hit = lookups.find(([prefix]) => text.startsWith(prefix));
    if (!hit) {
      return { originalTextLength: 0, dictionaryEntries: [] };
    }

    const [, originalTextLength, term, reading, originalText] = hit;
    const sources = [{ originalText, isPrimary: true, matchType: 'exact' }];
    return {
      originalTextLength,
      dictionaryEntries: [{ headwords: [{ term, reading, sources }] }],
    };
  };

  const deps = createDeps(async (script) => {
    if (script.includes('optionsGetFull')) {
      const profile = { options: { scanning: { length: 40 } } };
      return { profileCurrent: 0, profiles: [profile] };
    }

    return await runInjectedYomitanScript(script, (action, params) => {
      if (action !== 'termsFind') {
        throw new Error(`unexpected action: ${action}`);
      }
      return termsFind((params as { text?: string } | undefined)?.text ?? '');
    });
  });

  const result = await requestYomitanScanTokens('だが それでも届かぬ高みがあった', deps, {
    error: () => undefined,
  });

  const span = (surface: string, headword: string, startPos: number, endPos: number) => ({
    surface,
    headword,
    startPos,
    endPos,
  });

  // No token is expected for the 'があ' entry, nor for offsets 2-3 and 12-13.
  assert.deepEqual(
    result?.map((token) => span(token.surface, token.headword, token.startPos, token.endPos)),
    [
      span('だが', 'だが', 0, 2),
      span('それでも', 'それでも', 3, 7),
      span('届かぬ', '届く', 7, 10),
      span('高み', '高み', 10, 12),
      span('あった', 'ある', 13, 16),
    ],
  );
});
|
||||
|
||||
test('getYomitanDictionaryInfo requests dictionary info via backend action', async () => {
  const expectedTitle = 'SubMiner Character Dictionary (AniList 130298)';
  let scriptValue = '';

  // Record the injected script and reply with a single installed dictionary.
  const deps = createDeps(async (script) => {
    scriptValue = script;
    return [{ title: expectedTitle, revision: '1' }];
  });

  const logger = { error: () => undefined };
  const dictionaries = await getYomitanDictionaryInfo(deps, logger);

  assert.equal(dictionaries.length, 1);
  assert.equal(dictionaries[0]?.title, expectedTitle);
  assert.match(scriptValue, /getDictionaryInfo/);
});
|
||||
|
||||
test('dictionary settings helpers upsert and remove dictionary entries without reordering', async () => {
  const title = 'SubMiner Character Dictionary (AniList 1)';
  const logger = { error: () => undefined };
  const scripts: string[] = [];

  // Starting settings: Jitendex first, then the SubMiner dictionary, initially disabled.
  const optionsFull = {
    profileCurrent: 0,
    profiles: [
      {
        options: {
          dictionaries: [
            { name: 'Jitendex', alias: 'Jitendex', enabled: true },
            { name: title, alias: title, enabled: false },
          ],
        },
      },
    ],
  };

  const deps = createDeps(async (script) => {
    scripts.push(script);
    if (script.includes('optionsGetFull')) {
      // Hand out a deep copy so the fixture itself is never mutated.
      return JSON.parse(JSON.stringify(optionsFull));
    }
    return script.includes('setAllSettings') ? true : null;
  });

  const upserted = await upsertYomitanDictionarySettings(title, 'all', deps, logger);
  const removed = await removeYomitanDictionarySettings(title, 'all', 'delete', deps, logger);

  assert.equal(upserted, true);
  assert.equal(removed, true);

  const settingsWrites = scripts.filter((script) => script.includes('setAllSettings'));
  assert.equal(settingsWrites.length, 2);

  const upsertScript = scripts.find(
    (script) =>
      script.includes('setAllSettings') &&
      script.includes('"SubMiner Character Dictionary (AniList 1)"'),
  );
  assert.ok(upsertScript);

  // Both names must appear in the written settings, with Jitendex still first.
  const jitendexOffset = upsertScript?.indexOf('"Jitendex"') ?? -1;
  const subMinerOffset = upsertScript?.indexOf('"SubMiner Character Dictionary (AniList 1)"') ?? -1;
  assert.equal(jitendexOffset >= 0, true);
  assert.equal(subMinerOffset >= 0, true);
  assert.equal(jitendexOffset < subMinerOffset, true);
  assert.match(upsertScript ?? '', /"enabled":true/);
});
|
||||
|
||||
test('importYomitanDictionaryFromZip uses settings automation bridge instead of custom backend action', async () => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yomitan-import-'));

  try {
    // The archive only needs to exist on disk; its bytes are placeholder content.
    const zipPath = path.join(tempDir, 'dict.zip');
    fs.writeFileSync(zipPath, Buffer.from('zip-bytes'));

    // Stand-in for the hidden settings window: records every script it is asked to run.
    const scripts: string[] = [];
    const settingsWindow = {
      isDestroyed: () => false,
      destroy: () => undefined,
      webContents: {
        executeJavaScript: async (script: string) => {
          scripts.push(script);
          return true;
        },
      },
    };

    const deps = createDeps(async () => true, {
      createYomitanExtensionWindow: async (pageName: string) => {
        assert.equal(pageName, 'settings.html');
        return settingsWindow;
      },
    });

    const imported = await importYomitanDictionaryFromZip(zipPath, deps, {
      error: () => undefined,
    });

    assert.equal(imported, true);
    assert.equal(
      scripts.some((script) => script.includes('__subminerYomitanSettingsAutomation')),
      true,
    );
    assert.equal(
      scripts.some((script) => script.includes('importDictionaryArchiveBase64')),
      true,
    );
    assert.equal(
      scripts.some((script) => script.includes('subminerImportDictionary')),
      false,
    );
  } finally {
    // Remove the scratch directory even when an assertion fails, so runs do not pile up temp files.
    fs.rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
test('deleteYomitanDictionaryByTitle uses settings automation bridge instead of custom backend action', async () => {
  const scripts: string[] = [];
  const sawScriptContaining = (needle: string) => scripts.some((script) => script.includes(needle));

  // Stand-in for the hidden settings window: records every script it is asked to run.
  const settingsWindow = {
    isDestroyed: () => false,
    destroy: () => undefined,
    webContents: {
      executeJavaScript: async (script: string) => {
        scripts.push(script);
        return true;
      },
    },
  };

  const deps = createDeps(async () => true, {
    createYomitanExtensionWindow: async (pageName: string) => {
      assert.equal(pageName, 'settings.html');
      return settingsWindow;
    },
  });

  const logger = { error: () => undefined };
  const deleted = await deleteYomitanDictionaryByTitle(
    'SubMiner Character Dictionary (AniList 130298)',
    deps,
    logger,
  );

  assert.equal(deleted, true);
  assert.equal(sawScriptContaining('__subminerYomitanSettingsAutomation'), true);
  assert.equal(sawScriptContaining('deleteDictionary'), true);
  assert.equal(sawScriptContaining('subminerDeleteDictionary'), false);
});
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import type { BrowserWindow, Extension } from 'electron';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { selectYomitanParseTokens } from './parser-selection-stage';
|
||||
|
||||
interface LoggerLike {
|
||||
error: (message: string, ...args: unknown[]) => void;
|
||||
@@ -13,6 +16,12 @@ interface YomitanParserRuntimeDeps {
|
||||
setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;
|
||||
getYomitanParserInitPromise: () => Promise<boolean> | null;
|
||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||
createYomitanExtensionWindow?: (pageName: string) => Promise<BrowserWindow | null>;
|
||||
}
|
||||
|
||||
/** Summary of one dictionary, identified by its title. */
export interface YomitanDictionaryInfo {
  /** Dictionary title. */
  title: string;
  /** Optional revision marker; may be text or a number. */
  revision?: string | number;
}
|
||||
|
||||
export interface YomitanTermFrequency {
|
||||
@@ -30,6 +39,15 @@ export interface YomitanTermReadingPair {
|
||||
reading: string | null;
|
||||
}
|
||||
|
||||
/** One token produced by scanning text. */
export interface YomitanScanToken {
  /** Text of the token as it appears in the scanned input. */
  surface: string;
  /** Reading associated with the token. */
  reading: string;
  /** Dictionary headword chosen for the token. */
  headword: string;
  /** Start offset of the token — presumably an index into the scanned text; confirm against the scanner. */
  startPos: number;
  /** End offset of the token — presumably exclusive; confirm against the scanner. */
  endPos: number;
  /** Optional name-match flag; may be absent entirely. */
  isNameMatch?: boolean;
}
|
||||
|
||||
interface YomitanProfileMetadata {
|
||||
profileIndex: number;
|
||||
scanLength: number;
|
||||
@@ -48,6 +66,22 @@ function isObject(value: unknown): value is Record<string, unknown> {
|
||||
return Boolean(value && typeof value === 'object');
|
||||
}
|
||||
|
||||
/**
 * Type guard: `value` is an array whose every entry has string `surface`,
 * `reading` and `headword`, numeric `startPos` and `endPos`, and an
 * `isNameMatch` that is either absent or boolean.
 */
function isScanTokenArray(value: unknown): value is YomitanScanToken[] {
  const looksLikeScanToken = (entry: unknown): boolean => {
    if (!isObject(entry)) {
      return false;
    }
    if (
      typeof entry.surface !== 'string' ||
      typeof entry.reading !== 'string' ||
      typeof entry.headword !== 'string'
    ) {
      return false;
    }
    if (typeof entry.startPos !== 'number' || typeof entry.endPos !== 'number') {
      return false;
    }
    return entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean';
  };

  return Array.isArray(value) && value.every((entry) => looksLikeScanToken(entry));
}
|
||||
|
||||
/**
 * Builds a key from a term and its reading, joined by a NUL character.
 * A `null` reading contributes an empty string, so it produces the same key as `''`.
 */
function makeTermReadingCacheKey(term: string, reading: string | null): string {
  const readingPart = reading ?? '';
  return term + '\u0000' + readingPart;
}
|
||||
@@ -489,6 +523,392 @@ async function ensureYomitanParserWindow(
|
||||
return initPromise;
|
||||
}
|
||||
|
||||
/**
 * Opens a hidden window on one of the Yomitan extension's pages.
 *
 * When `deps.createYomitanExtensionWindow` is supplied it is used instead and
 * Electron is never imported here.
 *
 * @param pageName - Page inside the extension, e.g. `settings.html`.
 * @param deps - Runtime dependencies; supplies the extension handle and the optional override.
 * @param logger - Receives an error message if the page fails to load.
 * @returns The loaded window, or `null` when no extension is available or loading failed.
 */
async function createYomitanExtensionWindow(
  pageName: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<BrowserWindow | null> {
  if (typeof deps.createYomitanExtensionWindow === 'function') {
    return await deps.createYomitanExtensionWindow(pageName);
  }

  const { BrowserWindow, session } = await import('electron');
  const yomitanExt = deps.getYomitanExt();
  if (!yomitanExt) {
    return null;
  }

  const hiddenWindow = new BrowserWindow({
    show: false,
    width: 1200,
    height: 800,
    webPreferences: {
      contextIsolation: true,
      nodeIntegration: false,
      session: session.defaultSession,
    },
  });

  try {
    // Settles on whichever comes first: load success, load failure, or loadURL rejecting.
    await new Promise<void>((resolve, reject) => {
      const { webContents } = hiddenWindow;
      webContents.once('did-finish-load', () => resolve());
      webContents.once('did-fail-load', (_event, _errorCode, errorDescription) => {
        reject(new Error(errorDescription));
      });
      void hiddenWindow
        .loadURL(`chrome-extension://${yomitanExt.id}/${pageName}`)
        .catch((error: Error) => reject(error));
    });
    return hiddenWindow;
  } catch (err) {
    logger.error(`Failed to create hidden Yomitan ${pageName} window: ${(err as Error).message}`);
    // Do not leave a half-loaded hidden window behind.
    if (!hiddenWindow.isDestroyed()) {
      hiddenWindow.destroy();
    }
    return null;
  }
}
|
||||
|
||||
/**
 * Runs `script` inside a temporary hidden Yomitan settings page.
 *
 * First waits up to ten seconds, polling every 50 ms, for
 * `globalThis.__subminerYomitanSettingsAutomation.ready` to become `true`,
 * then evaluates `script`. The window is destroyed afterwards in every case.
 *
 * @param script - JavaScript to evaluate in the settings page.
 * @param deps - Runtime dependencies used to create the settings window.
 * @param logger - Receives an error message if either evaluation throws.
 * @returns The script's result cast to `T`, or `null` when the window could
 *   not be created or an evaluation failed.
 */
async function invokeYomitanSettingsAutomation<T>(
  script: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<T | null> {
  const settingsWindow = await createYomitanExtensionWindow('settings.html', deps, logger);
  if (!settingsWindow || settingsWindow.isDestroyed()) {
    return null;
  }

  const waitForBridgeScript = `
    (async () => {
      const deadline = Date.now() + 10000;
      while (Date.now() < deadline) {
        if (globalThis.__subminerYomitanSettingsAutomation?.ready === true) {
          return true;
        }
        await new Promise((resolve) => setTimeout(resolve, 50));
      }
      throw new Error("Yomitan settings automation bridge did not become ready");
    })();
  `;

  try {
    await settingsWindow.webContents.executeJavaScript(waitForBridgeScript, true);
    const outcome = await settingsWindow.webContents.executeJavaScript(script, true);
    return outcome as T;
  } catch (err) {
    logger.error('Failed to drive Yomitan settings automation:', (err as Error).message);
    return null;
  } finally {
    if (!settingsWindow.isDestroyed()) {
      settingsWindow.destroy();
    }
  }
}
|
||||
|
||||
/**
 * Plain-JavaScript helper source that is spliced into the scanning script and
 * evaluated inside the Yomitan parser window (it is never run in this process).
 *
 * It provides kana/Japanese code-point checks, katakana-to-hiragana conversion,
 * furigana distribution for plain and inflected terms, and
 * `getPreferredHeadword`, which picks the first headword that has an exact,
 * primary source equal to the scanned text.
 *
 * `getPreferredHeadword` reads a free variable named `includeNameMatchMetadata`;
 * the script that embeds these helpers must define it before calling the helper.
 */
const YOMITAN_SCANNING_HELPERS = String.raw`
  const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
  const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];
  const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;
  const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
  const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6;
  const KANA_RANGES = [[0x3040, 0x309f], [0x30a0, 0x30ff]];
  const JAPANESE_RANGES = [[0x3040, 0x30ff], [0x3400, 0x9fff]];
  function isCodePointInRange(codePoint, range) { return codePoint >= range[0] && codePoint <= range[1]; }
  function isCodePointInRanges(codePoint, ranges) { return ranges.some((range) => isCodePointInRange(codePoint, range)); }
  function isCodePointKana(codePoint) { return isCodePointInRanges(codePoint, KANA_RANGES); }
  function isCodePointJapanese(codePoint) { return isCodePointInRanges(codePoint, JAPANESE_RANGES); }
  function createFuriganaSegment(text, reading) { return {text, reading}; }
  function getProlongedHiragana(previousCharacter) {
    switch (previousCharacter) {
      case "あ": case "か": case "が": case "さ": case "ざ": case "た": case "だ": case "な": case "は": case "ば": case "ぱ": case "ま": case "や": case "ら": case "わ": case "ぁ": case "ゃ": case "ゎ": return "あ";
      case "い": case "き": case "ぎ": case "し": case "じ": case "ち": case "ぢ": case "に": case "ひ": case "び": case "ぴ": case "み": case "り": case "ぃ": return "い";
      case "う": case "く": case "ぐ": case "す": case "ず": case "つ": case "づ": case "ぬ": case "ふ": case "ぶ": case "ぷ": case "む": case "ゆ": case "る": case "ぅ": case "ゅ": return "う";
      case "え": case "け": case "げ": case "せ": case "ぜ": case "て": case "で": case "ね": case "へ": case "べ": case "ぺ": case "め": case "れ": case "ぇ": return "え";
      case "お": case "こ": case "ご": case "そ": case "ぞ": case "と": case "ど": case "の": case "ほ": case "ぼ": case "ぽ": case "も": case "よ": case "ろ": case "を": case "ぉ": case "ょ": return "う";
      default: return null;
    }
  }
  function getFuriganaKanaSegments(text, reading) {
    const newSegments = [];
    let start = 0;
    let state = (reading[0] === text[0]);
    for (let i = 1; i < text.length; ++i) {
      const newState = (reading[i] === text[i]);
      if (state === newState) { continue; }
      newSegments.push(createFuriganaSegment(text.substring(start, i), state ? '' : reading.substring(start, i)));
      state = newState;
      start = i;
    }
    newSegments.push(createFuriganaSegment(text.substring(start), state ? '' : reading.substring(start)));
    return newSegments;
  }
  function convertKatakanaToHiragana(text, keepProlongedSoundMarks = false) {
    let result = '';
    const offset = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]);
    for (let char of text) {
      const codePoint = char.codePointAt(0);
      switch (codePoint) {
        case KATAKANA_SMALL_KA_CODE_POINT:
        case KATAKANA_SMALL_KE_CODE_POINT:
          break;
        case KANA_PROLONGED_SOUND_MARK_CODE_POINT:
          if (!keepProlongedSoundMarks && result.length > 0) {
            const char2 = getProlongedHiragana(result[result.length - 1]);
            if (char2 !== null) { char = char2; }
          }
          break;
        default:
          if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
            char = String.fromCodePoint(codePoint + offset);
          }
          break;
      }
      result += char;
    }
    return result;
  }
  function segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
    const groupCount = groups.length - groupsStart;
    if (groupCount <= 0) { return reading.length === 0 ? [] : null; }
    const group = groups[groupsStart];
    const {isKana, text} = group;
    if (isKana) {
      if (group.textNormalized !== null && readingNormalized.startsWith(group.textNormalized)) {
        const segments = segmentizeFurigana(reading.substring(text.length), readingNormalized.substring(text.length), groups, groupsStart + 1);
        if (segments !== null) {
          if (reading.startsWith(text)) { segments.unshift(createFuriganaSegment(text, '')); }
          else { segments.unshift(...getFuriganaKanaSegments(text, reading)); }
          return segments;
        }
      }
      return null;
    }
    let result = null;
    for (let i = reading.length; i >= text.length; --i) {
      const segments = segmentizeFurigana(reading.substring(i), readingNormalized.substring(i), groups, groupsStart + 1);
      if (segments !== null) {
        if (result !== null) { return null; }
        segments.unshift(createFuriganaSegment(text, reading.substring(0, i)));
        result = segments;
      }
      if (groupCount === 1) { break; }
    }
    return result;
  }
  function distributeFurigana(term, reading) {
    if (reading === term) { return [createFuriganaSegment(term, '')]; }
    const groups = [];
    let groupPre = null;
    let isKanaPre = null;
    for (const c of term) {
      const isKana = isCodePointKana(c.codePointAt(0));
      if (isKana === isKanaPre) { groupPre.text += c; }
      else {
        groupPre = {isKana, text: c, textNormalized: null};
        groups.push(groupPre);
        isKanaPre = isKana;
      }
    }
    for (const group of groups) {
      if (group.isKana) { group.textNormalized = convertKatakanaToHiragana(group.text); }
    }
    const segments = segmentizeFurigana(reading, convertKatakanaToHiragana(reading), groups, 0);
    return segments !== null ? segments : [createFuriganaSegment(term, reading)];
  }
  function getStemLength(text1, text2) {
    const minLength = Math.min(text1.length, text2.length);
    if (minLength === 0) { return 0; }
    let i = 0;
    while (true) {
      const char1 = text1.codePointAt(i);
      const char2 = text2.codePointAt(i);
      if (char1 !== char2) { break; }
      const charLength = String.fromCodePoint(char1).length;
      i += charLength;
      if (i >= minLength) {
        if (i > minLength) { i -= charLength; }
        break;
      }
    }
    return i;
  }
  function distributeFuriganaInflected(term, reading, source) {
    const termNormalized = convertKatakanaToHiragana(term);
    const readingNormalized = convertKatakanaToHiragana(reading);
    const sourceNormalized = convertKatakanaToHiragana(source);
    let mainText = term;
    let stemLength = getStemLength(termNormalized, sourceNormalized);
    const readingStemLength = getStemLength(readingNormalized, sourceNormalized);
    if (readingStemLength > 0 && readingStemLength >= stemLength) {
      mainText = reading;
      stemLength = readingStemLength;
      reading = source.substring(0, stemLength) + reading.substring(stemLength);
    }
    const segments = [];
    if (stemLength > 0) {
      mainText = source.substring(0, stemLength) + mainText.substring(stemLength);
      const segments2 = distributeFurigana(mainText, reading);
      let consumed = 0;
      for (const segment of segments2) {
        const start = consumed;
        consumed += segment.text.length;
        if (consumed < stemLength) { segments.push(segment); }
        else if (consumed === stemLength) { segments.push(segment); break; }
        else {
          if (start < stemLength) { segments.push(createFuriganaSegment(mainText.substring(start, stemLength), '')); }
          break;
        }
      }
    }
    if (stemLength < source.length) {
      const remainder = source.substring(stemLength);
      const last = segments[segments.length - 1];
      if (last && last.reading.length === 0) { last.text += remainder; }
      else { segments.push(createFuriganaSegment(remainder, '')); }
    }
    return segments;
  }
  function getPreferredHeadword(dictionaryEntries, token) {
    function appendDictionaryNames(target, value) {
      if (!value || typeof value !== 'object') {
        return;
      }
      const candidates = [
        value.dictionary,
        value.dictionaryName,
        value.name,
        value.title,
        value.dictionaryTitle,
        value.dictionaryAlias
      ];
      for (const candidate of candidates) {
        if (typeof candidate === 'string' && candidate.trim().length > 0) {
          target.push(candidate.trim());
        }
      }
    }
    function getDictionaryEntryNames(entry) {
      const names = [];
      appendDictionaryNames(names, entry);
      for (const definition of entry?.definitions || []) {
        appendDictionaryNames(names, definition);
      }
      for (const frequency of entry?.frequencies || []) {
        appendDictionaryNames(names, frequency);
      }
      for (const pronunciation of entry?.pronunciations || []) {
        appendDictionaryNames(names, pronunciation);
      }
      return names;
    }
    function isNameDictionaryEntry(entry) {
      if (!includeNameMatchMetadata || !entry || typeof entry !== 'object') {
        return false;
      }
      return getDictionaryEntryNames(entry).some((name) => name.startsWith("SubMiner Character Dictionary"));
    }
    function hasExactPrimarySource(headword, token) {
      for (const src of headword.sources || []) {
        if (src.originalText !== token) { continue; }
        if (!src.isPrimary) { continue; }
        if (src.matchType !== 'exact') { continue; }
        return true;
      }
      return false;
    }
    let matchedNameDictionary = false;
    if (includeNameMatchMetadata) {
      for (const dictionaryEntry of dictionaryEntries || []) {
        if (!isNameDictionaryEntry(dictionaryEntry)) { continue; }
        for (const headword of dictionaryEntry.headwords || []) {
          if (!hasExactPrimarySource(headword, token)) { continue; }
          matchedNameDictionary = true;
          break;
        }
        if (matchedNameDictionary) { break; }
      }
    }
    for (const dictionaryEntry of dictionaryEntries || []) {
      for (const headword of dictionaryEntry.headwords || []) {
        if (!hasExactPrimarySource(headword, token)) { continue; }
        return {
          term: headword.term,
          reading: headword.reading,
          isNameMatch: matchedNameDictionary || isNameDictionaryEntry(dictionaryEntry)
        };
      }
    }
    return null;
  }
`;
|
||||
|
||||
/**
 * Builds the JavaScript source that scans `text` with the Yomitan backend.
 *
 * The generated script walks the text from left to right. At each position it
 * sends a `termsFind` request (exact match type, deinflection on) for the next
 * `scanLength` UTF-16 units; when a preferred headword is found it emits a
 * token and jumps past the matched text, otherwise it advances by one
 * character. The script evaluates to the array of collected tokens.
 *
 * @param text - Text to scan; embedded as a JSON string literal.
 * @param profileIndex - Yomitan profile index placed in `optionsContext.index`.
 * @param scanLength - Maximum number of UTF-16 units looked up per position.
 * @param includeNameMatchMetadata - Whether tokens may report `isNameMatch: true`.
 * @returns Script source for `webContents.executeJavaScript`.
 */
function buildYomitanScanningScript(
  text: string,
  profileIndex: number,
  scanLength: number,
  includeNameMatchMetadata: boolean,
): string {
  // Pre-render the interpolated literals so the template below stays readable.
  const nameMatchLiteral = includeNameMatchMetadata ? 'true' : 'false';
  const textLiteral = JSON.stringify(text);

  return `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });
      ${YOMITAN_SCANNING_HELPERS}
      const includeNameMatchMetadata = ${nameMatchLiteral};
      const text = ${textLiteral};
      const details = {matchType: "exact", deinflect: true};
      const tokens = [];
      let i = 0;
      while (i < text.length) {
        const codePoint = text.codePointAt(i);
        const character = String.fromCodePoint(codePoint);
        const substring = text.substring(i, i + ${scanLength});
        const result = await invoke("termsFind", { text: substring, details, optionsContext: { index: ${profileIndex} } });
        const dictionaryEntries = Array.isArray(result?.dictionaryEntries) ? result.dictionaryEntries : [];
        const originalTextLength = typeof result?.originalTextLength === "number" ? result.originalTextLength : 0;
        if (dictionaryEntries.length > 0 && originalTextLength > 0 && (originalTextLength !== character.length || isCodePointJapanese(codePoint))) {
          const source = substring.substring(0, originalTextLength);
          const preferredHeadword = getPreferredHeadword(dictionaryEntries, source);
          if (preferredHeadword && typeof preferredHeadword.term === "string") {
            const reading = typeof preferredHeadword.reading === "string" ? preferredHeadword.reading : "";
            const segments = distributeFuriganaInflected(preferredHeadword.term, reading, source);
            tokens.push({
              surface: segments.map((segment) => segment.text).join("") || source,
              reading: segments.map((segment) => typeof segment.reading === "string" ? segment.reading : "").join(""),
              headword: preferredHeadword.term,
              startPos: i,
              endPos: i + originalTextLength,
              isNameMatch: includeNameMatchMetadata && preferredHeadword.isNameMatch === true,
            });
            i += originalTextLength;
            continue;
          }
        }
        i += character.length;
      }
      return tokens;
    })();
  `;
}
|
||||
|
||||
export async function requestYomitanParseResults(
|
||||
text: string,
|
||||
deps: YomitanParserRuntimeDeps,
|
||||
@@ -583,6 +1003,61 @@ export async function requestYomitanParseResults(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tokenizes `text` by running the scanning script in the Yomitan parser window.
 *
 * A result that passes `isScanTokenArray` is returned as-is. Any other array is
 * passed through `selectYomitanParseTokens` and reduced to
 * surface/reading/headword/startPos/endPos.
 *
 * @param text - Text to scan.
 * @param deps - Runtime dependencies providing the extension and parser window.
 * @param logger - Receives an error message when the scan request throws.
 * @param options - `includeNameMatchMetadata` enables name-match flags on tokens
 *   (only when it is exactly `true`).
 * @returns The tokens, or `null` when the text is empty, the extension or
 *   parser window is unavailable, the result has an unexpected shape, or the
 *   request failed.
 */
export async function requestYomitanScanTokens(
  text: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
  options?: {
    includeNameMatchMetadata?: boolean;
  },
): Promise<YomitanScanToken[] | null> {
  const extension = deps.getYomitanExt();
  if (!extension || !text) {
    return null;
  }

  const parserReady = await ensureYomitanParserWindow(deps, logger);
  const scannerWindow = deps.getYomitanParserWindow();
  if (!parserReady || !scannerWindow || scannerWindow.isDestroyed()) {
    return null;
  }

  // Fall back to profile 0 and the default scan length when metadata is missing.
  const profileMetadata = await requestYomitanProfileMetadata(scannerWindow, logger);
  const profileIndex = profileMetadata?.profileIndex ?? 0;
  const scanLength = profileMetadata?.scanLength ?? DEFAULT_YOMITAN_SCAN_LENGTH;
  const wantsNameMatches = options?.includeNameMatchMetadata === true;

  try {
    const script = buildYomitanScanningScript(text, profileIndex, scanLength, wantsNameMatches);
    const scanned = await scannerWindow.webContents.executeJavaScript(script, true);

    if (isScanTokenArray(scanned)) {
      return scanned;
    }
    if (!Array.isArray(scanned)) {
      return null;
    }

    // Array of some other shape: run it through the parse-token selector.
    const fallbackTokens = selectYomitanParseTokens(scanned, () => false, 'headword');
    if (!fallbackTokens) {
      return null;
    }
    return fallbackTokens.map(({ surface, reading, headword, startPos, endPos }) => ({
      surface,
      reading,
      headword,
      startPos,
      endPos,
    }));
  } catch (err) {
    logger.error('Yomitan scanner request failed:', (err as Error).message);
    return null;
  }
}
|
||||
|
||||
async function fetchYomitanTermFrequencies(
|
||||
parserWindow: BrowserWindow,
|
||||
termReadingList: YomitanTermReadingPair[],
|
||||
@@ -963,3 +1438,325 @@ export async function syncYomitanDefaultAnkiServer(
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the JavaScript source that sends one message to the Yomitan backend
 * through `chrome.runtime.sendMessage` and evaluates to its `result`.
 *
 * The generated promise rejects when `chrome.runtime.lastError` is set, when the
 * response is not an object, or when the response carries an `error`.
 *
 * @param actionLiteral - JavaScript source for the action name (already quoted).
 * @param paramsLiteral - JavaScript source for the params value (for example a
 *   JSON string or `undefined`).
 * @returns Script source for `webContents.executeJavaScript`.
 */
function buildYomitanInvokeScript(actionLiteral: string, paramsLiteral: string): string {
  // Both arguments are inserted verbatim, so callers must pass valid JS source.
  const backendCall = `invoke(${actionLiteral}, ${paramsLiteral})`;

  return `
    (async () => {
      const invoke = (action, params) =>
        new Promise((resolve, reject) => {
          chrome.runtime.sendMessage({ action, params }, (response) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message));
              return;
            }
            if (!response || typeof response !== "object") {
              reject(new Error("Invalid response from Yomitan backend"));
              return;
            }
            if (response.error) {
              reject(new Error(response.error.message || "Yomitan backend error"));
              return;
            }
            resolve(response.result);
          });
        });

      return await ${backendCall};
    })();
  `;
}
|
||||
|
||||
/**
 * Sends a single action to the Yomitan backend via the parser window.
 *
 * @param action - Backend action name; embedded as a JSON string literal.
 * @param params - Action parameters; `undefined` is passed through as the
 *   literal `undefined`, anything else is JSON-serialized.
 * @param deps - Runtime dependencies providing the parser window.
 * @param logger - Receives an error message when the action throws.
 * @returns The backend result cast to `T`, or `null` when the parser window is
 *   unavailable or the action failed.
 */
async function invokeYomitanBackendAction<T>(
  action: string,
  params: unknown,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<T | null> {
  const parserReady = await ensureYomitanParserWindow(deps, logger);
  const backendWindow = deps.getYomitanParserWindow();
  if (!parserReady || !backendWindow || backendWindow.isDestroyed()) {
    return null;
  }

  const actionLiteral = JSON.stringify(action);
  const paramsLiteral = params === undefined ? 'undefined' : JSON.stringify(params);
  const script = buildYomitanInvokeScript(actionLiteral, paramsLiteral);

  try {
    const outcome = await backendWindow.webContents.executeJavaScript(script, true);
    return outcome as T;
  } catch (err) {
    logger.error(`Yomitan backend action failed (${action}):`, (err as Error).message);
    return null;
  }
}
|
||||
|
||||
/**
 * Creates a fresh per-profile dictionary settings record.
 *
 * @param name - Dictionary title; also used as the alias.
 * @param enabled - Initial enabled state.
 * @returns A new settings object with the remaining fields set to fixed defaults.
 */
function createDefaultDictionarySettings(name: string, enabled: boolean): Record<string, unknown> {
  const settings: Record<string, unknown> = {
    name,
    alias: name,
    enabled,
    allowSecondarySearches: false,
    definitionsCollapsible: 'not-collapsible',
    partsOfSpeechFilter: true,
    useDeinflections: true,
    styles: '',
  };
  return settings;
}
|
||||
|
||||
/**
 * Resolves which profile indices an operation should touch.
 *
 * @param optionsFull - Full Yomitan options; `profiles` and `profileCurrent` are read.
 * @param profileScope - `'all'` for every profile, `'active'` for the current one only.
 * @returns Every index of `profiles` for `'all'`. For `'active'`, a single-element
 *   array with the current profile index (floored, clamped to >= 0, defaulting
 *   to 0 when `profileCurrent` is not a finite number), or an empty array when
 *   that index is out of range.
 */
function getTargetProfileIndices(
  optionsFull: Record<string, unknown>,
  profileScope: 'all' | 'active',
): number[] {
  const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];

  if (profileScope !== 'active') {
    return profiles.map((_profile, index) => index);
  }

  const { profileCurrent } = optionsFull;
  let activeIndex = 0;
  if (typeof profileCurrent === 'number' && Number.isFinite(profileCurrent)) {
    activeIndex = Math.max(0, Math.floor(profileCurrent));
  }

  if (activeIndex >= profiles.length) {
    return [];
  }
  return [activeIndex];
}
|
||||
|
||||
/**
 * Lists the dictionaries reported by the Yomitan backend's `getDictionaryInfo` action.
 *
 * Non-object entries and entries whose trimmed `title` is empty (or not a
 * string) are dropped. `revision` is kept only when it is a string or number.
 *
 * @param deps - Runtime dependencies providing the parser window.
 * @param logger - Forwarded to the backend call for error reporting.
 * @returns The normalized dictionary list; empty when the backend did not
 *   return an array.
 */
export async function getYomitanDictionaryInfo(
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<YomitanDictionaryInfo[]> {
  const rawInfo = await invokeYomitanBackendAction<unknown>(
    'getDictionaryInfo',
    undefined,
    deps,
    logger,
  );
  if (!Array.isArray(rawInfo)) {
    return [];
  }

  const dictionaries: YomitanDictionaryInfo[] = [];
  for (const entry of rawInfo) {
    if (!isObject(entry)) {
      continue;
    }
    const record = entry as Record<string, unknown>;
    const title = typeof record.title === 'string' ? record.title.trim() : '';
    if (title.length === 0) {
      continue;
    }
    const { revision } = record;
    const hasUsableRevision = typeof revision === 'string' || typeof revision === 'number';
    dictionaries.push({ title, revision: hasUsableRevision ? revision : undefined });
  }
  return dictionaries;
}
|
||||
|
||||
/**
 * Fetches the complete Yomitan options object via the `optionsGetFull` action.
 *
 * @param deps - Runtime dependencies providing the parser window.
 * @param logger - Forwarded to the backend call for error reporting.
 * @returns The options object, or `null` when the backend result is not an object.
 */
export async function getYomitanSettingsFull(
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<Record<string, unknown> | null> {
  const optionsFull = await invokeYomitanBackendAction<unknown>(
    'optionsGetFull',
    undefined,
    deps,
    logger,
  );
  if (!isObject(optionsFull)) {
    return null;
  }
  return optionsFull;
}
|
||||
|
||||
/**
 * Replaces the complete Yomitan options object via the `setAllSettings` action.
 *
 * @param value - Full options object to store.
 * @param deps - Runtime dependencies providing the parser window.
 * @param logger - Forwarded to the backend call for error reporting.
 * @param source - Source tag sent along with the new settings.
 * @returns `false` when the backend call yielded `null`, `true` otherwise.
 */
export async function setYomitanSettingsFull(
  value: Record<string, unknown>,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
  source = 'subminer',
): Promise<boolean> {
  const payload = { value, source };
  const outcome = await invokeYomitanBackendAction<unknown>(
    'setAllSettings',
    payload,
    deps,
    logger,
  );
  // The backend helper reports every failure as null.
  return outcome !== null;
}
|
||||
|
||||
/**
 * Imports a dictionary ZIP archive into Yomitan through the settings-page
 * automation bridge.
 *
 * The archive is read from disk, base64-encoded and handed to
 * `__subminerYomitanSettingsAutomation.importDictionaryArchiveBase64` together
 * with the archive's base file name.
 *
 * @param zipPath - Path of the archive; surrounding whitespace is ignored.
 * @param deps - Runtime dependencies used to open the settings window.
 * @param logger - Receives an error message when the archive is missing or unreadable.
 * @returns `true` when the import script completed; `false` when the archive is
 *   missing, cannot be read, or the automation failed. Never rejects because of
 *   a file-system error.
 */
export async function importYomitanDictionaryFromZip(
  zipPath: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<boolean> {
  const normalizedZipPath = zipPath.trim();
  if (!normalizedZipPath || !fs.existsSync(normalizedZipPath)) {
    logger.error(`Dictionary ZIP not found: ${zipPath}`);
    return false;
  }

  // The path can exist and still be unreadable (a directory, missing permissions,
  // removed since the check above). Report that like any other failure instead of
  // rejecting, and read asynchronously so a large archive does not block the process.
  let archiveBase64: string;
  try {
    const archive = await fs.promises.readFile(normalizedZipPath);
    archiveBase64 = archive.toString('base64');
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.error(`Failed to read dictionary ZIP ${normalizedZipPath}: ${reason}`);
    return false;
  }

  const script = `
    (async () => {
      await globalThis.__subminerYomitanSettingsAutomation.importDictionaryArchiveBase64(
        ${JSON.stringify(archiveBase64)},
        ${JSON.stringify(path.basename(normalizedZipPath))}
      );
      return true;
    })();
  `;
  const result = await invokeYomitanSettingsAutomation<boolean>(script, deps, logger);
  return result === true;
}
|
||||
|
||||
/**
 * Deletes a dictionary from Yomitan through the settings-page automation bridge.
 *
 * @param dictionaryTitle - Title of the dictionary; surrounding whitespace is ignored.
 * @param deps - Runtime dependencies used to open the settings window.
 * @param logger - Forwarded to the automation helper for error reporting.
 * @returns `true` when the delete script completed; `false` for a blank title
 *   or when the automation failed.
 */
export async function deleteYomitanDictionaryByTitle(
  dictionaryTitle: string,
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<boolean> {
  const normalizedTitle = dictionaryTitle.trim();
  if (normalizedTitle.length === 0) {
    return false;
  }

  const titleLiteral = JSON.stringify(normalizedTitle);
  const script = `
      (async () => {
        await globalThis.__subminerYomitanSettingsAutomation.deleteDictionary(
          ${titleLiteral}
        );
        return true;
      })();
    `;
  const deleted = await invokeYomitanSettingsAutomation<boolean>(script, deps, logger);
  return deleted === true;
}
|
||||
|
||||
/**
 * Makes sure a dictionary is present and enabled in the targeted Yomitan profiles.
 *
 * For each targeted profile, an existing entry whose trimmed `name` equals the
 * title is enabled and given the title as alias if its alias is missing or
 * blank; when no such entry exists a default, enabled entry is appended.
 * Missing `options` / `dictionaries` containers are created on the way.
 * Settings are written back only when something changed.
 *
 * @param dictionaryTitle - Title of the dictionary; surrounding whitespace is ignored.
 * @param profileScope - `'all'` profiles or only the `'active'` one.
 * @param deps - Runtime dependencies providing the parser window.
 * @param logger - Forwarded to the backend calls for error reporting.
 * @returns The result of saving the settings when a change was made; `false`
 *   for a blank title, when settings could not be read, or when nothing needed
 *   to change.
 */
export async function upsertYomitanDictionarySettings(
  dictionaryTitle: string,
  profileScope: 'all' | 'active',
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<boolean> {
  const normalizedTitle = dictionaryTitle.trim();
  if (normalizedTitle.length === 0) {
    return false;
  }

  const optionsFull = await getYomitanSettingsFull(deps, logger);
  if (!optionsFull) {
    return false;
  }

  const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
  const targetIndices = getTargetProfileIndices(optionsFull, profileScope);

  // True for a dictionary settings entry that belongs to the requested title.
  const hasTargetName = (entry: unknown): boolean =>
    isObject(entry) &&
    typeof (entry as { name?: unknown }).name === 'string' &&
    (entry as { name: string }).name.trim() === normalizedTitle;

  let changed = false;
  for (const profileIndex of targetIndices) {
    const profile = profiles[profileIndex];
    if (!isObject(profile)) {
      continue;
    }

    if (!isObject(profile.options)) {
      profile.options = {};
    }
    const profileOptions = profile.options as Record<string, unknown>;
    if (!Array.isArray(profileOptions.dictionaries)) {
      profileOptions.dictionaries = [];
    }
    const dictionaries = profileOptions.dictionaries as unknown[];

    const existing = dictionaries.find(hasTargetName) as Record<string, unknown> | undefined;
    if (existing === undefined) {
      dictionaries.push(createDefaultDictionarySettings(normalizedTitle, true));
      changed = true;
      continue;
    }

    if (existing.enabled !== true) {
      existing.enabled = true;
      changed = true;
    }
    if (typeof existing.alias !== 'string' || existing.alias.trim().length === 0) {
      existing.alias = normalizedTitle;
      changed = true;
    }
  }

  return changed ? await setYomitanSettingsFull(optionsFull, deps, logger) : false;
}
|
||||
|
||||
/**
 * Removes or disables a dictionary's settings entry in the targeted Yomitan profiles.
 *
 * Profiles without an `options` object or a `dictionaries` array are skipped.
 * Entries are matched by their trimmed `name`. Settings are written back only
 * when something changed.
 *
 * @param dictionaryTitle - Title of the dictionary; surrounding whitespace is ignored.
 * @param profileScope - `'all'` profiles or only the `'active'` one.
 * @param mode - `'delete'` drops matching entries; `'disable'` sets `enabled` to `false`.
 * @param deps - Runtime dependencies providing the parser window.
 * @param logger - Forwarded to the backend calls for error reporting.
 * @returns The result of saving the settings when a change was made; `false`
 *   for a blank title, when settings could not be read, or when nothing needed
 *   to change.
 */
export async function removeYomitanDictionarySettings(
  dictionaryTitle: string,
  profileScope: 'all' | 'active',
  mode: 'delete' | 'disable',
  deps: YomitanParserRuntimeDeps,
  logger: LoggerLike,
): Promise<boolean> {
  const normalizedTitle = dictionaryTitle.trim();
  if (normalizedTitle.length === 0) {
    return false;
  }

  const optionsFull = await getYomitanSettingsFull(deps, logger);
  if (!optionsFull) {
    return false;
  }

  const profiles = Array.isArray(optionsFull.profiles) ? optionsFull.profiles : [];
  const targetIndices = getTargetProfileIndices(optionsFull, profileScope);

  // True for a dictionary settings entry that belongs to the requested title.
  const hasTargetName = (entry: unknown): boolean =>
    isObject(entry) &&
    typeof (entry as { name?: unknown }).name === 'string' &&
    (entry as { name: string }).name.trim() === normalizedTitle;

  let changed = false;
  for (const profileIndex of targetIndices) {
    const profile = profiles[profileIndex];
    if (!isObject(profile) || !isObject(profile.options)) {
      continue;
    }
    const profileOptions = profile.options as Record<string, unknown>;
    if (!Array.isArray(profileOptions.dictionaries)) {
      continue;
    }
    const dictionaries = profileOptions.dictionaries as unknown[];

    if (mode === 'delete') {
      const kept = dictionaries.filter((entry) => !hasTargetName(entry));
      profileOptions.dictionaries = kept;
      if (kept.length !== dictionaries.length) {
        changed = true;
      }
      continue;
    }

    // 'disable': keep the entries but switch them off.
    for (const entry of dictionaries) {
      if (!hasTargetName(entry)) {
        continue;
      }
      const dictionaryEntry = entry as Record<string, unknown>;
      if (dictionaryEntry.enabled !== false) {
        dictionaryEntry.enabled = false;
        changed = true;
      }
    }
  }

  return changed ? await setYomitanSettingsFull(optionsFull, deps, logger) : false;
}
|
||||
|
||||
Reference in New Issue
Block a user