mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-06 19:57:26 -08:00
feat: merge AniList character dictionaries by recent usage
This commit is contained in:
@@ -146,20 +146,23 @@ export function buildIntegrationConfigOptionRegistry(
|
||||
path: 'anilist.characterDictionary.refreshTtlHours',
|
||||
kind: 'number',
|
||||
defaultValue: defaultConfig.anilist.characterDictionary.refreshTtlHours,
|
||||
description: 'TTL in hours before refreshing the currently watched media dictionary.',
|
||||
description:
|
||||
'Legacy setting; merged character dictionary retention is now usage-based and this value is ignored.',
|
||||
},
|
||||
{
|
||||
path: 'anilist.characterDictionary.maxLoaded',
|
||||
kind: 'number',
|
||||
defaultValue: defaultConfig.anilist.characterDictionary.maxLoaded,
|
||||
description: 'Maximum number of auto-synced AniList dictionaries kept loaded at once.',
|
||||
description:
|
||||
'Maximum number of most-recently-used anime snapshots included in the merged Yomitan character dictionary.',
|
||||
},
|
||||
{
|
||||
path: 'anilist.characterDictionary.evictionPolicy',
|
||||
kind: 'enum',
|
||||
enumValues: ['disable', 'delete'],
|
||||
defaultValue: defaultConfig.anilist.characterDictionary.evictionPolicy,
|
||||
description: 'Eviction behavior when maxLoaded is exceeded.',
|
||||
description:
|
||||
'Legacy setting; merged character dictionary eviction is usage-based and this value is ignored.',
|
||||
},
|
||||
{
|
||||
path: 'anilist.characterDictionary.profileScope',
|
||||
|
||||
@@ -106,7 +106,7 @@ const INTEGRATION_TEMPLATE_SECTIONS: ConfigTemplateSection[] = [
|
||||
title: 'Anilist',
|
||||
description: [
|
||||
'Anilist API credentials and update behavior.',
|
||||
'Includes optional auto-sync for per-media character dictionaries in bundled Yomitan.',
|
||||
'Includes optional auto-sync for a merged MRU-based character dictionary in bundled Yomitan.',
|
||||
'Character dictionaries are keyed by AniList media ID (no season/franchise merge).',
|
||||
],
|
||||
key: 'anilist',
|
||||
|
||||
@@ -263,6 +263,7 @@ function isKanaChar(char: string): boolean {
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
code === 0x30fc ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
|
||||
@@ -231,6 +231,7 @@ function isKanaChar(char: string): boolean {
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
code === 0x30fc ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
|
||||
@@ -127,3 +127,88 @@ test('drops scanning parser tokens which have no dictionary headword', () => {
|
||||
],
|
||||
);
|
||||
});
|
||||
|
||||
test('prefers the longest dictionary headword across merged segments', () => {
  // Both segments live on the same parser line, so they are combined into one token.
  // Only the second segment has a headword longer than its own text.
  const parseResults = [
    makeParseItem('scanning-parser', [
      [
        { text: 'バニ', reading: 'ばに', headword: 'バニ' },
        { text: 'ール', reading: 'ーる', headword: 'バニール' },
      ],
    ]),
  ];

  const tokens = selectYomitanParseTokens(parseResults, () => false, 'headword');
  const summary = tokens?.map(({ surface, reading, headword }) => ({
    surface,
    reading,
    headword,
  }));

  assert.deepEqual(summary, [
    {
      surface: 'バニール',
      reading: 'ばにーる',
      headword: 'バニール',
    },
  ]);
});
|
||||
|
||||
test('keeps the first headword when later segments are standalone words', () => {
  // Neither segment's headword is longer than its own text, so the first headword is kept.
  const parseResults = [
    makeParseItem('scanning-parser', [
      [
        { text: '猫', reading: 'ねこ', headword: '猫' },
        { text: 'です', reading: 'です', headword: 'です' },
      ],
    ]),
  ];

  const tokens = selectYomitanParseTokens(parseResults, () => false, 'headword');
  const summary = tokens?.map(({ surface, reading, headword }) => ({
    surface,
    reading,
    headword,
  }));

  assert.deepEqual(summary, [
    {
      surface: '猫です',
      reading: 'ねこです',
      headword: '猫',
    },
  ]);
});
|
||||
|
||||
test('merges trailing katakana continuation without headword into previous token', () => {
  // The middle line has no headword; it completes the headword announced by the line before it.
  const parseResults = [
    makeParseItem('scanning-parser', [
      [{ text: 'カズ', reading: 'かず', headword: 'カズマ' }],
      [{ text: 'マ', reading: 'ま' }],
      [{ text: '魔王軍', reading: 'まおうぐん', headword: '魔王軍' }],
    ]),
  ];

  const tokens = selectYomitanParseTokens(parseResults, () => false, 'headword');
  const summary = tokens?.map(({ surface, reading, headword }) => ({
    surface,
    reading,
    headword,
  }));

  assert.deepEqual(summary, [
    {
      surface: 'カズマ',
      reading: 'かずま',
      headword: 'カズマ',
    },
    {
      surface: '魔王軍',
      reading: 'まおうぐん',
      headword: '魔王軍',
    },
  ]);
});
|
||||
|
||||
@@ -49,6 +49,7 @@ function isKanaChar(char: string): boolean {
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
code === 0x30fc ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
@@ -111,6 +112,51 @@ function extractYomitanHeadword(segment: YomitanParseSegment): string {
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Picks the headword for a token merged from several parser segments.
 *
 * @param firstHeadword - Headword of the first segment that had one, or '' if none did.
 * @param expandedHeadwords - Segment headwords that are longer than their own segment text.
 * @param surface - Combined surface text of the merged token.
 * @returns The expanded headword equal to `surface` when one exists; otherwise the longest
 *   expanded headword (the earliest wins a tie); otherwise `firstHeadword` (possibly '').
 */
function selectMergedHeadword(
  firstHeadword: string,
  expandedHeadwords: string[],
  surface: string,
): string {
  if (expandedHeadwords.length === 0) {
    return firstHeadword || '';
  }

  // An expanded headword that spells the whole merged surface is the best possible match.
  if (surface && expandedHeadwords.includes(surface)) {
    return surface;
  }

  // Strict '>' keeps the earliest candidate when several share the maximum length.
  let longest = '';
  for (const candidate of expandedHeadwords) {
    if (candidate.length > longest.length) {
      longest = candidate;
    }
  }
  return longest;
}
|
||||
|
||||
/**
 * Reports whether `text` is non-empty and every character in it passes `isKanaChar`.
 * The string is walked by code point, matching `Array.from(text)`.
 */
function isKanaOnlyText(text: string): boolean {
  if (text.length === 0) {
    return false;
  }
  for (const char of text) {
    if (!isKanaChar(char)) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/**
 * Decides whether a headword-less, kana-only chunk should be appended to the previous token.
 *
 * The merge is allowed only when the previous token's headword is longer than its surface
 * and still starts with the surface once the continuation is appended
 * (e.g. surface 'カズ' + continuation 'マ' against headword 'カズマ').
 *
 * @param previousToken - The most recently emitted token, if any.
 * @param continuationSurface - Surface text of the chunk that has no headword of its own.
 * @returns `true` (narrowing `previousToken` to `MergedToken`) when the chunk should be merged.
 */
function shouldMergeKanaContinuation(
  previousToken: MergedToken | undefined,
  continuationSurface: string,
): previousToken is MergedToken {
  if (!previousToken) {
    return false;
  }
  if (!continuationSurface || !isKanaOnlyText(continuationSurface)) {
    return false;
  }

  const { headword, surface } = previousToken;
  // The headword must still have characters left that the surface has not covered yet.
  if (!headword || headword.length <= surface.length) {
    return false;
  }

  return headword.startsWith(surface + continuationSurface);
}
|
||||
|
||||
export function mapYomitanParseResultItemToMergedTokens(
|
||||
parseResult: YomitanParseResultItem,
|
||||
isKnownWord: (text: string) => boolean,
|
||||
@@ -140,7 +186,8 @@ export function mapYomitanParseResultItemToMergedTokens(
|
||||
|
||||
let combinedSurface = '';
|
||||
let combinedReading = '';
|
||||
let combinedHeadword = '';
|
||||
let firstHeadword = '';
|
||||
const expandedHeadwords: string[] = [];
|
||||
|
||||
for (const segment of line) {
|
||||
const segmentText = segment.text;
|
||||
@@ -152,8 +199,14 @@ export function mapYomitanParseResultItemToMergedTokens(
|
||||
if (typeof segment.reading === 'string') {
|
||||
combinedReading += segment.reading;
|
||||
}
|
||||
if (!combinedHeadword) {
|
||||
combinedHeadword = extractYomitanHeadword(segment);
|
||||
const segmentHeadword = extractYomitanHeadword(segment);
|
||||
if (segmentHeadword) {
|
||||
if (!firstHeadword) {
|
||||
firstHeadword = segmentHeadword;
|
||||
}
|
||||
if (segmentHeadword.length > segmentText.length) {
|
||||
expandedHeadwords.push(segmentHeadword);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,7 +217,20 @@ export function mapYomitanParseResultItemToMergedTokens(
|
||||
const start = charOffset;
|
||||
const end = start + combinedSurface.length;
|
||||
charOffset = end;
|
||||
const combinedHeadword = selectMergedHeadword(
|
||||
firstHeadword,
|
||||
expandedHeadwords,
|
||||
combinedSurface,
|
||||
);
|
||||
if (!combinedHeadword) {
|
||||
const previousToken = tokens[tokens.length - 1];
|
||||
if (shouldMergeKanaContinuation(previousToken, combinedSurface)) {
|
||||
previousToken.surface += combinedSurface;
|
||||
previousToken.reading += combinedReading;
|
||||
previousToken.endPos = end;
|
||||
continue;
|
||||
}
|
||||
|
||||
// No dictionary-backed headword for this merged unit; skip it entirely so
|
||||
// downstream keyboard/frequency/JLPT flows only operate on lookup-backed tokens.
|
||||
continue;
|
||||
|
||||
27
src/main.ts
27
src/main.ts
@@ -361,7 +361,6 @@ import {
|
||||
registerGlobalShortcuts as registerGlobalShortcutsCore,
|
||||
replayCurrentSubtitleRuntime,
|
||||
resolveJellyfinPlaybackPlanRuntime,
|
||||
removeYomitanDictionarySettings,
|
||||
runStartupBootstrapRuntime,
|
||||
saveSubtitlePosition as saveSubtitlePositionCore,
|
||||
clearYomitanParserCachesForWindow,
|
||||
@@ -1236,9 +1235,16 @@ const characterDictionaryRuntime = createCharacterDictionaryRuntimeService({
|
||||
|
||||
const characterDictionaryAutoSyncRuntime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||
userDataPath: USER_DATA_PATH,
|
||||
getConfig: () => getResolvedConfig().anilist.characterDictionary,
|
||||
generateCharacterDictionary: (options) =>
|
||||
characterDictionaryRuntime.generateForCurrentMedia(undefined, options),
|
||||
getConfig: () => {
|
||||
const config = getResolvedConfig().anilist.characterDictionary;
|
||||
return {
|
||||
enabled: config.enabled,
|
||||
maxLoaded: config.maxLoaded,
|
||||
profileScope: config.profileScope,
|
||||
};
|
||||
},
|
||||
getOrCreateCurrentSnapshot: () => characterDictionaryRuntime.getOrCreateCurrentSnapshot(),
|
||||
buildMergedDictionary: (mediaIds) => characterDictionaryRuntime.buildMergedDictionary(mediaIds),
|
||||
getYomitanDictionaryInfo: async () => {
|
||||
await ensureYomitanExtensionLoaded();
|
||||
return await getYomitanDictionaryInfo(getYomitanParserRuntimeDeps(), {
|
||||
@@ -1272,19 +1278,6 @@ const characterDictionaryAutoSyncRuntime = createCharacterDictionaryAutoSyncRunt
|
||||
},
|
||||
);
|
||||
},
|
||||
removeYomitanDictionarySettings: async (dictionaryTitle, profileScope, mode) => {
|
||||
await ensureYomitanExtensionLoaded();
|
||||
return await removeYomitanDictionarySettings(
|
||||
dictionaryTitle,
|
||||
profileScope,
|
||||
mode,
|
||||
getYomitanParserRuntimeDeps(),
|
||||
{
|
||||
error: (message, ...args) => logger.error(message, ...args),
|
||||
info: (message, ...args) => logger.info(message, ...args),
|
||||
},
|
||||
);
|
||||
},
|
||||
now: () => Date.now(),
|
||||
schedule: (fn, delayMs) => setTimeout(fn, delayMs),
|
||||
clearSchedule: (timer) => clearTimeout(timer),
|
||||
|
||||
@@ -178,7 +178,7 @@ test('generateForCurrentMedia emits structured-content glossary so image stays w
|
||||
|
||||
const image = entry.content[0] as Record<string, unknown>;
|
||||
assert.equal(image.tag, 'img');
|
||||
assert.equal(image.path, 'img/c123.png');
|
||||
assert.equal(image.path, 'img/m130298-c123.png');
|
||||
assert.equal(image.sizeUnits, 'em');
|
||||
|
||||
const descriptionLine = entry.content[5];
|
||||
@@ -196,37 +196,10 @@ test('generateForCurrentMedia emits structured-content glossary so image stays w
|
||||
}
|
||||
});
|
||||
|
||||
test('generateForCurrentMedia regenerates dictionary when cached format version is stale', async () => {
|
||||
test('getOrCreateCurrentSnapshot persists and reuses normalized snapshot data', async () => {
|
||||
const userDataPath = makeTempDir();
|
||||
const dictionariesDir = path.join(userDataPath, 'character-dictionaries');
|
||||
fs.mkdirSync(dictionariesDir, { recursive: true });
|
||||
|
||||
const staleZipPath = path.join(dictionariesDir, 'anilist-130298.zip');
|
||||
fs.writeFileSync(staleZipPath, Buffer.from('not-a-real-zip'));
|
||||
fs.writeFileSync(
|
||||
path.join(dictionariesDir, 'cache.json'),
|
||||
JSON.stringify(
|
||||
{
|
||||
anilistById: {
|
||||
'130298': {
|
||||
mediaId: 130298,
|
||||
mediaTitle: 'The Eminence in Shadow',
|
||||
entryCount: 1,
|
||||
zipPath: staleZipPath,
|
||||
updatedAt: 1_700_000_000_000,
|
||||
formatVersion: 6,
|
||||
dictionaryTitle: 'SubMiner Character Dictionary (AniList 130298)',
|
||||
revision: 'stale-revision',
|
||||
},
|
||||
},
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
'utf8',
|
||||
);
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
let searchQueryCount = 0;
|
||||
let characterQueryCount = 0;
|
||||
|
||||
globalThis.fetch = (async (input: string | URL | Request, init?: RequestInit) => {
|
||||
@@ -237,6 +210,7 @@ test('generateForCurrentMedia regenerates dictionary when cached format version
|
||||
};
|
||||
|
||||
if (body.query?.includes('Page(perPage: 10)')) {
|
||||
searchQueryCount += 1;
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
data: {
|
||||
@@ -314,7 +288,7 @@ test('generateForCurrentMedia regenerates dictionary when cached format version
|
||||
throw new Error(`Unexpected fetch URL: ${url}`);
|
||||
}) as typeof globalThis.fetch;
|
||||
|
||||
try {
|
||||
try {
|
||||
const runtime = createCharacterDictionaryRuntimeService({
|
||||
userDataPath,
|
||||
getCurrentMediaPath: () => '/tmp/eminence-s01e05.mkv',
|
||||
@@ -328,17 +302,227 @@ test('generateForCurrentMedia regenerates dictionary when cached format version
|
||||
now: () => 1_700_000_000_100,
|
||||
});
|
||||
|
||||
const result = await runtime.generateForCurrentMedia(undefined, {
|
||||
refreshTtlMs: 60 * 60 * 1000,
|
||||
});
|
||||
assert.equal(result.fromCache, false);
|
||||
assert.equal(characterQueryCount, 1);
|
||||
const first = await runtime.getOrCreateCurrentSnapshot();
|
||||
const second = await runtime.getOrCreateCurrentSnapshot();
|
||||
|
||||
const termBank = JSON.parse(readStoredZipEntry(result.zipPath, 'term_bank_1.json').toString('utf8')) as Array<
|
||||
assert.equal(first.fromCache, false);
|
||||
assert.equal(second.fromCache, true);
|
||||
assert.equal(searchQueryCount, 2);
|
||||
assert.equal(characterQueryCount, 1);
|
||||
assert.equal(
|
||||
fs.existsSync(path.join(userDataPath, 'character-dictionaries', 'cache.json')),
|
||||
false,
|
||||
);
|
||||
|
||||
const snapshotPath = path.join(
|
||||
userDataPath,
|
||||
'character-dictionaries',
|
||||
'snapshots',
|
||||
'anilist-130298.json',
|
||||
);
|
||||
const snapshot = JSON.parse(fs.readFileSync(snapshotPath, 'utf8')) as {
|
||||
mediaId: number;
|
||||
entryCount: number;
|
||||
termEntries: Array<
|
||||
[string, string, string, string, number, Array<string | Record<string, unknown>>, number, string]
|
||||
>;
|
||||
};
|
||||
assert.equal(snapshot.mediaId, 130298);
|
||||
assert.equal(snapshot.entryCount > 0, true);
|
||||
const alpha = snapshot.termEntries.find(([term]) => term === 'アルファ');
|
||||
assert.ok(alpha);
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
test('buildMergedDictionary combines stored snapshots into one stable dictionary', async () => {
|
||||
const userDataPath = makeTempDir();
|
||||
const originalFetch = globalThis.fetch;
|
||||
const current = { title: 'The Eminence in Shadow', episode: 5 };
|
||||
|
||||
globalThis.fetch = (async (input: string | URL | Request, init?: RequestInit) => {
|
||||
const url = typeof input === 'string' ? input : input instanceof URL ? input.href : input.url;
|
||||
if (url === GRAPHQL_URL) {
|
||||
const body = JSON.parse(String(init?.body ?? '{}')) as {
|
||||
query?: string;
|
||||
variables?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
if (body.query?.includes('Page(perPage: 10)')) {
|
||||
if (body.variables?.search === 'The Eminence in Shadow') {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
data: {
|
||||
Page: {
|
||||
media: [
|
||||
{
|
||||
id: 130298,
|
||||
episodes: 20,
|
||||
title: {
|
||||
romaji: 'Kage no Jitsuryokusha ni Naritakute!',
|
||||
english: 'The Eminence in Shadow',
|
||||
native: '陰の実力者になりたくて!',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'content-type': 'application/json' },
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
data: {
|
||||
Page: {
|
||||
media: [
|
||||
{
|
||||
id: 21,
|
||||
episodes: 28,
|
||||
title: {
|
||||
romaji: 'Sousou no Frieren',
|
||||
english: 'Frieren: Beyond Journey’s End',
|
||||
native: '葬送のフリーレン',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'content-type': 'application/json' },
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
if (body.query?.includes('characters(page: $page')) {
|
||||
const mediaId = Number(body.variables?.id);
|
||||
if (mediaId === 130298) {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
data: {
|
||||
Media: {
|
||||
title: {
|
||||
english: 'The Eminence in Shadow',
|
||||
},
|
||||
characters: {
|
||||
pageInfo: { hasNextPage: false },
|
||||
edges: [
|
||||
{
|
||||
role: 'MAIN',
|
||||
node: {
|
||||
id: 111,
|
||||
description: 'Leader of Shadow Garden.',
|
||||
image: {
|
||||
large: 'https://example.com/alpha.png',
|
||||
medium: null,
|
||||
},
|
||||
name: {
|
||||
full: 'Alpha',
|
||||
native: 'アルファ',
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'content-type': 'application/json' },
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
data: {
|
||||
Media: {
|
||||
title: {
|
||||
english: 'Frieren: Beyond Journey’s End',
|
||||
},
|
||||
characters: {
|
||||
pageInfo: { hasNextPage: false },
|
||||
edges: [
|
||||
{
|
||||
role: 'MAIN',
|
||||
node: {
|
||||
id: 222,
|
||||
description: 'Elven mage.',
|
||||
image: {
|
||||
large: 'https://example.com/frieren.png',
|
||||
medium: null,
|
||||
},
|
||||
name: {
|
||||
full: 'Frieren',
|
||||
native: 'フリーレン',
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { 'content-type': 'application/json' },
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (url === 'https://example.com/alpha.png' || url === 'https://example.com/frieren.png') {
|
||||
return new Response(PNG_1X1, {
|
||||
status: 200,
|
||||
headers: { 'content-type': 'image/png' },
|
||||
});
|
||||
}
|
||||
|
||||
throw new Error(`Unexpected fetch URL: ${url}`);
|
||||
}) as typeof globalThis.fetch;
|
||||
|
||||
try {
|
||||
const runtime = createCharacterDictionaryRuntimeService({
|
||||
userDataPath,
|
||||
getCurrentMediaPath: () => '/tmp/current.mkv',
|
||||
getCurrentMediaTitle: () => current.title,
|
||||
resolveMediaPathForJimaku: (mediaPath) => mediaPath,
|
||||
guessAnilistMediaInfo: async () => ({
|
||||
title: current.title,
|
||||
episode: current.episode,
|
||||
source: 'fallback',
|
||||
}),
|
||||
now: () => 1_700_000_000_100,
|
||||
});
|
||||
|
||||
await runtime.getOrCreateCurrentSnapshot();
|
||||
current.title = 'Frieren: Beyond Journey’s End';
|
||||
current.episode = 1;
|
||||
await runtime.getOrCreateCurrentSnapshot();
|
||||
|
||||
const merged = await runtime.buildMergedDictionary([21, 130298]);
|
||||
const index = JSON.parse(readStoredZipEntry(merged.zipPath, 'index.json').toString('utf8')) as {
|
||||
title: string;
|
||||
};
|
||||
const termBank = JSON.parse(readStoredZipEntry(merged.zipPath, 'term_bank_1.json').toString('utf8')) as Array<
|
||||
[string, string, string, string, number, Array<string | Record<string, unknown>>, number, string]
|
||||
>;
|
||||
const frieren = termBank.find(([term]) => term === 'フリーレン');
|
||||
const alpha = termBank.find(([term]) => term === 'アルファ');
|
||||
|
||||
assert.equal(index.title, 'SubMiner Character Dictionary');
|
||||
assert.equal(merged.entryCount >= 2, true);
|
||||
assert.ok(frieren);
|
||||
assert.ok(alpha);
|
||||
assert.equal((frieren[5][0] as { type?: string }).type, 'structured-content');
|
||||
assert.equal((alpha[5][0] as { type?: string }).type, 'structured-content');
|
||||
} finally {
|
||||
globalThis.fetch = originalFetch;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import { createHash } from 'node:crypto';
|
||||
import type { AnilistMediaGuess } from '../core/services/anilist/anilist-updater';
|
||||
import { hasVideoExtension } from '../shared/video-extensions';
|
||||
|
||||
@@ -26,22 +27,35 @@ const HONORIFIC_SUFFIXES = [
|
||||
] as const;
|
||||
type CharacterDictionaryRole = 'main' | 'primary' | 'side' | 'appears';
|
||||
|
||||
type CharacterDictionaryCacheEntry = {
|
||||
/** One glossary item of a term entry: plain text or an object node. */
type CharacterDictionaryGlossaryEntry = string | Record<string, unknown>;

/**
 * One row of a term bank, stored as an 8-element tuple.
 * Element labels follow the Yomitan term bank (format 3) column order; they are
 * documentation only and do not change the tuple's type.
 */
type CharacterDictionaryTermEntry = [
  term: string,
  reading: string,
  definitionTags: string,
  rules: string,
  score: number,
  glossary: CharacterDictionaryGlossaryEntry[],
  sequence: number,
  termTags: string,
];
|
||||
|
||||
/** An image stored inside a snapshot so the dictionary zip can be rebuilt from the snapshot. */
type CharacterDictionarySnapshotImage = {
  /** Entry name used for the image inside the dictionary zip. */
  path: string;
  /** Image bytes encoded as base64. */
  dataBase64: string;
};
|
||||
|
||||
export type CharacterDictionarySnapshot = {
|
||||
formatVersion: number;
|
||||
mediaId: number;
|
||||
mediaTitle: string;
|
||||
entryCount: number;
|
||||
zipPath: string;
|
||||
updatedAt: number;
|
||||
formatVersion?: number;
|
||||
dictionaryTitle?: string;
|
||||
revision?: string;
|
||||
termEntries: CharacterDictionaryTermEntry[];
|
||||
images: CharacterDictionarySnapshotImage[];
|
||||
};
|
||||
|
||||
type CharacterDictionaryCacheFile = {
|
||||
anilistById: Record<string, CharacterDictionaryCacheEntry>;
|
||||
};
|
||||
|
||||
const CHARACTER_DICTIONARY_FORMAT_VERSION = 8;
|
||||
const CHARACTER_DICTIONARY_FORMAT_VERSION = 9;
|
||||
const CHARACTER_DICTIONARY_MERGED_TITLE = 'SubMiner Character Dictionary';
|
||||
|
||||
type AniListSearchResponse = {
|
||||
Page?: {
|
||||
@@ -117,6 +131,21 @@ export type CharacterDictionaryGenerateOptions = {
|
||||
refreshTtlMs?: number;
|
||||
};
|
||||
|
||||
/** Summary returned after a per-media character snapshot is loaded or created. */
export type CharacterDictionarySnapshotResult = {
  /** AniList media ID the snapshot belongs to. */
  mediaId: number;
  /** Media title recorded in the snapshot. */
  mediaTitle: string;
  /** Number of term entries in the snapshot. */
  entryCount: number;
  /** `true` when an existing snapshot file was reused instead of being rebuilt. */
  fromCache: boolean;
  /** Timestamp recorded in the snapshot. */
  updatedAt: number;
};
|
||||
|
||||
/** Outcome of building the single merged character dictionary archive. */
export type MergedCharacterDictionaryBuildResult = {
  /** Location of the written zip archive. */
  zipPath: string;
  /** Revision string of the merged dictionary. */
  revision: string;
  /** Title of the merged dictionary. */
  dictionaryTitle: string;
  /** Number of term entries in the merged dictionary. */
  entryCount: number;
};
|
||||
|
||||
export interface CharacterDictionaryRuntimeDeps {
|
||||
userDataPath: string;
|
||||
getCurrentMediaPath: () => string | null;
|
||||
@@ -383,29 +412,60 @@ function resolveDictionaryGuessInputs(targetPath: string): {
|
||||
throw new Error(`Dictionary target must be a file or directory path: ${targetPath}`);
|
||||
}
|
||||
|
||||
function readCache(cachePath: string): CharacterDictionaryCacheFile {
|
||||
/** Returns the `snapshots` subdirectory of the dictionary output directory. */
function getSnapshotsDir(outputDir: string): string {
  const snapshotsDirName = 'snapshots';
  return path.join(outputDir, snapshotsDirName);
}
|
||||
|
||||
/** Returns the JSON snapshot file path for one AniList media ID (`anilist-<id>.json`). */
function getSnapshotPath(outputDir: string, mediaId: number): string {
  const snapshotFileName = `anilist-${mediaId}.json`;
  return path.join(getSnapshotsDir(outputDir), snapshotFileName);
}
|
||||
|
||||
/** Returns the path of the merged dictionary archive (`merged.zip`) in the output directory. */
function getMergedZipPath(outputDir: string): string {
  const mergedZipName = 'merged.zip';
  return path.join(outputDir, mergedZipName);
}
|
||||
|
||||
function readSnapshot(snapshotPath: string): CharacterDictionarySnapshot | null {
|
||||
try {
|
||||
const raw = fs.readFileSync(cachePath, 'utf8');
|
||||
const parsed = JSON.parse(raw) as CharacterDictionaryCacheFile;
|
||||
if (!parsed || typeof parsed !== 'object' || !parsed.anilistById) {
|
||||
return { anilistById: {} };
|
||||
const raw = fs.readFileSync(snapshotPath, 'utf8');
|
||||
const parsed = JSON.parse(raw) as Partial<CharacterDictionarySnapshot>;
|
||||
if (!parsed || typeof parsed !== 'object') {
|
||||
return null;
|
||||
}
|
||||
return parsed;
|
||||
if (
|
||||
parsed.formatVersion !== CHARACTER_DICTIONARY_FORMAT_VERSION ||
|
||||
typeof parsed.mediaId !== 'number' ||
|
||||
typeof parsed.mediaTitle !== 'string' ||
|
||||
typeof parsed.entryCount !== 'number' ||
|
||||
typeof parsed.updatedAt !== 'number' ||
|
||||
!Array.isArray(parsed.termEntries) ||
|
||||
!Array.isArray(parsed.images)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
formatVersion: parsed.formatVersion,
|
||||
mediaId: parsed.mediaId,
|
||||
mediaTitle: parsed.mediaTitle,
|
||||
entryCount: parsed.entryCount,
|
||||
updatedAt: parsed.updatedAt,
|
||||
termEntries: parsed.termEntries as CharacterDictionaryTermEntry[],
|
||||
images: parsed.images as CharacterDictionarySnapshotImage[],
|
||||
};
|
||||
} catch {
|
||||
return { anilistById: {} };
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function writeCache(cachePath: string, cache: CharacterDictionaryCacheFile): void {
|
||||
ensureDir(path.dirname(cachePath));
|
||||
fs.writeFileSync(cachePath, JSON.stringify(cache, null, 2), 'utf8');
|
||||
/**
 * Writes a snapshot to disk as pretty-printed (2-space) UTF-8 JSON.
 * `ensureDir` is called on the parent directory before the file is written.
 */
function writeSnapshot(snapshotPath: string, snapshot: CharacterDictionarySnapshot): void {
  const parentDir = path.dirname(snapshotPath);
  ensureDir(parentDir);

  const serialized = JSON.stringify(snapshot, null, 2);
  fs.writeFileSync(snapshotPath, serialized, 'utf8');
}
|
||||
|
||||
function createDefinitionGlossary(
|
||||
character: CharacterRecord,
|
||||
mediaTitle: string,
|
||||
imagePath: string | null,
|
||||
): Array<string | Record<string, unknown>> {
|
||||
): CharacterDictionaryGlossaryEntry[] {
|
||||
const displayName = character.nativeName || character.fullName || `Character ${character.id}`;
|
||||
const lines: string[] = [`${displayName} [${roleLabel(character.role)}]`, `${mediaTitle} · AniList`];
|
||||
|
||||
@@ -449,12 +509,16 @@ function createDefinitionGlossary(
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Builds the in-dictionary path of a character image.
 * The media ID is part of the file name, e.g. `img/m130298-c123.png`.
 */
function buildSnapshotImagePath(mediaId: number, charId: number, ext: string): string {
  const fileName = `m${mediaId}-c${charId}.${ext}`;
  return `img/${fileName}`;
}
|
||||
|
||||
function buildTermEntry(
|
||||
term: string,
|
||||
reading: string,
|
||||
role: CharacterDictionaryRole,
|
||||
glossary: Array<string | Record<string, unknown>>,
|
||||
): Array<string | number | Array<string | Record<string, unknown>>> {
|
||||
glossary: CharacterDictionaryGlossaryEntry[],
|
||||
): CharacterDictionaryTermEntry {
|
||||
const { tag, score } = roleInfo(role);
|
||||
return [term, reading, `name ${tag}`, '', score, glossary, 0, ''];
|
||||
}
|
||||
@@ -754,6 +818,7 @@ async function fetchCharactersForMedia(
|
||||
|
||||
async function downloadCharacterImage(imageUrl: string, charId: number): Promise<{
|
||||
filename: string;
|
||||
ext: string;
|
||||
bytes: Buffer;
|
||||
} | null> {
|
||||
try {
|
||||
@@ -764,6 +829,7 @@ async function downloadCharacterImage(imageUrl: string, charId: number): Promise
|
||||
const ext = inferImageExt(response.headers.get('content-type'));
|
||||
return {
|
||||
filename: `c${charId}.${ext}`,
|
||||
ext,
|
||||
bytes,
|
||||
};
|
||||
} catch {
|
||||
@@ -775,14 +841,17 @@ function buildDictionaryTitle(mediaId: number): string {
|
||||
return `SubMiner Character Dictionary (AniList ${mediaId})`;
|
||||
}
|
||||
|
||||
function createIndex(mediaId: number, mediaTitle: string, revision: string): Record<string, unknown> {
|
||||
const dictionaryTitle = buildDictionaryTitle(mediaId);
|
||||
function createIndex(
|
||||
dictionaryTitle: string,
|
||||
description: string,
|
||||
revision: string,
|
||||
): Record<string, unknown> {
|
||||
return {
|
||||
title: dictionaryTitle,
|
||||
revision,
|
||||
format: 3,
|
||||
author: 'SubMiner',
|
||||
description: `Character names from ${mediaTitle} [AniList media ID ${mediaId}]`,
|
||||
description,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -796,21 +865,195 @@ function createTagBank(): Array<[string, string, number, string, number]> {
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Builds the snapshot for one media from its character records.
 *
 * Every name term of a character becomes one term entry, and all of a character's entries
 * share one glossary. Entries are de-duplicated on `term|reading|role` across the whole
 * media, so the first character to produce a given key wins.
 *
 * @param mediaId - AniList media ID stored in the snapshot.
 * @param mediaTitle - Title stored in the snapshot and passed to the glossary builder.
 * @param characters - Character records to turn into term entries.
 * @param imagesByCharacterId - Stored images keyed by character ID; all of them are copied
 *   into the snapshot.
 * @param updatedAt - Timestamp stored in the snapshot.
 * @returns The snapshot, stamped with the current format version.
 * @throws Error when no term entry could be generated.
 */
function buildSnapshotFromCharacters(
  mediaId: number,
  mediaTitle: string,
  characters: CharacterRecord[],
  imagesByCharacterId: Map<number, CharacterDictionarySnapshotImage>,
  updatedAt: number,
): CharacterDictionarySnapshot {
  const emittedKeys = new Set<string>();
  const termEntries: CharacterDictionaryTermEntry[] = [];

  characters.forEach((character) => {
    const storedImage = imagesByCharacterId.get(character.id);
    const glossary = createDefinitionGlossary(character, mediaTitle, storedImage?.path ?? null);

    for (const term of buildNameTerms(character)) {
      const reading = buildReading(term);
      const dedupeKey = `${term}|${reading}|${character.role}`;
      if (!emittedKeys.has(dedupeKey)) {
        emittedKeys.add(dedupeKey);
        termEntries.push(buildTermEntry(term, reading, character.role, glossary));
      }
    }
  });

  if (termEntries.length === 0) {
    throw new Error('No dictionary entries generated from AniList character data.');
  }

  return {
    formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION,
    mediaId,
    mediaTitle,
    entryCount: termEntries.length,
    updatedAt,
    termEntries,
    images: Array.from(imagesByCharacterId.values()),
  };
}
|
||||
|
||||
/**
 * Assembles a dictionary archive and writes it to `outputPath`.
 *
 * Archive entries, in order: `index.json`, `tag_bank_1.json`, every image, then
 * `term_bank_<n>.json` files holding at most 10,000 term entries each.
 *
 * @param outputPath - Destination of the zip file; `ensureDir` is called on its parent directory.
 * @param dictionaryTitle - Title passed to the index builder.
 * @param description - Description passed to the index builder.
 * @param revision - Revision passed to the index builder.
 * @param termEntries - Term rows to split across the term banks.
 * @param images - Base64-encoded images to store under their `path`.
 * @returns The written path and the number of term entries.
 */
function buildDictionaryZip(
  outputPath: string,
  dictionaryTitle: string,
  description: string,
  revision: string,
  termEntries: CharacterDictionaryTermEntry[],
  images: CharacterDictionarySnapshotImage[],
): { zipPath: string; entryCount: number } {
  const entriesPerBank = 10_000;
  const toJsonBuffer = (value: unknown, indent?: number): Buffer =>
    Buffer.from(JSON.stringify(value, null, indent), 'utf8');

  const indexFile = {
    name: 'index.json',
    data: toJsonBuffer(createIndex(dictionaryTitle, description, revision), 2),
  };
  const tagBankFile = {
    name: 'tag_bank_1.json',
    data: toJsonBuffer(createTagBank()),
  };
  const imageFiles = images.map((image) => ({
    name: image.path,
    data: Buffer.from(image.dataBase64, 'base64'),
  }));

  // Bank numbering is 1-based; an empty entry list produces no term bank at all.
  const bankCount = Math.ceil(termEntries.length / entriesPerBank);
  const termBankFiles = Array.from({ length: bankCount }, (_, bankIndex) => {
    const start = bankIndex * entriesPerBank;
    return {
      name: `term_bank_${bankIndex + 1}.json`,
      data: toJsonBuffer(termEntries.slice(start, start + entriesPerBank)),
    };
  });

  const zipFiles: Array<{ name: string; data: Buffer }> = [
    indexFile,
    tagBankFile,
    ...imageFiles,
    ...termBankFiles,
  ];

  ensureDir(path.dirname(outputPath));
  fs.writeFileSync(outputPath, createStoredZip(zipFiles));
  return { zipPath: outputPath, entryCount: termEntries.length };
}
|
||||
|
||||
/**
 * Derives the revision of a merged dictionary.
 *
 * The revision is the first 12 hex characters of the SHA-1 of a JSON payload containing
 * the media ID list (in the given order) and each snapshot's `mediaId`, `updatedAt`
 * and `entryCount`. Other snapshot fields do not influence it.
 *
 * @param mediaIds - Media IDs included in the merged dictionary, in order.
 * @param snapshots - Snapshots contributing to the merged dictionary.
 * @returns A 12-character lowercase hex string.
 */
function buildMergedRevision(mediaIds: number[], snapshots: CharacterDictionarySnapshot[]): string {
  const snapshotFingerprints = snapshots.map(({ mediaId, updatedAt, entryCount }) => ({
    mediaId,
    updatedAt,
    entryCount,
  }));
  const payload = JSON.stringify({ mediaIds, snapshots: snapshotFingerprints });
  const digest = createHash('sha1').update(payload).digest('hex');
  return digest.slice(0, 12);
}
|
||||
|
||||
export function createCharacterDictionaryRuntimeService(deps: CharacterDictionaryRuntimeDeps): {
|
||||
getOrCreateCurrentSnapshot: (targetPath?: string) => Promise<CharacterDictionarySnapshotResult>;
|
||||
buildMergedDictionary: (mediaIds: number[]) => Promise<MergedCharacterDictionaryBuildResult>;
|
||||
generateForCurrentMedia: (
|
||||
targetPath?: string,
|
||||
options?: CharacterDictionaryGenerateOptions,
|
||||
) => Promise<CharacterDictionaryBuildResult>;
|
||||
} {
|
||||
const outputDir = path.join(deps.userDataPath, 'character-dictionaries');
|
||||
const cachePath = path.join(outputDir, 'cache.json');
|
||||
const sleepMs = deps.sleep ?? sleep;
|
||||
|
||||
/**
 * Resolves the AniList media for either an explicit target or the currently
 * playing media.
 *
 * When `targetPath` is non-empty after trimming, guess inputs come from
 * `resolveDictionaryGuessInputs`; otherwise the current media path and title
 * reported by `deps` are used.
 *
 * @param targetPath - Optional explicit target to resolve instead of the current media.
 * @param beforeRequest - Optional hook forwarded to `resolveAniListMediaIdFromGuess`.
 * @returns The media resolved by `resolveAniListMediaIdFromGuess`.
 * @throws Error when no guess is produced or the guessed title is blank.
 */
const resolveCurrentMedia = async (
  targetPath?: string,
  beforeRequest?: () => Promise<void>,
): Promise<ResolvedAniListMedia> => {
  const dictionaryTarget = targetPath?.trim() || '';
  const guessInput =
    dictionaryTarget.length > 0
      ? resolveDictionaryGuessInputs(dictionaryTarget)
      : {
          mediaPath: deps.getCurrentMediaPath(),
          mediaTitle: deps.getCurrentMediaTitle(),
        };
  const mediaPathForGuess = deps.resolveMediaPathForJimaku(guessInput.mediaPath);
  const mediaTitle = guessInput.mediaTitle;
  const guessed = await deps.guessAnilistMediaInfo(mediaPathForGuess, mediaTitle);
  // A guess with a whitespace-only title is treated the same as no guess at all.
  if (!guessed || !guessed.title.trim()) {
    throw new Error('Unable to resolve current anime from media path/title.');
  }
  return resolveAniListMediaIdFromGuess(guessed, beforeRequest);
};
|
||||
|
||||
/**
 * Returns the character dictionary snapshot for one AniList media id, creating
 * and storing it when `readSnapshot` finds none.
 *
 * On a miss the characters are fetched, their images are downloaded one at a
 * time, and the resulting snapshot is written to the snapshot path before its
 * summary is returned.
 *
 * @param mediaId - AniList media id to load or build a snapshot for.
 * @param mediaTitleHint - Title used when the fetch returns no media title.
 * @param beforeRequest - Optional hook forwarded to `fetchCharactersForMedia`.
 * @returns Snapshot summary; `fromCache` is true when an existing snapshot was reused.
 * @throws Error when the fetch returns no characters for `mediaId`.
 */
const getOrCreateSnapshot = async (
  mediaId: number,
  mediaTitleHint?: string,
  beforeRequest?: () => Promise<void>,
): Promise<CharacterDictionarySnapshotResult> => {
  const snapshotPath = getSnapshotPath(outputDir, mediaId);
  const cachedSnapshot = readSnapshot(snapshotPath);
  if (cachedSnapshot) {
    deps.logInfo?.(`[dictionary] snapshot hit for AniList ${mediaId}`);
    return {
      mediaId: cachedSnapshot.mediaId,
      mediaTitle: cachedSnapshot.mediaTitle,
      entryCount: cachedSnapshot.entryCount,
      fromCache: true,
      updatedAt: cachedSnapshot.updatedAt,
    };
  }

  const { mediaTitle: fetchedMediaTitle, characters } = await fetchCharactersForMedia(
    mediaId,
    beforeRequest,
  );
  if (characters.length === 0) {
    throw new Error(`No characters returned for AniList media ${mediaId}.`);
  }

  const imagesByCharacterId = new Map<number, CharacterDictionarySnapshotImage>();
  let hasAttemptedCharacterImageDownload = false;
  for (const character of characters) {
    if (!character.imageUrl) continue;
    // Pause before every download attempt except the first one.
    if (hasAttemptedCharacterImageDownload) {
      await sleepMs(CHARACTER_IMAGE_DOWNLOAD_DELAY_MS);
    }
    hasAttemptedCharacterImageDownload = true;
    const image = await downloadCharacterImage(character.imageUrl, character.id);
    // A falsy download result just leaves this character without an image.
    if (!image) continue;
    imagesByCharacterId.set(character.id, {
      path: buildSnapshotImagePath(mediaId, character.id, image.ext),
      dataBase64: image.bytes.toString('base64'),
    });
  }

  const snapshot = buildSnapshotFromCharacters(
    mediaId,
    // Title preference: fetched title, then the caller's hint, then a generic label.
    fetchedMediaTitle || mediaTitleHint || `AniList ${mediaId}`,
    characters,
    imagesByCharacterId,
    deps.now(),
  );
  writeSnapshot(snapshotPath, snapshot);
  deps.logInfo?.(
    `[dictionary] stored snapshot for AniList ${mediaId}: ${snapshot.entryCount} terms`,
  );

  return {
    mediaId: snapshot.mediaId,
    mediaTitle: snapshot.mediaTitle,
    entryCount: snapshot.entryCount,
    fromCache: false,
    updatedAt: snapshot.updatedAt,
  };
};
|
||||
|
||||
return {
|
||||
generateForCurrentMedia: async (
|
||||
targetPath?: string,
|
||||
options?: CharacterDictionaryGenerateOptions,
|
||||
) => {
|
||||
getOrCreateCurrentSnapshot: async (targetPath?: string) => {
|
||||
let hasAniListRequest = false;
|
||||
const waitForAniListRequestSlot = async (): Promise<void> => {
|
||||
if (!hasAniListRequest) {
|
||||
@@ -819,149 +1062,83 @@ export function createCharacterDictionaryRuntimeService(deps: CharacterDictionar
|
||||
}
|
||||
await sleepMs(ANILIST_REQUEST_DELAY_MS);
|
||||
};
|
||||
|
||||
const dictionaryTarget = targetPath?.trim() || '';
|
||||
const guessInput =
|
||||
dictionaryTarget.length > 0
|
||||
? resolveDictionaryGuessInputs(dictionaryTarget)
|
||||
: {
|
||||
mediaPath: deps.getCurrentMediaPath(),
|
||||
mediaTitle: deps.getCurrentMediaTitle(),
|
||||
};
|
||||
const mediaPathForGuess = deps.resolveMediaPathForJimaku(guessInput.mediaPath);
|
||||
const mediaTitle = guessInput.mediaTitle;
|
||||
const guessed = await deps.guessAnilistMediaInfo(mediaPathForGuess, mediaTitle);
|
||||
if (!guessed || !guessed.title.trim()) {
|
||||
throw new Error('Unable to resolve current anime from media path/title.');
|
||||
}
|
||||
|
||||
const resolvedMedia = await resolveAniListMediaIdFromGuess(guessed, waitForAniListRequestSlot);
|
||||
const cache = readCache(cachePath);
|
||||
const cached = cache.anilistById[String(resolvedMedia.id)];
|
||||
const refreshTtlMsRaw = options?.refreshTtlMs;
|
||||
const hasRefreshTtl =
|
||||
typeof refreshTtlMsRaw === 'number' && Number.isFinite(refreshTtlMsRaw) && refreshTtlMsRaw > 0;
|
||||
const now = deps.now();
|
||||
const cacheAgeMs =
|
||||
cached && typeof cached.updatedAt === 'number' && Number.isFinite(cached.updatedAt)
|
||||
? Math.max(0, now - cached.updatedAt)
|
||||
: Number.POSITIVE_INFINITY;
|
||||
const isCacheFresh = !hasRefreshTtl || cacheAgeMs <= refreshTtlMsRaw;
|
||||
const isCacheFormatCurrent =
|
||||
cached?.formatVersion === undefined
|
||||
? false
|
||||
: cached.formatVersion >= CHARACTER_DICTIONARY_FORMAT_VERSION;
|
||||
if (cached?.zipPath && fs.existsSync(cached.zipPath) && isCacheFresh && isCacheFormatCurrent) {
|
||||
deps.logInfo?.(
|
||||
`[dictionary] cache hit for AniList ${resolvedMedia.id}: ${path.basename(cached.zipPath)}`,
|
||||
);
|
||||
return {
|
||||
zipPath: cached.zipPath,
|
||||
fromCache: true,
|
||||
mediaId: cached.mediaId,
|
||||
mediaTitle: cached.mediaTitle,
|
||||
entryCount: cached.entryCount,
|
||||
dictionaryTitle: cached.dictionaryTitle ?? buildDictionaryTitle(cached.mediaId),
|
||||
revision: cached.revision,
|
||||
};
|
||||
}
|
||||
|
||||
const { mediaTitle: fetchedMediaTitle, characters } = await fetchCharactersForMedia(
|
||||
resolvedMedia.id,
|
||||
waitForAniListRequestSlot,
|
||||
);
|
||||
if (characters.length === 0) {
|
||||
throw new Error(`No characters returned for AniList media ${resolvedMedia.id}.`);
|
||||
}
|
||||
|
||||
ensureDir(outputDir);
|
||||
const zipFiles: Array<{ name: string; data: Buffer }> = [];
|
||||
const termEntries: Array<Array<string | number | Array<string | Record<string, unknown>>>> =
|
||||
[];
|
||||
const seen = new Set<string>();
|
||||
|
||||
let hasAttemptedCharacterImageDownload = false;
|
||||
for (const character of characters) {
|
||||
let imagePath: string | null = null;
|
||||
if (character.imageUrl) {
|
||||
if (hasAttemptedCharacterImageDownload) {
|
||||
await sleepMs(CHARACTER_IMAGE_DOWNLOAD_DELAY_MS);
|
||||
}
|
||||
hasAttemptedCharacterImageDownload = true;
|
||||
const image = await downloadCharacterImage(character.imageUrl, character.id);
|
||||
if (image) {
|
||||
imagePath = `img/${image.filename}`;
|
||||
zipFiles.push({
|
||||
name: imagePath,
|
||||
data: image.bytes,
|
||||
});
|
||||
}
|
||||
const resolvedMedia = await resolveCurrentMedia(targetPath, waitForAniListRequestSlot);
|
||||
return getOrCreateSnapshot(resolvedMedia.id, resolvedMedia.title, waitForAniListRequestSlot);
|
||||
},
|
||||
buildMergedDictionary: async (mediaIds: number[]) => {
|
||||
const normalizedMediaIds = mediaIds
|
||||
.filter((mediaId) => Number.isFinite(mediaId) && mediaId > 0)
|
||||
.map((mediaId) => Math.floor(mediaId));
|
||||
const snapshots = normalizedMediaIds.map((mediaId) => {
|
||||
const snapshot = readSnapshot(getSnapshotPath(outputDir, mediaId));
|
||||
if (!snapshot) {
|
||||
throw new Error(`Missing character dictionary snapshot for AniList ${mediaId}.`);
|
||||
}
|
||||
const glossary = createDefinitionGlossary(character, fetchedMediaTitle, imagePath);
|
||||
const candidateTerms = buildNameTerms(character);
|
||||
for (const term of candidateTerms) {
|
||||
const reading = buildReading(term);
|
||||
const dedupeKey = `${term}|${reading}|${character.role}`;
|
||||
if (seen.has(dedupeKey)) continue;
|
||||
seen.add(dedupeKey);
|
||||
termEntries.push(buildTermEntry(term, reading, character.role, glossary));
|
||||
}
|
||||
}
|
||||
|
||||
if (termEntries.length === 0) {
|
||||
throw new Error('No dictionary entries generated from AniList character data.');
|
||||
}
|
||||
|
||||
const revision = String(now);
|
||||
const dictionaryTitle = buildDictionaryTitle(resolvedMedia.id);
|
||||
zipFiles.push({
|
||||
name: 'index.json',
|
||||
data: Buffer.from(
|
||||
JSON.stringify(createIndex(resolvedMedia.id, fetchedMediaTitle, revision), null, 2),
|
||||
'utf8',
|
||||
),
|
||||
return snapshot;
|
||||
});
|
||||
zipFiles.push({
|
||||
name: 'tag_bank_1.json',
|
||||
data: Buffer.from(JSON.stringify(createTagBank()), 'utf8'),
|
||||
});
|
||||
|
||||
const entriesPerBank = 10_000;
|
||||
for (let i = 0; i < termEntries.length; i += entriesPerBank) {
|
||||
const chunk = termEntries.slice(i, i + entriesPerBank);
|
||||
zipFiles.push({
|
||||
name: `term_bank_${Math.floor(i / entriesPerBank) + 1}.json`,
|
||||
data: Buffer.from(JSON.stringify(chunk), 'utf8'),
|
||||
});
|
||||
}
|
||||
|
||||
const zipBuffer = createStoredZip(zipFiles);
|
||||
const zipPath = path.join(outputDir, `anilist-${resolvedMedia.id}.zip`);
|
||||
fs.writeFileSync(zipPath, zipBuffer);
|
||||
|
||||
const cacheEntry: CharacterDictionaryCacheEntry = {
|
||||
mediaId: resolvedMedia.id,
|
||||
mediaTitle: fetchedMediaTitle,
|
||||
entryCount: termEntries.length,
|
||||
zipPath,
|
||||
updatedAt: now,
|
||||
formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION,
|
||||
dictionaryTitle,
|
||||
const revision = buildMergedRevision(normalizedMediaIds, snapshots);
|
||||
const description =
|
||||
snapshots.length === 1
|
||||
? `Character names from ${snapshots[0]!.mediaTitle}`
|
||||
: `Character names from ${snapshots.length} recent anime`;
|
||||
const { zipPath, entryCount } = buildDictionaryZip(
|
||||
getMergedZipPath(outputDir),
|
||||
CHARACTER_DICTIONARY_MERGED_TITLE,
|
||||
description,
|
||||
revision,
|
||||
};
|
||||
cache.anilistById[String(resolvedMedia.id)] = cacheEntry;
|
||||
writeCache(cachePath, cache);
|
||||
|
||||
deps.logInfo?.(
|
||||
`[dictionary] generated AniList ${resolvedMedia.id}: ${termEntries.length} terms -> ${zipPath}`,
|
||||
snapshots.flatMap((snapshot) => snapshot.termEntries),
|
||||
snapshots.flatMap((snapshot) => snapshot.images),
|
||||
);
|
||||
deps.logInfo?.(
|
||||
`[dictionary] rebuilt merged dictionary: ${normalizedMediaIds.join(', ') || '<empty>'} -> ${zipPath}`,
|
||||
);
|
||||
|
||||
return {
|
||||
zipPath,
|
||||
fromCache: false,
|
||||
revision,
|
||||
dictionaryTitle: CHARACTER_DICTIONARY_MERGED_TITLE,
|
||||
entryCount,
|
||||
};
|
||||
},
|
||||
generateForCurrentMedia: async (targetPath?: string, _options?: CharacterDictionaryGenerateOptions) => {
|
||||
let hasAniListRequest = false;
|
||||
const waitForAniListRequestSlot = async (): Promise<void> => {
|
||||
if (!hasAniListRequest) {
|
||||
hasAniListRequest = true;
|
||||
return;
|
||||
}
|
||||
await sleepMs(ANILIST_REQUEST_DELAY_MS);
|
||||
};
|
||||
const resolvedMedia = await resolveCurrentMedia(targetPath, waitForAniListRequestSlot);
|
||||
const snapshot = await getOrCreateSnapshot(
|
||||
resolvedMedia.id,
|
||||
resolvedMedia.title,
|
||||
waitForAniListRequestSlot,
|
||||
);
|
||||
const storedSnapshot = readSnapshot(getSnapshotPath(outputDir, resolvedMedia.id));
|
||||
if (!storedSnapshot) {
|
||||
throw new Error(`Snapshot missing after generation for AniList ${resolvedMedia.id}.`);
|
||||
}
|
||||
const revision = String(storedSnapshot.updatedAt);
|
||||
const dictionaryTitle = buildDictionaryTitle(resolvedMedia.id);
|
||||
const description = `Character names from ${storedSnapshot.mediaTitle} [AniList media ID ${resolvedMedia.id}]`;
|
||||
const zipPath = path.join(outputDir, `anilist-${resolvedMedia.id}.zip`);
|
||||
buildDictionaryZip(
|
||||
zipPath,
|
||||
dictionaryTitle,
|
||||
description,
|
||||
revision,
|
||||
storedSnapshot.termEntries,
|
||||
storedSnapshot.images,
|
||||
);
|
||||
deps.logInfo?.(
|
||||
`[dictionary] generated AniList ${resolvedMedia.id}: ${storedSnapshot.entryCount} terms -> ${zipPath}`,
|
||||
);
|
||||
return {
|
||||
zipPath,
|
||||
fromCache: snapshot.fromCache,
|
||||
mediaId: resolvedMedia.id,
|
||||
mediaTitle: fetchedMediaTitle,
|
||||
entryCount: termEntries.length,
|
||||
mediaTitle: storedSnapshot.mediaTitle,
|
||||
entryCount: storedSnapshot.entryCount,
|
||||
dictionaryTitle,
|
||||
revision,
|
||||
};
|
||||
|
||||
@@ -9,213 +9,249 @@ function makeTempDir(): string {
|
||||
return fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-char-dict-auto-sync-'));
|
||||
}
|
||||
|
||||
test('auto sync imports current dictionary and updates persisted state', async () => {
|
||||
test('auto sync imports merged dictionary and persists MRU state', async () => {
|
||||
const userDataPath = makeTempDir();
|
||||
const imported: string[] = [];
|
||||
const deleted: string[] = [];
|
||||
const upserts: Array<{ title: string; scope: 'all' | 'active' }> = [];
|
||||
const mergedBuilds: number[][] = [];
|
||||
|
||||
let importedRevision: string | null = null;
|
||||
|
||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||
userDataPath,
|
||||
getConfig: () => ({
|
||||
enabled: true,
|
||||
refreshTtlHours: 168,
|
||||
maxLoaded: 3,
|
||||
evictionPolicy: 'delete',
|
||||
profileScope: 'all',
|
||||
}),
|
||||
generateCharacterDictionary: async () => ({
|
||||
zipPath: '/tmp/anilist-130298.zip',
|
||||
fromCache: false,
|
||||
getOrCreateCurrentSnapshot: async () => ({
|
||||
mediaId: 130298,
|
||||
mediaTitle: 'The Eminence in Shadow',
|
||||
entryCount: 2544,
|
||||
dictionaryTitle: 'SubMiner Character Dictionary (AniList 130298)',
|
||||
revision: '100',
|
||||
fromCache: false,
|
||||
updatedAt: 1000,
|
||||
}),
|
||||
getYomitanDictionaryInfo: async () => [],
|
||||
buildMergedDictionary: async (mediaIds) => {
|
||||
mergedBuilds.push([...mediaIds]);
|
||||
return {
|
||||
zipPath: '/tmp/subminer-character-dictionary.zip',
|
||||
revision: 'rev-1',
|
||||
dictionaryTitle: 'SubMiner Character Dictionary',
|
||||
entryCount: 2544,
|
||||
};
|
||||
},
|
||||
getYomitanDictionaryInfo: async () =>
|
||||
importedRevision
|
||||
? [{ title: 'SubMiner Character Dictionary', revision: importedRevision }]
|
||||
: [],
|
||||
importYomitanDictionary: async (zipPath) => {
|
||||
imported.push(zipPath);
|
||||
importedRevision = 'rev-1';
|
||||
return true;
|
||||
},
|
||||
deleteYomitanDictionary: async (dictionaryTitle) => {
|
||||
deleted.push(dictionaryTitle);
|
||||
importedRevision = null;
|
||||
return true;
|
||||
},
|
||||
deleteYomitanDictionary: async () => true,
|
||||
upsertYomitanDictionarySettings: async (dictionaryTitle, profileScope) => {
|
||||
upserts.push({ title: dictionaryTitle, scope: profileScope });
|
||||
return true;
|
||||
},
|
||||
removeYomitanDictionarySettings: async () => true,
|
||||
now: () => 1000,
|
||||
});
|
||||
|
||||
await runtime.runSyncNow();
|
||||
|
||||
assert.deepEqual(imported, ['/tmp/anilist-130298.zip']);
|
||||
assert.deepEqual(upserts, [
|
||||
{ title: 'SubMiner Character Dictionary (AniList 130298)', scope: 'all' },
|
||||
]);
|
||||
assert.deepEqual(mergedBuilds, [[130298]]);
|
||||
assert.deepEqual(imported, ['/tmp/subminer-character-dictionary.zip']);
|
||||
assert.deepEqual(deleted, []);
|
||||
assert.deepEqual(upserts, [{ title: 'SubMiner Character Dictionary', scope: 'all' }]);
|
||||
|
||||
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
||||
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
||||
activeMediaIds: number[];
|
||||
dictionariesByMediaId: Record<string, { lastImportedRevision: string }>;
|
||||
mergedRevision: string | null;
|
||||
mergedDictionaryTitle: string | null;
|
||||
};
|
||||
assert.deepEqual(state.activeMediaIds, [130298]);
|
||||
assert.equal(state.dictionariesByMediaId['130298']?.lastImportedRevision, '100');
|
||||
assert.equal(state.mergedRevision, 'rev-1');
|
||||
assert.equal(state.mergedDictionaryTitle, 'SubMiner Character Dictionary');
|
||||
});
|
||||
|
||||
test('auto sync rotates dictionaries by LRU and deletes overflow when policy=delete', async () => {
|
||||
test('auto sync skips rebuild/import on unchanged revisit when merged dictionary is current', async () => {
|
||||
const userDataPath = makeTempDir();
|
||||
const generated = [
|
||||
{ mediaId: 1, zipPath: '/tmp/anilist-1.zip', title: 'SubMiner Character Dictionary (AniList 1)' },
|
||||
{ mediaId: 2, zipPath: '/tmp/anilist-2.zip', title: 'SubMiner Character Dictionary (AniList 2)' },
|
||||
{ mediaId: 3, zipPath: '/tmp/anilist-3.zip', title: 'SubMiner Character Dictionary (AniList 3)' },
|
||||
{ mediaId: 4, zipPath: '/tmp/anilist-4.zip', title: 'SubMiner Character Dictionary (AniList 4)' },
|
||||
];
|
||||
let runIndex = 0;
|
||||
const deletes: string[] = [];
|
||||
const removals: Array<{ title: string; mode: 'delete' | 'disable' }> = [];
|
||||
const mergedBuilds: number[][] = [];
|
||||
const imports: string[] = [];
|
||||
let importedRevision: string | null = null;
|
||||
|
||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||
userDataPath,
|
||||
getConfig: () => ({
|
||||
enabled: true,
|
||||
refreshTtlHours: 168,
|
||||
maxLoaded: 3,
|
||||
evictionPolicy: 'delete',
|
||||
profileScope: 'all',
|
||||
}),
|
||||
generateCharacterDictionary: async () => {
|
||||
const current = generated[Math.min(runIndex, generated.length - 1)]!;
|
||||
runIndex += 1;
|
||||
return {
|
||||
zipPath: current.zipPath,
|
||||
fromCache: false,
|
||||
mediaId: current.mediaId,
|
||||
mediaTitle: `Title ${current.mediaId}`,
|
||||
entryCount: 10,
|
||||
dictionaryTitle: current.title,
|
||||
revision: String(current.mediaId),
|
||||
};
|
||||
},
|
||||
getYomitanDictionaryInfo: async () => [],
|
||||
importYomitanDictionary: async () => true,
|
||||
deleteYomitanDictionary: async (dictionaryTitle) => {
|
||||
deletes.push(dictionaryTitle);
|
||||
return true;
|
||||
},
|
||||
upsertYomitanDictionarySettings: async () => true,
|
||||
removeYomitanDictionarySettings: async (dictionaryTitle, _scope, mode) => {
|
||||
removals.push({ title: dictionaryTitle, mode });
|
||||
return true;
|
||||
},
|
||||
now: () => Date.now(),
|
||||
});
|
||||
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
|
||||
assert.ok(removals.some((entry) => entry.title.includes('(AniList 1)') && entry.mode === 'delete'));
|
||||
assert.ok(deletes.some((title) => title.includes('(AniList 1)')));
|
||||
|
||||
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
||||
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
||||
activeMediaIds: number[];
|
||||
dictionariesByMediaId: Record<string, unknown>;
|
||||
};
|
||||
assert.deepEqual(state.activeMediaIds, [4, 3, 2]);
|
||||
assert.equal(state.dictionariesByMediaId['1'], undefined);
|
||||
});
|
||||
|
||||
test('auto sync disable eviction keeps dictionary in DB and only disables settings', async () => {
|
||||
const userDataPath = makeTempDir();
|
||||
let runIndex = 0;
|
||||
const deletes: string[] = [];
|
||||
const removals: Array<{ title: string; mode: 'delete' | 'disable' }> = [];
|
||||
|
||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||
userDataPath,
|
||||
getConfig: () => ({
|
||||
enabled: true,
|
||||
refreshTtlHours: 168,
|
||||
maxLoaded: 1,
|
||||
evictionPolicy: 'disable',
|
||||
profileScope: 'all',
|
||||
}),
|
||||
generateCharacterDictionary: async () => {
|
||||
runIndex += 1;
|
||||
return {
|
||||
zipPath: `/tmp/anilist-${runIndex}.zip`,
|
||||
fromCache: false,
|
||||
mediaId: runIndex,
|
||||
mediaTitle: `Title ${runIndex}`,
|
||||
entryCount: 10,
|
||||
dictionaryTitle: `SubMiner Character Dictionary (AniList ${runIndex})`,
|
||||
revision: String(runIndex),
|
||||
};
|
||||
},
|
||||
getYomitanDictionaryInfo: async () => [],
|
||||
importYomitanDictionary: async () => true,
|
||||
deleteYomitanDictionary: async (dictionaryTitle) => {
|
||||
deletes.push(dictionaryTitle);
|
||||
return true;
|
||||
},
|
||||
upsertYomitanDictionarySettings: async () => true,
|
||||
removeYomitanDictionarySettings: async (dictionaryTitle, _scope, mode) => {
|
||||
removals.push({ title: dictionaryTitle, mode });
|
||||
return true;
|
||||
},
|
||||
now: () => Date.now(),
|
||||
});
|
||||
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
|
||||
assert.ok(removals.some((entry) => entry.mode === 'disable' && entry.title.includes('(AniList 1)')));
|
||||
assert.equal(deletes.some((title) => title.includes('(AniList 1)')), false);
|
||||
|
||||
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
||||
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
||||
activeMediaIds: number[];
|
||||
dictionariesByMediaId: Record<string, unknown>;
|
||||
};
|
||||
assert.deepEqual(state.activeMediaIds, [2]);
|
||||
assert.ok(state.dictionariesByMediaId['1']);
|
||||
assert.ok(state.dictionariesByMediaId['2']);
|
||||
});
|
||||
|
||||
test('auto sync fails fast when yomitan import hangs', async () => {
|
||||
const userDataPath = makeTempDir();
|
||||
|
||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||
userDataPath,
|
||||
operationTimeoutMs: 5,
|
||||
getConfig: () => ({
|
||||
enabled: true,
|
||||
refreshTtlHours: 168,
|
||||
maxLoaded: 3,
|
||||
evictionPolicy: 'delete',
|
||||
profileScope: 'all',
|
||||
}),
|
||||
generateCharacterDictionary: async () => ({
|
||||
zipPath: '/tmp/anilist-130298.zip',
|
||||
getOrCreateCurrentSnapshot: async () => ({
|
||||
mediaId: 7,
|
||||
mediaTitle: 'Frieren',
|
||||
entryCount: 100,
|
||||
fromCache: true,
|
||||
mediaId: 130298,
|
||||
mediaTitle: 'The Eminence in Shadow',
|
||||
entryCount: 2544,
|
||||
dictionaryTitle: 'SubMiner Character Dictionary (AniList 130298)',
|
||||
revision: '100',
|
||||
updatedAt: 1000,
|
||||
}),
|
||||
getYomitanDictionaryInfo: async () => [],
|
||||
importYomitanDictionary: async () =>
|
||||
new Promise<boolean>(() => {
|
||||
// never resolve
|
||||
}),
|
||||
buildMergedDictionary: async (mediaIds) => {
|
||||
mergedBuilds.push([...mediaIds]);
|
||||
return {
|
||||
zipPath: '/tmp/merged.zip',
|
||||
revision: 'rev-7',
|
||||
dictionaryTitle: 'SubMiner Character Dictionary',
|
||||
entryCount: 100,
|
||||
};
|
||||
},
|
||||
getYomitanDictionaryInfo: async () =>
|
||||
importedRevision
|
||||
? [{ title: 'SubMiner Character Dictionary', revision: importedRevision }]
|
||||
: [],
|
||||
importYomitanDictionary: async (zipPath) => {
|
||||
imports.push(zipPath);
|
||||
importedRevision = 'rev-7';
|
||||
return true;
|
||||
},
|
||||
deleteYomitanDictionary: async () => true,
|
||||
upsertYomitanDictionarySettings: async () => true,
|
||||
removeYomitanDictionarySettings: async () => true,
|
||||
now: () => 1000,
|
||||
});
|
||||
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
|
||||
assert.deepEqual(mergedBuilds, [[7]]);
|
||||
assert.deepEqual(imports, ['/tmp/merged.zip']);
|
||||
});
|
||||
|
||||
// Visits media 1, then 2, then 1 again and checks that every change in the
// most-recently-used order triggers a fresh merged build with the ids in MRU order.
test('auto sync rebuilds merged dictionary when MRU order changes', async () => {
  const userDataPath = makeTempDir();
  const sequence = [1, 2, 1];
  const mergedBuilds: number[][] = [];
  const deleted: string[] = [];
  let importedRevision: string | null = null;
  let runIndex = 0;

  const runtime = createCharacterDictionaryAutoSyncRuntimeService({
    userDataPath,
    getConfig: () => ({
      enabled: true,
      maxLoaded: 3,
      profileScope: 'all',
    }),
    // Each sync reports the next media id in `sequence` (clamped to the last one).
    getOrCreateCurrentSnapshot: async () => {
      const mediaId = sequence[Math.min(runIndex, sequence.length - 1)]!;
      runIndex += 1;
      return {
        mediaId,
        mediaTitle: `Title ${mediaId}`,
        entryCount: 10,
        fromCache: true,
        updatedAt: mediaId,
      };
    },
    // Records the id order of each build and encodes it in the revision and zip name.
    buildMergedDictionary: async (mediaIds) => {
      mergedBuilds.push([...mediaIds]);
      const revision = `rev-${mediaIds.join('-')}`;
      return {
        zipPath: `/tmp/${revision}.zip`,
        revision,
        dictionaryTitle: 'SubMiner Character Dictionary',
        entryCount: mediaIds.length * 10,
      };
    },
    getYomitanDictionaryInfo: async () =>
      importedRevision
        ? [{ title: 'SubMiner Character Dictionary', revision: importedRevision }]
        : [],
    // The fake import derives the "installed" revision from the zip file name.
    importYomitanDictionary: async (zipPath) => {
      importedRevision = path.basename(zipPath, '.zip');
      return true;
    },
    deleteYomitanDictionary: async (dictionaryTitle) => {
      deleted.push(dictionaryTitle);
      importedRevision = null;
      return true;
    },
    upsertYomitanDictionarySettings: async () => true,
    now: () => 1000,
  });

  await runtime.runSyncNow();
  await runtime.runSyncNow();
  await runtime.runSyncNow();

  assert.deepEqual(mergedBuilds, [[1], [2, 1], [1, 2]]);
  // At least two deletions are expected across the three syncs.
  assert.ok(deleted.length >= 2);
});
|
||||
|
||||
test('auto sync evicts least recently used media from merged set', async () => {
|
||||
const userDataPath = makeTempDir();
|
||||
const sequence = [1, 2, 3, 4];
|
||||
const mergedBuilds: number[][] = [];
|
||||
let runIndex = 0;
|
||||
let importedRevision: string | null = null;
|
||||
|
||||
const runtime = createCharacterDictionaryAutoSyncRuntimeService({
|
||||
userDataPath,
|
||||
getConfig: () => ({
|
||||
enabled: true,
|
||||
maxLoaded: 3,
|
||||
profileScope: 'all',
|
||||
}),
|
||||
getOrCreateCurrentSnapshot: async () => {
|
||||
const mediaId = sequence[Math.min(runIndex, sequence.length - 1)]!;
|
||||
runIndex += 1;
|
||||
return {
|
||||
mediaId,
|
||||
mediaTitle: `Title ${mediaId}`,
|
||||
entryCount: 10,
|
||||
fromCache: true,
|
||||
updatedAt: mediaId,
|
||||
};
|
||||
},
|
||||
buildMergedDictionary: async (mediaIds) => {
|
||||
mergedBuilds.push([...mediaIds]);
|
||||
const revision = `rev-${mediaIds.join('-')}`;
|
||||
return {
|
||||
zipPath: `/tmp/${revision}.zip`,
|
||||
revision,
|
||||
dictionaryTitle: 'SubMiner Character Dictionary',
|
||||
entryCount: mediaIds.length * 10,
|
||||
};
|
||||
},
|
||||
getYomitanDictionaryInfo: async () =>
|
||||
importedRevision
|
||||
? [{ title: 'SubMiner Character Dictionary', revision: importedRevision }]
|
||||
: [],
|
||||
importYomitanDictionary: async (zipPath) => {
|
||||
importedRevision = path.basename(zipPath, '.zip');
|
||||
return true;
|
||||
},
|
||||
deleteYomitanDictionary: async () => {
|
||||
importedRevision = null;
|
||||
return true;
|
||||
},
|
||||
upsertYomitanDictionarySettings: async () => true,
|
||||
now: () => Date.now(),
|
||||
});
|
||||
|
||||
await assert.rejects(async () => runtime.runSyncNow(), /importYomitanDictionary\(anilist-130298\.zip\) timed out after 5ms/);
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
await runtime.runSyncNow();
|
||||
|
||||
assert.deepEqual(mergedBuilds, [[1], [2, 1], [3, 2, 1], [4, 3, 2]]);
|
||||
|
||||
const statePath = path.join(userDataPath, 'character-dictionaries', 'auto-sync-state.json');
|
||||
const state = JSON.parse(fs.readFileSync(statePath, 'utf8')) as {
|
||||
activeMediaIds: number[];
|
||||
};
|
||||
assert.deepEqual(state.activeMediaIds, [4, 3, 2]);
|
||||
});
|
||||
|
||||
@@ -1,24 +1,15 @@
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { AnilistCharacterDictionaryProfileScope } from '../../types';
|
||||
import type {
|
||||
AnilistCharacterDictionaryEvictionPolicy,
|
||||
AnilistCharacterDictionaryProfileScope,
|
||||
} from '../../types';
|
||||
import type {
|
||||
CharacterDictionaryBuildResult,
|
||||
CharacterDictionaryGenerateOptions,
|
||||
CharacterDictionarySnapshotResult,
|
||||
MergedCharacterDictionaryBuildResult,
|
||||
} from '../character-dictionary-runtime';
|
||||
|
||||
type AutoSyncStateDictionaryEntry = {
|
||||
mediaId: number;
|
||||
dictionaryTitle: string;
|
||||
lastImportedRevision: string | null;
|
||||
lastUsedAt: number;
|
||||
};
|
||||
|
||||
type AutoSyncState = {
|
||||
activeMediaIds: number[];
|
||||
dictionariesByMediaId: Record<string, AutoSyncStateDictionaryEntry>;
|
||||
mergedRevision: string | null;
|
||||
mergedDictionaryTitle: string | null;
|
||||
};
|
||||
|
||||
type AutoSyncDictionaryInfo = {
|
||||
@@ -28,18 +19,15 @@ type AutoSyncDictionaryInfo = {
|
||||
|
||||
export interface CharacterDictionaryAutoSyncConfig {
|
||||
enabled: boolean;
|
||||
refreshTtlHours: number;
|
||||
maxLoaded: number;
|
||||
evictionPolicy: AnilistCharacterDictionaryEvictionPolicy;
|
||||
profileScope: AnilistCharacterDictionaryProfileScope;
|
||||
}
|
||||
|
||||
export interface CharacterDictionaryAutoSyncRuntimeDeps {
|
||||
userDataPath: string;
|
||||
getConfig: () => CharacterDictionaryAutoSyncConfig;
|
||||
generateCharacterDictionary: (
|
||||
options?: CharacterDictionaryGenerateOptions,
|
||||
) => Promise<CharacterDictionaryBuildResult>;
|
||||
getOrCreateCurrentSnapshot: (targetPath?: string) => Promise<CharacterDictionarySnapshotResult>;
|
||||
buildMergedDictionary: (mediaIds: number[]) => Promise<MergedCharacterDictionaryBuildResult>;
|
||||
getYomitanDictionaryInfo: () => Promise<AutoSyncDictionaryInfo[]>;
|
||||
importYomitanDictionary: (zipPath: string) => Promise<boolean>;
|
||||
deleteYomitanDictionary: (dictionaryTitle: string) => Promise<boolean>;
|
||||
@@ -47,11 +35,6 @@ export interface CharacterDictionaryAutoSyncRuntimeDeps {
|
||||
dictionaryTitle: string,
|
||||
profileScope: AnilistCharacterDictionaryProfileScope,
|
||||
) => Promise<boolean>;
|
||||
removeYomitanDictionarySettings: (
|
||||
dictionaryTitle: string,
|
||||
profileScope: AnilistCharacterDictionaryProfileScope,
|
||||
mode: 'delete' | 'disable',
|
||||
) => Promise<boolean>;
|
||||
now: () => number;
|
||||
schedule?: (fn: () => void, delayMs: number) => ReturnType<typeof setTimeout>;
|
||||
clearSchedule?: (timer: ReturnType<typeof setTimeout>) => void;
|
||||
@@ -70,56 +53,29 @@ function readAutoSyncState(statePath: string): AutoSyncState {
|
||||
try {
|
||||
const raw = fs.readFileSync(statePath, 'utf8');
|
||||
const parsed = JSON.parse(raw) as Partial<AutoSyncState>;
|
||||
if (!parsed || typeof parsed !== 'object') {
|
||||
return { activeMediaIds: [], dictionariesByMediaId: {} };
|
||||
}
|
||||
const dictionariesByMediaId = parsed.dictionariesByMediaId ?? {};
|
||||
if (!dictionariesByMediaId || typeof dictionariesByMediaId !== 'object') {
|
||||
return { activeMediaIds: [], dictionariesByMediaId: {} };
|
||||
}
|
||||
|
||||
const normalizedEntries: Record<string, AutoSyncStateDictionaryEntry> = {};
|
||||
for (const [key, value] of Object.entries(dictionariesByMediaId)) {
|
||||
if (!value || typeof value !== 'object') {
|
||||
continue;
|
||||
}
|
||||
const mediaId = Number.parseInt(key, 10);
|
||||
const dictionaryTitle =
|
||||
typeof (value as { dictionaryTitle?: unknown }).dictionaryTitle === 'string'
|
||||
? (value as { dictionaryTitle: string }).dictionaryTitle.trim()
|
||||
: '';
|
||||
if (!Number.isFinite(mediaId) || mediaId <= 0 || !dictionaryTitle) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const lastImportedRevisionRaw = (value as { lastImportedRevision?: unknown })
|
||||
.lastImportedRevision;
|
||||
const lastUsedAtRaw = (value as { lastUsedAt?: unknown }).lastUsedAt;
|
||||
normalizedEntries[String(mediaId)] = {
|
||||
mediaId,
|
||||
dictionaryTitle,
|
||||
lastImportedRevision:
|
||||
typeof lastImportedRevisionRaw === 'string' && lastImportedRevisionRaw.length > 0
|
||||
? lastImportedRevisionRaw
|
||||
: null,
|
||||
lastUsedAt:
|
||||
typeof lastUsedAtRaw === 'number' && Number.isFinite(lastUsedAtRaw) ? lastUsedAtRaw : 0,
|
||||
};
|
||||
}
|
||||
|
||||
const activeMediaIdsRaw = Array.isArray(parsed.activeMediaIds) ? parsed.activeMediaIds : [];
|
||||
const activeMediaIds = activeMediaIdsRaw
|
||||
.filter((value): value is number => typeof value === 'number' && Number.isFinite(value))
|
||||
.map((value) => Math.max(1, Math.floor(value)))
|
||||
.filter((value, index, all) => all.indexOf(value) === index)
|
||||
.filter((value) => normalizedEntries[String(value)] !== undefined);
|
||||
|
||||
const activeMediaIds = Array.isArray(parsed.activeMediaIds)
|
||||
? parsed.activeMediaIds
|
||||
.filter((value): value is number => typeof value === 'number' && Number.isFinite(value))
|
||||
.map((value) => Math.max(1, Math.floor(value)))
|
||||
.filter((value, index, all) => all.indexOf(value) === index)
|
||||
: [];
|
||||
return {
|
||||
activeMediaIds,
|
||||
dictionariesByMediaId: normalizedEntries,
|
||||
mergedRevision:
|
||||
typeof parsed.mergedRevision === 'string' && parsed.mergedRevision.length > 0
|
||||
? parsed.mergedRevision
|
||||
: null,
|
||||
mergedDictionaryTitle:
|
||||
typeof parsed.mergedDictionaryTitle === 'string' && parsed.mergedDictionaryTitle.length > 0
|
||||
? parsed.mergedDictionaryTitle
|
||||
: null,
|
||||
};
|
||||
} catch {
|
||||
return { activeMediaIds: [], dictionariesByMediaId: {} };
|
||||
return {
|
||||
activeMediaIds: [],
|
||||
mergedRevision: null,
|
||||
mergedDictionaryTitle: null,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,8 +84,12 @@ function writeAutoSyncState(statePath: string, state: AutoSyncState): void {
|
||||
fs.writeFileSync(statePath, JSON.stringify(state, null, 2), 'utf8');
|
||||
}
|
||||
|
||||
function buildDictionaryTitle(mediaId: number): string {
|
||||
return `SubMiner Character Dictionary (AniList ${mediaId})`;
|
||||
/**
 * Reports whether two lists of numbers hold the same values in the same order.
 *
 * The comparison is positional and uses strict equality, so `[1, 2]` and
 * `[2, 1]` are considered different. Neither input is mutated, which is why
 * both parameters are typed `readonly`; this also lets callers pass readonly
 * arrays and tuples without a cast.
 *
 * @param left - First list to compare.
 * @param right - Second list to compare.
 * @returns `true` when both lists have the same length and equal elements at every index.
 */
function arraysEqual(left: readonly number[], right: readonly number[]): boolean {
  // Different lengths can never match; bail out before walking the elements.
  if (left.length !== right.length) return false;
  for (let i = 0; i < left.length; i += 1) {
    if (left[i] !== right[i]) return false;
  }
  return true;
}
|
||||
|
||||
export function createCharacterDictionaryAutoSyncRuntimeService(
|
||||
@@ -173,15 +133,30 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
||||
return;
|
||||
}
|
||||
|
||||
const refreshTtlMs = Math.max(1, Math.floor(config.refreshTtlHours)) * 60 * 60 * 1000;
|
||||
const generation = await deps.generateCharacterDictionary({ refreshTtlMs });
|
||||
const dictionaryTitle = generation.dictionaryTitle ?? buildDictionaryTitle(generation.mediaId);
|
||||
const revision =
|
||||
typeof generation.revision === 'string' && generation.revision.length > 0
|
||||
? generation.revision
|
||||
: null;
|
||||
|
||||
const snapshot = await deps.getOrCreateCurrentSnapshot();
|
||||
const state = readAutoSyncState(statePath);
|
||||
const nextActiveMediaIds = [
|
||||
snapshot.mediaId,
|
||||
...state.activeMediaIds.filter((mediaId) => mediaId !== snapshot.mediaId),
|
||||
].slice(0, Math.max(1, Math.floor(config.maxLoaded)));
|
||||
|
||||
const retainedChanged = !arraysEqual(nextActiveMediaIds, state.activeMediaIds);
|
||||
let merged: MergedCharacterDictionaryBuildResult | null = null;
|
||||
if (
|
||||
retainedChanged ||
|
||||
!state.mergedRevision ||
|
||||
!state.mergedDictionaryTitle ||
|
||||
!snapshot.fromCache
|
||||
) {
|
||||
merged = await deps.buildMergedDictionary(nextActiveMediaIds);
|
||||
}
|
||||
|
||||
const dictionaryTitle = merged?.dictionaryTitle ?? state.mergedDictionaryTitle;
|
||||
const revision = merged?.revision ?? state.mergedRevision;
|
||||
if (!dictionaryTitle || !revision) {
|
||||
throw new Error('Merged character dictionary state is incomplete.');
|
||||
}
|
||||
|
||||
const dictionaryInfo = await withOperationTimeout(
|
||||
'getYomitanDictionaryInfo',
|
||||
deps.getYomitanDictionaryInfo(),
|
||||
@@ -192,7 +167,7 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
||||
? String(existing.revision)
|
||||
: null;
|
||||
const shouldImport =
|
||||
existing === null || (revision !== null && existingRevision !== revision);
|
||||
merged !== null || existing === null || existingRevision === null || existingRevision !== revision;
|
||||
|
||||
if (shouldImport) {
|
||||
if (existing !== null) {
|
||||
@@ -201,15 +176,16 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
||||
deps.deleteYomitanDictionary(dictionaryTitle),
|
||||
);
|
||||
}
|
||||
deps.logInfo?.(
|
||||
`[dictionary:auto-sync] importing AniList ${generation.mediaId}: ${generation.zipPath}`,
|
||||
);
|
||||
if (merged === null) {
|
||||
merged = await deps.buildMergedDictionary(nextActiveMediaIds);
|
||||
}
|
||||
deps.logInfo?.(`[dictionary:auto-sync] importing merged dictionary: ${merged.zipPath}`);
|
||||
const imported = await withOperationTimeout(
|
||||
`importYomitanDictionary(${path.basename(generation.zipPath)})`,
|
||||
deps.importYomitanDictionary(generation.zipPath),
|
||||
`importYomitanDictionary(${path.basename(merged.zipPath)})`,
|
||||
deps.importYomitanDictionary(merged.zipPath),
|
||||
);
|
||||
if (!imported) {
|
||||
throw new Error(`Failed to import dictionary ZIP: ${generation.zipPath}`);
|
||||
throw new Error(`Failed to import dictionary ZIP: ${merged.zipPath}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,49 +194,13 @@ export function createCharacterDictionaryAutoSyncRuntimeService(
|
||||
deps.upsertYomitanDictionarySettings(dictionaryTitle, config.profileScope),
|
||||
);
|
||||
|
||||
const mediaIdKey = String(generation.mediaId);
|
||||
state.dictionariesByMediaId[mediaIdKey] = {
|
||||
mediaId: generation.mediaId,
|
||||
dictionaryTitle,
|
||||
lastImportedRevision: revision,
|
||||
lastUsedAt: deps.now(),
|
||||
};
|
||||
state.activeMediaIds = [
|
||||
generation.mediaId,
|
||||
...state.activeMediaIds.filter((value) => value !== generation.mediaId),
|
||||
];
|
||||
|
||||
const maxLoaded = Math.max(1, Math.floor(config.maxLoaded));
|
||||
while (state.activeMediaIds.length > maxLoaded) {
|
||||
const evictedMediaId = state.activeMediaIds.pop();
|
||||
if (evictedMediaId === undefined) {
|
||||
break;
|
||||
}
|
||||
const evicted = state.dictionariesByMediaId[String(evictedMediaId)];
|
||||
if (!evicted) {
|
||||
continue;
|
||||
}
|
||||
|
||||
await withOperationTimeout(
|
||||
`removeYomitanDictionarySettings(${evicted.dictionaryTitle})`,
|
||||
deps.removeYomitanDictionarySettings(
|
||||
evicted.dictionaryTitle,
|
||||
config.profileScope,
|
||||
config.evictionPolicy,
|
||||
),
|
||||
);
|
||||
if (config.evictionPolicy === 'delete') {
|
||||
await withOperationTimeout(
|
||||
`deleteYomitanDictionary(${evicted.dictionaryTitle})`,
|
||||
deps.deleteYomitanDictionary(evicted.dictionaryTitle),
|
||||
);
|
||||
delete state.dictionariesByMediaId[String(evictedMediaId)];
|
||||
}
|
||||
}
|
||||
|
||||
writeAutoSyncState(statePath, state);
|
||||
writeAutoSyncState(statePath, {
|
||||
activeMediaIds: nextActiveMediaIds,
|
||||
mergedRevision: merged?.revision ?? revision,
|
||||
mergedDictionaryTitle: merged?.dictionaryTitle ?? dictionaryTitle,
|
||||
});
|
||||
deps.logInfo?.(
|
||||
`[dictionary:auto-sync] synced AniList ${generation.mediaId}: ${dictionaryTitle} (${generation.entryCount} entries)`,
|
||||
`[dictionary:auto-sync] synced AniList ${snapshot.mediaId}: ${dictionaryTitle} (${snapshot.entryCount} entries)`,
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
@@ -9,12 +9,114 @@ import {
|
||||
alignTokensToSourceText,
|
||||
buildSubtitleTokenHoverRanges,
|
||||
computeWordClass,
|
||||
createSubtitleRenderer,
|
||||
getFrequencyRankLabelForToken,
|
||||
getJlptLevelLabelForToken,
|
||||
normalizeSubtitle,
|
||||
sanitizeSubtitleHoverTokenColor,
|
||||
shouldRenderTokenizedSubtitle,
|
||||
} from './subtitle-render.js';
|
||||
import { createRendererState } from './state.js';
|
||||
|
||||
/** Minimal stand-in for a DOM text node: it carries nothing but its text. */
class FakeTextNode {
  textContent: string;

  constructor(textContent: string) {
    this.textContent = textContent;
  }
}
|
||||
|
||||
/**
 * Test double for a document fragment: it simply records the nodes appended
 * to it in `childNodes`.
 */
class FakeDocumentFragment {
  childNodes: Array<FakeElement | FakeTextNode> = [];

  /**
   * Appends `child` to this fragment.
   *
   * A plain element or text node is pushed as-is. When `child` is itself a
   * fragment, its nodes are moved into this fragment and the source fragment
   * is left empty.
   *
   * @param child - Node or fragment to append.
   * @returns The `child` argument that was passed in.
   */
  appendChild(
    child: FakeElement | FakeTextNode | FakeDocumentFragment,
  ): FakeElement | FakeTextNode | FakeDocumentFragment {
    if (!(child instanceof FakeDocumentFragment)) {
      this.childNodes.push(child);
      return child;
    }

    // Copy first, then empty the source, so the nodes end up owned by one parent.
    this.childNodes.push(...child.childNodes);
    child.childNodes = [];
    return child;
  }
}
|
||||
|
||||
/**
 * Test double for an element's `style` object. It stores every property
 * written through `setProperty`, keyed by name; a later write to the same
 * name replaces the earlier value.
 */
class FakeStyleDeclaration {
  private values: Map<string, string> = new Map();

  /** Stores `value` under `name`. */
  setProperty(name: string, value: string): void {
    this.values.set(name, value);
  }
}
|
||||
|
||||
/**
 * Lightweight test double for an HTML element.
 *
 * It tracks appended children, a `dataset`, a `className`, a fake `style`
 * object and a text value. Known simplifications compared to a real DOM node:
 * - `textContent` set directly is kept as a private string rather than as a
 *   child text node, and is only reported while the element has no children.
 * - `innerHTML` only supports being set to the empty string (clear); any
 *   other value is ignored, and there is no getter.
 */
class FakeElement {
  childNodes: Array<FakeElement | FakeTextNode> = [];
  dataset: Record<string, string> = {};
  style = new FakeStyleDeclaration();
  className = '';
  private ownTextContent = '';

  constructor(public tagName: string) {}

  /**
   * Appends `child` to this element.
   *
   * When `child` is a fragment, its nodes are moved into this element and the
   * fragment is emptied; otherwise the node is pushed as-is.
   *
   * @param child - Node or fragment to append.
   * @returns The `child` argument that was passed in.
   */
  appendChild(
    child: FakeElement | FakeTextNode | FakeDocumentFragment,
  ): FakeElement | FakeTextNode | FakeDocumentFragment {
    if (child instanceof FakeDocumentFragment) {
      this.childNodes.push(...child.childNodes);
      child.childNodes = [];
      return child;
    }

    this.childNodes.push(child);
    return child;
  }

  /** Replaces the element's content with plain text, dropping all children. */
  set textContent(value: string) {
    this.ownTextContent = value;
    this.childNodes = [];
  }

  /**
   * Returns the directly assigned text when there are no children; otherwise
   * the concatenated `textContent` of all children, recursing into elements.
   */
  get textContent(): string {
    if (this.childNodes.length === 0) {
      return this.ownTextContent;
    }

    // Both child kinds expose `textContent`, so no per-type branching is needed.
    return this.childNodes.map((child) => child.textContent).join('');
  }

  /** Clears children and own text when set to `''`; other values are ignored. */
  set innerHTML(value: string) {
    if (value === '') {
      this.childNodes = [];
      this.ownTextContent = '';
    }
  }
}
|
||||
|
||||
/**
 * Installs a minimal fake `document` on `globalThis` for the duration of a test.
 *
 * The fake exposes only `createDocumentFragment`, `createElement` and
 * `createTextNode`, each returning the corresponding fake node class.
 *
 * @returns A function that undoes the installation. It restores the original
 *   own-property descriptor of `globalThis.document` when there was one, and
 *   removes the property entirely when there was none, so no own `document`
 *   property with value `undefined` is left behind.
 */
function installFakeDocument(): () => void {
  // Capture the whole descriptor (not just the value) so getters and
  // writable/enumerable flags survive the round trip.
  const previousDescriptor = Object.getOwnPropertyDescriptor(globalThis, 'document');

  Object.defineProperty(globalThis, 'document', {
    configurable: true, // must stay configurable so the restore step can replace or delete it
    value: {
      createDocumentFragment: () => new FakeDocumentFragment(),
      createElement: (tagName: string) => new FakeElement(tagName),
      createTextNode: (text: string) => new FakeTextNode(text),
    },
  });

  return () => {
    if (previousDescriptor === undefined) {
      // There was no own `document` before: remove ours instead of defining `undefined`.
      delete (globalThis as { document?: unknown }).document;
      return;
    }

    Object.defineProperty(globalThis, 'document', previousDescriptor);
  };
}
|
||||
|
||||
/**
 * Collects the direct children of `root` that are fake elements whose
 * `className` contains the substring `word`. Text nodes and deeper
 * descendants are not considered.
 */
function collectWordNodes(root: FakeElement): FakeElement[] {
  const wordNodes: FakeElement[] = [];
  for (const child of root.childNodes) {
    if (child instanceof FakeElement && child.className.includes('word')) {
      wordNodes.push(child);
    }
  }
  return wordNodes;
}
|
||||
|
||||
function createToken(overrides: Partial<MergedToken>): MergedToken {
|
||||
return {
|
||||
@@ -288,6 +390,16 @@ test('alignTokensToSourceText treats whitespace-only token surfaces as plain tex
|
||||
);
|
||||
});
|
||||
|
||||
// The source text contains "!?" between the two token surfaces; alignment is
// expected to surface it as a plain-text segment between the tokens.
test('alignTokensToSourceText preserves unsupported punctuation between matched tokens', () => {
  const sourceText = 'えっ!?マジ';
  const tokens = [createToken({ surface: 'えっ' }), createToken({ surface: 'マジ' })];

  const segmentLabels = alignTokensToSourceText(tokens, sourceText).map((segment) =>
    segment.kind === 'text' ? `text:${segment.text}` : 'token',
  );

  assert.deepEqual(segmentLabels, ['token', 'text:!?', 'token']);
});
|
||||
|
||||
test('alignTokensToSourceText avoids duplicate tail when later token surface does not match source', () => {
|
||||
const tokens = [
|
||||
createToken({ surface: '君たちが潰した拠点に' }),
|
||||
@@ -327,6 +439,55 @@ test('buildSubtitleTokenHoverRanges ignores unmatched token surfaces', () => {
|
||||
assert.deepEqual(ranges, [{ start: 0, end: 10, tokenIndex: 0 }]);
|
||||
});
|
||||
|
||||
// "!?" sits between the two token surfaces and gets no hover range of its own,
// but it still occupies offsets 2-3, so the second token must start at 4.
test('buildSubtitleTokenHoverRanges skips unsupported punctuation while preserving later offsets', () => {
  const sourceText = 'えっ!?マジ';
  const tokens = [createToken({ surface: 'えっ' }), createToken({ surface: 'マジ' })];
  const expectedRanges = [
    { start: 0, end: 2, tokenIndex: 0 },
    { start: 4, end: 6, tokenIndex: 1 },
  ];

  const ranges = buildSubtitleTokenHoverRanges(tokens, sourceText);

  assert.deepEqual(ranges, expectedRanges);
});
|
||||
|
||||
// Renders a tokenized subtitle into the fake DOM and checks two things: the
// full text (including the "!?" not covered by any token) is present, and only
// the two tokens become word nodes carrying a token index.
test('renderSubtitle preserves unsupported punctuation while keeping it non-interactive', () => {
  const restoreDocument = installFakeDocument();

  try {
    const subtitleRoot = new FakeElement('div');
    const dom = {
      subtitleRoot,
      subtitleContainer: new FakeElement('div'),
      secondarySubRoot: new FakeElement('div'),
      secondarySubContainer: new FakeElement('div'),
    };
    const platform = {
      isMacOSPlatform: false,
      isModalLayer: false,
      overlayLayer: 'visible',
      shouldToggleMouseIgnore: false,
    };
    // `as never` because this context is a partial stand-in built from fakes.
    const renderer = createSubtitleRenderer({
      dom,
      platform,
      state: createRendererState(),
    } as never);

    renderer.renderSubtitle({
      text: 'えっ!?マジ',
      tokens: [createToken({ surface: 'えっ' }), createToken({ surface: 'マジ' })],
    });

    assert.equal(subtitleRoot.textContent, 'えっ!?マジ');

    const renderedWords = collectWordNodes(subtitleRoot).map((node) => [
      node.textContent,
      node.dataset.tokenIndex,
    ]);
    assert.deepEqual(renderedWords, [
      ['えっ', '0'],
      ['マジ', '1'],
    ]);
  } finally {
    // Always put the global `document` back, even when an assertion throws.
    restoreDocument();
  }
});
|
||||
|
||||
test('normalizeSubtitle collapses explicit line breaks when collapseLineBreaks is enabled', () => {
|
||||
assert.equal(
|
||||
normalizeSubtitle('常人が使えば\\Nその圧倒的な力に\\n体が耐えきれず死に至るが…', true, true),
|
||||
|
||||
@@ -241,13 +241,17 @@ function renderWithTokens(
|
||||
|
||||
const fragment = document.createDocumentFragment();
|
||||
|
||||
if (preserveLineBreaks && sourceText) {
|
||||
const normalizedSource = normalizeSubtitle(sourceText, true, false);
|
||||
if (sourceText) {
|
||||
const normalizedSource = normalizeSubtitle(sourceText, true, !preserveLineBreaks);
|
||||
const segments = alignTokensToSourceText(tokens, normalizedSource);
|
||||
|
||||
for (const segment of segments) {
|
||||
if (segment.kind === 'text') {
|
||||
renderPlainTextPreserveLineBreaks(fragment, segment.text);
|
||||
if (preserveLineBreaks) {
|
||||
renderPlainTextPreserveLineBreaks(fragment, segment.text);
|
||||
} else {
|
||||
fragment.appendChild(document.createTextNode(segment.text));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user