mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-21 00:11:27 -07:00
2257 lines
62 KiB
TypeScript
2257 lines
62 KiB
TypeScript
import * as fs from 'fs';
|
||
import * as os from 'os';
|
||
import * as path from 'path';
|
||
import { createHash } from 'node:crypto';
|
||
import type { AnilistMediaGuess } from '../core/services/anilist/anilist-updater';
|
||
import type { AnilistCharacterDictionaryCollapsibleSectionKey } from '../types';
|
||
import { hasVideoExtension } from '../shared/video-extensions';
|
||
|
||
/** AniList GraphQL endpoint URL. */
const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co';

/** Delay value associated with AniList requests, in milliseconds (per the `_MS` suffix). */
const ANILIST_REQUEST_DELAY_MS = 2000;

/** Delay value associated with character image downloads, in milliseconds. */
const CHARACTER_IMAGE_DOWNLOAD_DELAY_MS = 250;
|
||
/**
 * Honorific suffixes appended to character names, each paired with its kana
 * reading. Order is preserved from the original list.
 */
const HONORIFIC_SUFFIXES = [
  { term: 'さん', reading: 'さん' },
  { term: '様', reading: 'さま' },
  { term: '先生', reading: 'せんせい' },
  { term: '先輩', reading: 'せんぱい' },
  { term: '後輩', reading: 'こうはい' },
  { term: '氏', reading: 'し' },
  { term: '君', reading: 'くん' },
  { term: 'くん', reading: 'くん' },
  { term: 'ちゃん', reading: 'ちゃん' },
  { term: 'たん', reading: 'たん' },
  { term: '坊', reading: 'ぼう' },
  { term: '殿', reading: 'どの' },
  { term: '博士', reading: 'はかせ' },
  { term: '社長', reading: 'しゃちょう' },
  { term: '部長', reading: 'ぶちょう' },
] as const;
|
||
/** The four role tiers a character can be filed under in the dictionary. */
type CharacterDictionaryRole =
  | 'main'
  | 'primary'
  | 'side'
  | 'appears';
|
||
|
||
/** One glossary item: either a plain string or an arbitrary keyed object. */
type CharacterDictionaryGlossaryEntry =
  | string
  | Record<string, unknown>;
|
||
/**
 * One dictionary row, stored as a fixed 8-slot tuple:
 * four strings, a number, the glossary list, a number, and a trailing string.
 *
 * NOTE(review): the slot meanings are not visible in this part of the file —
 * presumably the Yomitan term-bank layout (term, reading, tags, rules, score,
 * glossary, sequence, term tags); confirm against the code that builds rows.
 */
type CharacterDictionaryTermEntry = [
  string,
  string,
  string,
  string,
  number,
  CharacterDictionaryGlossaryEntry[],
  number,
  string,
];
|
||
|
||
/** An image carried inside a snapshot. */
type CharacterDictionarySnapshotImage = {
  /** Path identifying the image (presumably relative to the dictionary archive — confirm). */
  path: string;
  /** Image payload, base64-encoded per the field name. */
  dataBase64: string;
};
|
||
|
||
/** Birthday as a two-number tuple; `formatBirthday` reads it as `[month, day]`. */
type CharacterBirthday = [number, number];
|
||
|
||
/** Result of splitting a native-script name into two parts. */
type JapaneseNameParts = {
  /** True when the name was split into two parts (by any strategy, not only whitespace). */
  hasSpace: boolean;
  /** The trimmed (and, for space-separated names, whitespace-normalised) input. */
  original: string;
  /** The name with its two parts joined without a separator. */
  combined: string;
  /** First part of the split, or null when the name was not split. */
  family: string | null;
  /** Second part of the split, or null when the name was not split. */
  given: string | null;
};
|
||
|
||
/** Kana readings derived for a name and its parts (empty string when unknown). */
type NameReadings = {
  /** True when separate family/given readings were produced. */
  hasSpace: boolean;
  /** The name the readings belong to. */
  original: string;
  /** Reading of the whole name. */
  full: string;
  /** Reading of the family part (equals `full` when the name was not split). */
  family: string;
  /** Reading of the given part (equals `full` when the name was not split). */
  given: string;
};
|
||
|
||
/** Per-media dictionary snapshot as persisted to / loaded from JSON. */
export type CharacterDictionarySnapshot = {
  /** Schema version; `readSnapshot` rejects any value other than the current one. */
  formatVersion: number;
  /** Media id the snapshot belongs to (used in the `anilist-<id>.json` file name). */
  mediaId: number;
  mediaTitle: string;
  entryCount: number;
  /** Numeric timestamp of the last update (presumably epoch milliseconds — confirm). */
  updatedAt: number;
  termEntries: CharacterDictionaryTermEntry[];
  images: CharacterDictionarySnapshotImage[];
};
|
||
|
||
/** Snapshot schema version; snapshots carrying a different value are discarded on read. */
const CHARACTER_DICTIONARY_FORMAT_VERSION = 15;

/** Title string for the merged dictionary. */
const CHARACTER_DICTIONARY_MERGED_TITLE = 'SubMiner Character Dictionary';
|
||
|
||
/** Shape of the `data` payload expected from the AniList media search query. */
type AniListSearchResponse = {
  Page?: {
    /** Candidate media rows returned by the search. */
    media?: Array<{
      id: number;
      /** Episode count; may be absent or null. */
      episodes?: number | null;
      title?: {
        romaji?: string | null;
        english?: string | null;
        native?: string | null;
      };
    }>;
  };
};
|
||
|
||
/** Shape of the `data` payload expected from the AniList character page query. */
type AniListCharacterPageResponse = {
  Media?: {
    title?: {
      romaji?: string | null;
      english?: string | null;
      native?: string | null;
    };
    characters?: {
      /** Pagination marker for the character list. */
      pageInfo?: {
        hasNextPage?: boolean | null;
      };
      /** One edge per character; individual edges may be null. */
      edges?: Array<{
        /** Role string for the character (see `mapRole` for the values handled). */
        role?: string | null;
        voiceActors?: Array<{
          id: number;
          name?: {
            full?: string | null;
            native?: string | null;
          } | null;
          image?: {
            large?: string | null;
            medium?: string | null;
          } | null;
        }> | null;
        /** The character record itself. */
        node?: {
          id: number;
          description?: string | null;
          image?: {
            large?: string | null;
            medium?: string | null;
          } | null;
          gender?: string | null;
          /** Typed as either a string or a number. */
          age?: string | number | null;
          dateOfBirth?: {
            month?: number | null;
            day?: number | null;
          } | null;
          bloodType?: string | null;
          name?: {
            first?: string | null;
            full?: string | null;
            last?: string | null;
            native?: string | null;
            alternative?: Array<string | null> | null;
          } | null;
        } | null;
      } | null>;
    } | null;
  } | null;
};
|
||
|
||
/** Flattened voice-actor data attached to a character. */
type VoiceActorRecord = {
  id: number;
  /** Name from the `full` field of the source record. */
  fullName: string;
  /** Name in native script; may be an empty string (see `buildVoicedByContent`). */
  nativeName: string;
  /** Portrait URL, or null when none is available. */
  imageUrl: string | null;
};
|
||
|
||
/** Flattened character data used to build dictionary entries. */
type CharacterRecord = {
  id: number;
  role: CharacterDictionaryRole;
  /** First-name hint passed to the name splitting / reading helpers. */
  firstNameHint: string;
  fullName: string;
  /** Last-name hint passed to the name splitting / reading helpers. */
  lastNameHint: string;
  /** Name in native script. */
  nativeName: string;
  alternativeNames: string[];
  bloodType: string;
  birthday: CharacterBirthday | null;
  /** Raw description text; parsed by `parseCharacterDescription`. */
  description: string;
  imageUrl: string | null;
  /** Age kept as text; blank when unknown. */
  age: string;
  /** Sex/gender text; matched case-insensitively against `SEX_DISPLAY`. */
  sex: string;
  voiceActors: VoiceActorRecord[];
};
|
||
|
||
/** Bookkeeping for one file placed in a zip archive. */
type ZipEntry = {
  /** Entry name inside the archive. */
  name: string;
  /** Entry bytes. */
  data: Buffer;
  /** CRC-32 value recorded for the entry. */
  crc32: number;
  /** Offset of the entry's local header (presumably in bytes from the archive start — confirm). */
  localHeaderOffset: number;
};
|
||
|
||
/** Outcome of building a character dictionary archive. */
export type CharacterDictionaryBuildResult = {
  /** Location of the produced zip file. */
  zipPath: string;
  /** Whether the result came from cache rather than a fresh build. */
  fromCache: boolean;
  mediaId: number;
  mediaTitle: string;
  entryCount: number;
  dictionaryTitle?: string;
  revision?: string;
};
|
||
|
||
/** Optional knobs for dictionary generation. */
export type CharacterDictionaryGenerateOptions = {
  /** Refresh time-to-live in milliseconds (exact semantics are defined by the consumer, not shown here). */
  refreshTtlMs?: number;
};
|
||
|
||
/** Summary returned after obtaining a per-media snapshot. */
export type CharacterDictionarySnapshotResult = {
  mediaId: number;
  mediaTitle: string;
  entryCount: number;
  /** Whether the snapshot came from cache rather than being regenerated. */
  fromCache: boolean;
  updatedAt: number;
};
|
||
|
||
/** Payload handed to snapshot progress callbacks. */
export type CharacterDictionarySnapshotProgress = {
  mediaId: number;
  mediaTitle: string;
};
|
||
|
||
/** Optional hooks for observing snapshot progress. */
export type CharacterDictionarySnapshotProgressCallbacks = {
  /** Hook for the "checking" phase. */
  onChecking?: (progress: CharacterDictionarySnapshotProgress) => void;
  /** Hook for the "generating" phase. */
  onGenerating?: (progress: CharacterDictionarySnapshotProgress) => void;
};
|
||
|
||
/** Outcome of building the merged dictionary archive. */
export type MergedCharacterDictionaryBuildResult = {
  /** Location of the produced zip file. */
  zipPath: string;
  revision: string;
  dictionaryTitle: string;
  entryCount: number;
};
|
||
|
||
/** Host-provided dependencies for the character dictionary runtime. */
export interface CharacterDictionaryRuntimeDeps {
  /** Base directory for user data. */
  userDataPath: string;
  /** Path of the media currently in use, or null. */
  getCurrentMediaPath: () => string | null;
  /** Title of the media currently in use, or null. */
  getCurrentMediaTitle: () => string | null;
  /** Maps a media path to the path used for Jimaku lookups (per its name); may return null. */
  resolveMediaPathForJimaku: (mediaPath: string | null) => string | null;
  /** Produces an AniList media guess from a path and/or title; resolves to null when no guess is made. */
  guessAnilistMediaInfo: (
    mediaPath: string | null,
    mediaTitle: string | null,
  ) => Promise<AnilistMediaGuess | null>;
  /** Clock source returning a numeric timestamp. */
  now: () => number;
  /** Optional replacement for the module's own `sleep`. */
  sleep?: (ms: number) => Promise<void>;
  logInfo?: (message: string) => void;
  logWarn?: (message: string) => void;
  /** Optional lookup for whether a collapsible section is open. */
  getCollapsibleSectionOpenState?: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean;
}
|
||
|
||
/** The AniList media selected from a search: its id and a display title. */
type ResolvedAniListMedia = {
  id: number;
  title: string;
};
|
||
|
||
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||
|
||
/**
 * Canonicalises a title for comparison: trims it, lower-cases it and collapses
 * every whitespace run to a single space.
 */
function normalizeTitle(value: string): string {
  const lowered = value.trim().toLowerCase();
  return lowered.split(/\s+/).join(' ');
}
|
||
|
||
/**
 * Chooses one media row from AniList search results.
 *
 * When `episode` is a positive number, rows whose known episode count is lower
 * than it are dropped (unless that would leave nothing). Among the remaining
 * rows, the first whose romaji/english/native title equals `title` after
 * normalisation wins; otherwise the first remaining row is used.
 *
 * @param title Title that was searched for; also the fallback display title.
 * @param episode Episode number being watched, or null when unknown.
 * @param media Search results, in the order returned.
 * @returns The selected id with a display title (english, then romaji, then
 *   native, then `title`), or null when `media` is empty.
 */
function pickAniListSearchResult(
  title: string,
  episode: number | null,
  media: Array<{
    id: number;
    episodes?: number | null;
    title?: {
      romaji?: string | null;
      english?: string | null;
      native?: string | null;
    };
  }>,
): ResolvedAniListMedia | null {
  if (media.length === 0) return null;

  // Narrow by episode count only when that still leaves at least one row.
  let pool = media;
  if (typeof episode === 'number' && episode > 0) {
    const longEnough = media.filter((row) => row.episodes == null || row.episodes >= episode);
    if (longEnough.length > 0) {
      pool = longEnough;
    }
  }

  const wanted = normalizeTitle(title);
  let chosen = pool[0]!;
  for (const row of pool) {
    const names = [row.title?.romaji, row.title?.english, row.title?.native];
    const isExact = names.some(
      (name) =>
        typeof name === 'string' && name.trim().length > 0 && normalizeTitle(name) === wanted,
    );
    if (isExact) {
      chosen = row;
      break;
    }
  }

  const displayTitle =
    chosen.title?.english?.trim() ||
    chosen.title?.romaji?.trim() ||
    chosen.title?.native?.trim() ||
    title;

  return { id: chosen.id, title: displayTitle };
}
|
||
|
||
/**
 * Tells whether `value` is non-empty and made up solely of characters from the
 * hiragana (U+3040–U+309F) and katakana (U+30A0–U+30FF) blocks.
 */
function hasKanaOnly(value: string): boolean {
  const kanaOnly = /^[\u3040-\u309f\u30a0-\u30ffー]+$/;
  return kanaOnly.test(value);
}
|
||
|
||
/**
 * Converts katakana in the range U+30A1–U+30F6 to the hiragana 0x60 code
 * units below it; every other character is passed through unchanged.
 */
function katakanaToHiragana(value: string): string {
  return Array.from(value, (glyph) => {
    const unit = glyph.charCodeAt(0);
    const isConvertible = unit >= 0x30a1 && unit <= 0x30f6;
    return isConvertible ? String.fromCharCode(unit - 0x60) : glyph;
  }).join('');
}
|
||
|
||
/**
 * Derives a hiragana reading for a kana-only term.
 *
 * @returns The term with whitespace removed and katakana folded to hiragana,
 *   or `''` when the term is blank or contains anything other than kana.
 */
function buildReading(term: string): string {
  const squeezed = term.replace(/\s+/g, '').trim();
  const isKana = squeezed.length > 0 && hasKanaOnly(squeezed);
  return isKana ? katakanaToHiragana(squeezed) : '';
}
|
||
|
||
/**
 * Tells whether any character of `value` starts with a code unit in
 * U+4E00–U+9FFF or U+3400–U+4DBF.
 */
function containsKanji(value: string): boolean {
  return [...value].some((glyph) => {
    const unit = glyph.charCodeAt(0);
    const inMainBlock = unit >= 0x4e00 && unit <= 0x9fff;
    const inExtensionA = unit >= 0x3400 && unit <= 0x4dbf;
    return inMainBlock || inExtensionA;
  });
}
|
||
|
||
/**
 * Tells whether `value` is non-empty and consists only of ASCII letters,
 * macron/circumflex vowels, apostrophes, periods, hyphens and whitespace.
 */
function isRomanizedName(value: string): boolean {
  const romanized = /^[A-Za-zĀĪŪĒŌÂÊÎÔÛāīūēōâêîôû'’.\-\s]+$/;
  return romanized.test(value);
}
|
||
|
||
/**
 * Normalises a romanised name before kana conversion.
 *
 * Steps, in order: NFKC-normalise, lower-case, delete apostrophes, turn periods
 * and hyphens into spaces, spell out long vowels (ā/â→aa, ī/î→ii, ū/û→uu,
 * ē/ê→ei, ō/ô→ou), collapse whitespace runs, trim.
 */
function normalizeRomanizedName(value: string): string {
  const rewrites: ReadonlyArray<[RegExp, string]> = [
    [/[’']/g, ''],
    [/[.\-]/g, ' '],
    [/ā|â/g, 'aa'],
    [/ī|î/g, 'ii'],
    [/ū|û/g, 'uu'],
    [/ē|ê/g, 'ei'],
    [/ō|ô/g, 'ou'],
    [/\s+/g, ' '],
  ];

  let text = value.normalize('NFKC').toLowerCase();
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
|
||
|
||
/**
 * Romaji → katakana pairs for multi-kana syllables. `romanizedTokenToKatakana`
 * consults this table before the monograph table and takes the first entry
 * whose romaji matches, so entry order is significant and is kept as-is.
 */
const ROMANIZED_KANA_DIGRAPHS: ReadonlyArray<[string, string]> = [
  // Palatalised (yōon) syllables.
  ['kya', 'キャ'], ['kyu', 'キュ'], ['kyo', 'キョ'],
  ['gya', 'ギャ'], ['gyu', 'ギュ'], ['gyo', 'ギョ'],
  ['sha', 'シャ'], ['shu', 'シュ'], ['sho', 'ショ'],
  ['sya', 'シャ'], ['syu', 'シュ'], ['syo', 'ショ'],
  ['ja', 'ジャ'], ['ju', 'ジュ'], ['jo', 'ジョ'],
  ['jya', 'ジャ'], ['jyu', 'ジュ'], ['jyo', 'ジョ'],
  ['cha', 'チャ'], ['chu', 'チュ'], ['cho', 'チョ'],
  ['tya', 'チャ'], ['tyu', 'チュ'], ['tyo', 'チョ'],
  ['cya', 'チャ'], ['cyu', 'チュ'], ['cyo', 'チョ'],
  ['nya', 'ニャ'], ['nyu', 'ニュ'], ['nyo', 'ニョ'],
  ['hya', 'ヒャ'], ['hyu', 'ヒュ'], ['hyo', 'ヒョ'],
  ['bya', 'ビャ'], ['byu', 'ビュ'], ['byo', 'ビョ'],
  ['pya', 'ピャ'], ['pyu', 'ピュ'], ['pyo', 'ピョ'],
  ['mya', 'ミャ'], ['myu', 'ミュ'], ['myo', 'ミョ'],
  ['rya', 'リャ'], ['ryu', 'リュ'], ['ryo', 'リョ'],
  // Extended katakana combinations.
  ['fa', 'ファ'], ['fi', 'フィ'], ['fe', 'フェ'], ['fo', 'フォ'],
  ['fyu', 'フュ'], ['fyo', 'フョ'], ['fya', 'フャ'],
  ['va', 'ヴァ'], ['vi', 'ヴィ'], ['vu', 'ヴ'], ['ve', 'ヴェ'], ['vo', 'ヴォ'],
  ['she', 'シェ'], ['che', 'チェ'], ['je', 'ジェ'],
  ['tsi', 'ツィ'], ['tse', 'ツェ'], ['tsa', 'ツァ'], ['tso', 'ツォ'],
  ['thi', 'ティ'], ['thu', 'テュ'],
  ['dhi', 'ディ'], ['dhu', 'デュ'],
  ['wi', 'ウィ'], ['we', 'ウェ'], ['wo', 'ウォ'],
];
|
||
|
||
/**
 * Romaji → katakana pairs for single-kana syllables. Looked up with a
 * first-match scan, so entry order is significant and is kept as-is (e.g. the
 * bare `n` must stay after every `n`+vowel entry).
 */
const ROMANIZED_KANA_MONOGRAPHS: ReadonlyArray<[string, string]> = [
  ['a', 'ア'], ['i', 'イ'], ['u', 'ウ'], ['e', 'エ'], ['o', 'オ'],
  ['ka', 'カ'], ['ki', 'キ'], ['ku', 'ク'], ['ke', 'ケ'], ['ko', 'コ'],
  ['ga', 'ガ'], ['gi', 'ギ'], ['gu', 'グ'], ['ge', 'ゲ'], ['go', 'ゴ'],
  ['sa', 'サ'], ['shi', 'シ'], ['si', 'シ'], ['su', 'ス'], ['se', 'セ'], ['so', 'ソ'],
  ['za', 'ザ'], ['ji', 'ジ'], ['zi', 'ジ'], ['zu', 'ズ'], ['ze', 'ゼ'], ['zo', 'ゾ'],
  ['ta', 'タ'], ['chi', 'チ'], ['ti', 'チ'], ['tsu', 'ツ'], ['tu', 'ツ'], ['te', 'テ'], ['to', 'ト'],
  ['da', 'ダ'], ['de', 'デ'], ['do', 'ド'],
  ['na', 'ナ'], ['ni', 'ニ'], ['nu', 'ヌ'], ['ne', 'ネ'], ['no', 'ノ'],
  ['ha', 'ハ'], ['hi', 'ヒ'], ['fu', 'フ'], ['hu', 'フ'], ['he', 'ヘ'], ['ho', 'ホ'],
  ['ba', 'バ'], ['bi', 'ビ'], ['bu', 'ブ'], ['be', 'ベ'], ['bo', 'ボ'],
  ['pa', 'パ'], ['pi', 'ピ'], ['pu', 'プ'], ['pe', 'ペ'], ['po', 'ポ'],
  ['ma', 'マ'], ['mi', 'ミ'], ['mu', 'ム'], ['me', 'メ'], ['mo', 'モ'],
  ['ya', 'ヤ'], ['yu', 'ユ'], ['yo', 'ヨ'],
  ['ra', 'ラ'], ['ri', 'リ'], ['ru', 'ル'], ['re', 'レ'], ['ro', 'ロ'],
  ['wa', 'ワ'], ['w', 'ウ'], ['wo', 'ヲ'],
  ['n', 'ン'],
];
|
||
|
||
/**
 * Converts a romanised (Hepburn-style) token into katakana.
 *
 * The token is normalised with `normalizeRomanizedName`, stripped of spaces and
 * then consumed left to right:
 *  - `m` directly before `b`, `m` or `p` becomes ン (traditional Hepburn
 *    spelling of the syllabic nasal, e.g. "Homma", "Jumpei", "Namba");
 *  - a doubled consonant other than `n` becomes ッ, and so does `t` directly
 *    before `ch` (traditional Hepburn "tch", e.g. "Botchan");
 *  - `n` before a consonant other than `y` becomes ン;
 *  - otherwise the first matching entry of `ROMANIZED_KANA_DIGRAPHS`, then of
 *    `ROMANIZED_KANA_MONOGRAPHS`, is emitted.
 *
 * @param token Romanised text; case, diacritics and punctuation are normalised first.
 * @returns The katakana spelling, or null when the token is empty, contains
 *   anything other than a–z after normalisation, or cannot be fully converted.
 */
function romanizedTokenToKatakana(token: string): string | null {
  const normalized = normalizeRomanizedName(token).replace(/\s+/g, '');
  if (!normalized || !/^[a-z]+$/.test(normalized)) {
    return null;
  }

  let output = '';
  for (let i = 0; i < normalized.length; ) {
    const current = normalized[i]!;
    const next = normalized[i + 1] ?? '';

    // Syllabic nasal written as "m" before a labial. Checked before the
    // doubled-consonant rule so that "mm" yields ンマ rather than ッマ.
    if (current === 'm' && next.length > 0 && 'bmp'.includes(next)) {
      output += 'ン';
      i += 1;
      continue;
    }

    // Geminate consonant: "kk", "ss", "tt", ... and the Hepburn spelling "tch".
    const isDoubledConsonant =
      next === current && current !== 'n' && !'aeiou'.includes(current);
    const isTchCluster = current === 't' && normalized.startsWith('ch', i + 1);
    if (isDoubledConsonant || isTchCluster) {
      output += 'ッ';
      i += 1;
      continue;
    }

    // "n" followed by a consonant (but not "ny…") is the syllabic nasal.
    if (current === 'n' && next.length > 0 && next !== 'y' && !'aeiou'.includes(next)) {
      output += 'ン';
      i += 1;
      continue;
    }

    const digraph = ROMANIZED_KANA_DIGRAPHS.find(([romaji]) => normalized.startsWith(romaji, i));
    if (digraph) {
      output += digraph[1];
      i += digraph[0].length;
      continue;
    }

    const monograph = ROMANIZED_KANA_MONOGRAPHS.find(([romaji]) =>
      normalized.startsWith(romaji, i),
    );
    if (monograph) {
      output += monograph[1];
      i += monograph[0].length;
      continue;
    }

    // No rule covers the text at this position: give up on the whole token.
    return null;
  }

  return output.length > 0 ? output : null;
}
|
||
|
||
/**
 * Produces a hiragana reading from romanised text.
 *
 * @returns The reading, or `''` when the text cannot be converted to kana.
 */
function buildReadingFromRomanized(value: string): string {
  const asKatakana = romanizedTokenToKatakana(value);
  if (!asKatakana) {
    return '';
  }
  return katakanaToHiragana(asKatakana);
}
|
||
|
||
/**
 * Produces a hiragana reading from a name hint that may be kana or romaji:
 * the kana interpretation is tried first, the romanised one second.
 */
function buildReadingFromHint(value: string): string {
  const fromKana = buildReading(value);
  return fromKana || buildReadingFromRomanized(value);
}
|
||
|
||
/**
 * Scores how plausible a name part of `length` characters is:
 * 2 → 3, 1 or 3 → 2, 4 → 1, anything else → 0.
 */
function scoreJapaneseNamePartLength(length: number): number {
  switch (length) {
    case 2:
      return 3;
    case 1:
    case 3:
      return 2;
    case 4:
      return 1;
    default:
      return 0;
  }
}
|
||
|
||
/**
 * Guesses where an unspaced native name divides into its two parts.
 *
 * Every cut position is scored: typical part lengths score higher, cuts that
 * leave at least two characters on each side of a 4+ character name get a
 * bonus, and so do perfectly even cuts. When the hints yield readings, the cut
 * is penalised for straying from the proportion of the two reading lengths;
 * otherwise for straying from the midpoint. The earliest best-scoring cut wins.
 *
 * @param nameOriginal Name to split; measured in code points.
 * @param firstNameHint Hint for the second part (kana or romaji).
 * @param lastNameHint Hint for the first part (kana or romaji).
 * @returns Number of characters in the first part, or null when the name has
 *   fewer than two characters.
 */
function inferJapaneseNameSplitIndex(
  nameOriginal: string,
  firstNameHint: string,
  lastNameHint: string,
): number | null {
  const total = [...nameOriginal].length;
  if (total < 2) return null;

  const familyKana = [...buildReadingFromHint(lastNameHint)].length;
  const givenKana = [...buildReadingFromHint(firstNameHint)].length;
  const hintKana = familyKana + givenKana;
  const midpoint = Math.round(total / 2);

  // Score for cutting after `familyLength` characters.
  const rateCut = (familyLength: number): number => {
    const givenLength = total - familyLength;
    let score =
      scoreJapaneseNamePartLength(familyLength) + scoreJapaneseNamePartLength(givenLength);

    if (total >= 4 && familyLength >= 2 && givenLength >= 2) {
      score += 1;
    }

    score -=
      hintKana > 0
        ? Math.abs(familyLength - (total * familyKana) / hintKana) * 1.5
        : Math.abs(familyLength - midpoint) * 0.5;

    if (familyLength === givenLength) {
      score += 0.25;
    }
    return score;
  };

  let winner: number | null = null;
  let topScore = Number.NEGATIVE_INFINITY;
  for (let cut = 1; cut < total; cut += 1) {
    const score = rateCut(cut);
    // Strict comparison keeps the earliest cut on ties.
    if (score > topScore) {
      topScore = score;
      winner = cut;
    }
  }
  return winner;
}
|
||
|
||
/**
 * Collects katakana spellings for every romanised name in `values`.
 *
 * Blank values, values that are not romanised names, and names that cannot be
 * converted are skipped. The result is de-duplicated and keeps first-seen order.
 */
function addRomanizedKanaAliases(values: Iterable<string>): string[] {
  const seen = new Set<string>();
  for (const raw of values) {
    const candidate = raw.trim();
    if (candidate.length === 0) continue;
    if (!isRomanizedName(candidate)) continue;

    const katakana = romanizedTokenToKatakana(candidate);
    if (katakana) {
      seen.add(katakana);
    }
  }
  return Array.from(seen);
}
|
||
|
||
/**
 * Splits a native-script name into two parts, trying in order:
 *  1. exactly two whitespace-separated parts;
 *  2. exactly two middle-dot-separated parts;
 *  3. the name being the two hints concatenated (last+first, then first+last);
 *  4. for names containing kanji, an inferred cut position.
 * Steps 3 and 4 run only when both hints are non-blank.
 *
 * @param nameOriginal Name to split; surrounding whitespace is ignored.
 * @param firstNameHint Optional first-name hint.
 * @param lastNameHint Optional last-name hint.
 * @returns The parts, with `hasSpace: true` when a split was found; otherwise
 *   `family`/`given` are null and `hasSpace` is false.
 */
function splitJapaneseName(
  nameOriginal: string,
  firstNameHint?: string,
  lastNameHint?: string,
): JapaneseNameParts {
  const unsplit = (text: string): JapaneseNameParts => ({
    hasSpace: false,
    original: text,
    combined: text,
    family: null,
    given: null,
  });
  const splitAs = (original: string, family: string, given: string): JapaneseNameParts => ({
    hasSpace: true,
    original,
    combined: `${family}${given}`,
    family,
    given,
  });

  const name = nameOriginal.trim();
  if (name.length === 0) {
    return unsplit('');
  }

  // 1. Whitespace (including ideographic space) between exactly two parts.
  const spaced = name.replace(/[\s\u3000]+/g, ' ').trim();
  const bySpace = spaced.split(' ').filter((piece) => piece.length > 0);
  if (bySpace.length === 2) {
    return splitAs(spaced, bySpace[0]!, bySpace[1]!);
  }

  // 2. Middle dot between exactly two parts.
  const byDot = name
    .split(/[・・·•]/)
    .map((piece) => piece.trim())
    .filter((piece) => piece.length > 0);
  if (byDot.length === 2) {
    return splitAs(name, byDot[0]!, byDot[1]!);
  }

  const first = firstNameHint?.trim() || '';
  const last = lastNameHint?.trim() || '';
  if (!first || !last) {
    return unsplit(name);
  }

  // 3. The name is literally the two hints glued together, in either order.
  if (name === `${last}${first}`) {
    return splitAs(name, last, first);
  }
  if (name === `${first}${last}`) {
    return splitAs(name, first, last);
  }

  // 4. Kanji name: estimate the boundary from part lengths and hint readings.
  if (containsKanji(name)) {
    const cut = inferJapaneseNameSplitIndex(name, first, last);
    if (cut != null) {
      const glyphs = [...name];
      const family = glyphs.slice(0, cut).join('');
      const given = glyphs.slice(cut).join('');
      if (family && given) {
        return { hasSpace: true, original: name, combined: name, family, given };
      }
    }
  }

  return unsplit(name);
}
|
||
|
||
/**
 * Derives kana readings for a native name and, when it can be split, for its
 * family and given parts.
 *
 * For a part, the reading from the matching hint is preferred; failing that, a
 * kanji part is read from the romanised name (first word for the family part,
 * remaining words for the given part) and a non-kanji part from its own kana.
 *
 * @param nameOriginal Native-script name.
 * @param romanizedName Romanised full name used as a fallback source.
 * @param firstNameHint Optional hint for the given part.
 * @param lastNameHint Optional hint for the family part.
 * @returns Readings; unknown readings are empty strings. When the name cannot
 *   be split, `family` and `given` repeat the full reading.
 */
function generateNameReadings(
  nameOriginal: string,
  romanizedName: string,
  firstNameHint?: string,
  lastNameHint?: string,
): NameReadings {
  const name = nameOriginal.trim();
  if (name === '') {
    return { hasSpace: false, original: '', full: '', family: '', given: '' };
  }

  // Kanji text is read from romaji; anything else from its own kana.
  const readText = (text: string, romaji: string): string =>
    containsKanji(text) ? buildReadingFromRomanized(romaji) : buildReading(text);

  const parts = splitJapaneseName(name, firstNameHint, lastNameHint);
  const familyPart = parts.family;
  const givenPart = parts.given;
  if (!parts.hasSpace || !familyPart || !givenPart) {
    const whole = readText(name, romanizedName);
    return { hasSpace: false, original: name, full: whole, family: whole, given: whole };
  }

  const [familyRomaji = '', ...givenRomajiWords] = romanizedName
    .trim()
    .split(/\s+/)
    .filter((word) => word.length > 0);

  const family =
    buildReadingFromHint(lastNameHint || '') || readText(familyPart, familyRomaji);
  const given =
    buildReadingFromHint(firstNameHint || '') ||
    readText(givenPart, givenRomajiWords.join(' '));
  const full =
    `${family}${given}` || buildReading(name) || buildReadingFromRomanized(romanizedName);

  return { hasSpace: true, original: parts.original, full, family, given };
}
|
||
|
||
/**
 * Expands a raw name that may carry parenthesised aliases into its variants.
 *
 * Both ASCII parentheses `( )` and fullwidth parentheses (U+FF08 / U+FF09, the
 * form normally used in Japanese text) are recognised.
 *
 * @param rawName Name as received, e.g. `"Alice (Arisu)"`.
 * @returns De-duplicated variants in this order: the trimmed name, the name
 *   with every parenthesised group removed (if that differs and is non-empty),
 *   then the trimmed content of each group. Empty for a blank name.
 */
function expandRawNameVariants(rawName: string): string[] {
  const trimmed = rawName.trim();
  if (!trimmed) return [];

  // One non-nested group: opening paren, at least one non-paren character, closing paren.
  const parenthesized = /[(\uFF08]([^()\uFF08\uFF09]+)[)\uFF09]/g;

  const variants = new Set<string>([trimmed]);

  const outer = trimmed.replace(parenthesized, ' ').replace(/\s+/g, ' ').trim();
  if (outer && outer !== trimmed) {
    variants.add(outer);
  }

  for (const match of trimmed.matchAll(parenthesized)) {
    const inner = match[1]?.trim() || '';
    if (inner) {
      variants.add(inner);
    }
  }

  return [...variants];
}
|
||
|
||
/**
 * Builds every lookup term for a character.
 *
 * Base terms come from the native name, the full name and all alternative
 * names (with parenthesised variants expanded): the name itself, the name
 * without whitespace, without middle dots, its two whitespace-separated words,
 * and its middle-dot-separated pieces. The two parts of the split native name
 * are added too. Each base term is then emitted bare and with every honorific
 * suffix, followed by katakana aliases of the romanised terms (again bare and
 * with every suffix).
 *
 * @returns De-duplicated, non-blank terms in insertion order.
 */
function buildNameTerms(character: CharacterRecord): string[] {
  const stems = new Set<string>();
  const sources = [character.nativeName, character.fullName, ...character.alternativeNames];

  for (const source of sources) {
    for (const variant of expandRawNameVariants(source)) {
      stems.add(variant);

      const squeezed = variant.replace(/[\s\u3000]+/g, '');
      if (squeezed !== '' && squeezed !== variant) {
        stems.add(squeezed);
      }

      const undotted = squeezed.replace(/[・・·•]/g, '');
      if (undotted !== '' && undotted !== squeezed) {
        stems.add(undotted);
      }

      // Exactly two words: each is usable on its own.
      const words = variant.split(/[\s\u3000]+/).filter((word) => word.trim().length > 0);
      if (words.length === 2) {
        words.forEach((word) => stems.add(word));
      }

      // Two or more dot-separated pieces: each is usable on its own.
      const dotPieces = variant
        .split(/[・・·•]/)
        .map((piece) => piece.trim())
        .filter((piece) => piece.length > 0);
      if (dotPieces.length >= 2) {
        dotPieces.forEach((piece) => stems.add(piece));
      }
    }
  }

  const { family, given } = splitJapaneseName(
    character.nativeName,
    character.firstNameHint,
    character.lastNameHint,
  );
  if (family) stems.add(family);
  if (given) stems.add(given);

  const terms = new Set<string>();
  const addWithHonorifics = (stem: string): void => {
    terms.add(stem);
    for (const { term: honorific } of HONORIFIC_SUFFIXES) {
      terms.add(`${stem}${honorific}`);
    }
  };

  stems.forEach((stem) => addWithHonorifics(stem));
  // The alias list is fully computed before `terms` is extended further.
  addRomanizedKanaAliases(terms).forEach((alias) => addWithHonorifics(alias));

  return [...terms].filter((term) => term.trim().length > 0);
}
|
||
|
||
/** `[month number, English month name]` pairs for months 1 through 12, in calendar order. */
const MONTH_NAMES: ReadonlyArray<[number, string]> = [
  'January',
  'February',
  'March',
  'April',
  'May',
  'June',
  'July',
  'August',
  'September',
  'October',
  'November',
  'December',
].map((name, index): [number, string] => [index + 1, name]);
|
||
|
||
/** `[lower-case key, display label]` pairs used to render a character's sex. */
const SEX_DISPLAY: ReadonlyArray<[string, string]> = [
  ['m', '♂ Male'],
  ['f', '♀ Female'],
  ['male', '♂ Male'],
  ['female', '♀ Female'],
];
|
||
|
||
/**
 * Renders a birthday as `"<Month name> <day>"`.
 *
 * @returns `''` when there is no birthday; the month renders as `Unknown`
 *   when its number is not in `MONTH_NAMES`.
 */
function formatBirthday(birthday: CharacterBirthday | null): string {
  if (!birthday) {
    return '';
  }
  const [month, day] = birthday;
  const known = MONTH_NAMES.find(([number]) => number === month);
  const label = known?.[1] || 'Unknown';
  return `${label} ${day}`;
}
|
||
|
||
/**
 * Builds the one-line stats summary for a character: sex, age, blood type and
 * birthday, in that order, joined with ` • `. Unknown or blank items are
 * omitted.
 */
function formatCharacterStats(character: CharacterRecord): string {
  const sexKey = character.sex.trim().toLowerCase();
  const age = character.age.trim();
  const bloodType = character.bloodType.trim();
  const birthday = formatBirthday(character.birthday);

  const segments: Array<string | undefined> = [
    SEX_DISPLAY.find(([key]) => key === sexKey)?.[1],
    age ? `${age} years` : '',
    bloodType ? `Blood Type ${bloodType}` : '',
    birthday ? `Birthday: ${birthday}` : '',
  ];

  return segments.filter((segment): segment is string => Boolean(segment)).join(' • ');
}
|
||
|
||
/**
 * Resolves the kana reading for one lookup term of a character.
 *
 * Resolution order:
 *  1. The term is exactly the native name (as given, without whitespace, or
 *     without middle dots) or one of its two parts, and the corresponding
 *     reading is known: use that reading. This runs before honorific
 *     stripping so that a real name that happens to end in honorific
 *     characters (e.g. a given name written 博士, or a name ending in 様) is
 *     not misread as "stem + honorific".
 *  2. The term ends in an honorific suffix: read the stem recursively and
 *     append the suffix reading (`''` when the stem has no reading).
 *  3. The term matched in step 1 but its reading is empty: `''`.
 *  4. Kana-only term: its hiragana form.
 *  5. Romanised term: its kana conversion, falling back to the full reading.
 *
 * @param term Term to read.
 * @param character Character the term belongs to.
 * @param readings Readings derived for the character's native name.
 * @param nameParts Split of the character's native name.
 * @returns The reading, or `''` when none can be determined.
 */
function buildReadingForTerm(
  term: string,
  character: CharacterRecord,
  readings: NameReadings,
  nameParts: JapaneseNameParts,
): string {
  const compactNative = character.nativeName.replace(/[\s\u3000]+/g, '');
  const noMiddleDotsNative = compactNative.replace(/[・・·•]/g, '');
  const familyCompact = nameParts.family?.replace(/[・・·•]/g, '') || '';
  const givenCompact = nameParts.given?.replace(/[・・·•]/g, '') || '';

  // null = the term is not one of the known name forms.
  let knownReading: string | null = null;
  if (
    term === character.nativeName ||
    term === compactNative ||
    term === noMiddleDotsNative ||
    term === nameParts.original ||
    term === nameParts.combined
  ) {
    knownReading = readings.full;
  } else if (nameParts.family && (term === nameParts.family || term === familyCompact)) {
    knownReading = readings.family;
  } else if (nameParts.given && (term === nameParts.given || term === givenCompact)) {
    knownReading = readings.given;
  }

  if (knownReading) {
    return knownReading;
  }

  for (const suffix of HONORIFIC_SUFFIXES) {
    if (term.endsWith(suffix.term) && term.length > suffix.term.length) {
      const baseTerm = term.slice(0, -suffix.term.length);
      const baseReading = buildReadingForTerm(baseTerm, character, readings, nameParts);
      return baseReading ? `${baseReading}${suffix.reading}` : '';
    }
  }

  // Known name form whose reading is empty: nothing better is available.
  if (knownReading !== null) {
    return knownReading;
  }

  const compact = term.replace(/[\s\u3000]+/g, '');
  if (hasKanaOnly(compact)) {
    return buildReading(compact);
  }

  if (isRomanizedName(term)) {
    return buildReadingFromRomanized(term) || readings.full;
  }

  return '';
}
|
||
|
||
/**
 * Splits a raw character description into labelled fields and free text.
 *
 * `<br>` tags become line breaks and all other tags are blanked. A line of the
 * form `__Key:__ value` becomes a field (with bold/italic markers removed from
 * the value); every other non-blank line is joined into the text, where
 * Markdown links are reduced to their label, bare URLs are dropped, bold
 * markers and `~!` / `!~` markers are removed and whitespace is collapsed.
 *
 * @param raw Description markup.
 * @returns The fields in order of appearance and the cleaned text.
 */
function parseCharacterDescription(raw: string): {
  fields: Array<{ key: string; value: string }>;
  text: string;
} {
  const stripFieldMarkup = (value: string): string =>
    value
      .replace(/__([^_]+)__/g, '$1')
      .replace(/\*\*([^*]+)\*\*/g, '$1')
      .replace(/_([^_]+)_/g, '$1')
      .replace(/\*([^*]+)\*/g, '$1')
      .trim();

  const fields: Array<{ key: string; value: string }> = [];
  const proseLines: string[] = [];

  const withoutTags = raw.replace(/<br\s*\/?>/gi, '\n').replace(/<[^>]+>/g, ' ');
  withoutTags
    .split(/\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .forEach((line) => {
      const labelled = line.match(/^__([^_]+):__\s*(.+)$/);
      if (labelled === null) {
        proseLines.push(line);
        return;
      }
      fields.push({ key: labelled[1]!.trim(), value: stripFieldMarkup(labelled[2]!) });
    });

  let text = proseLines.join(' ');
  text = text.replace(/\[([^\]]+)\]\((https?:\/\/[^)\s]+)\)/g, '$1');
  text = text.replace(/https?:\/\/\S+/g, '');
  text = text.replace(/__([^_]+)__/g, '$1');
  text = text.replace(/\*\*([^*]+)\*\*/g, '$1');
  text = text.replace(/~!/g, '');
  text = text.replace(/!~/g, '');
  text = text.replace(/\s+/g, ' ').trim();

  return { fields, text };
}
|
||
|
||
/**
 * Returns the tag and score for a role:
 * main → 100, primary → 75, side → 50, anything else → `appears` / 25.
 */
function roleInfo(role: CharacterDictionaryRole): { tag: string; score: number } {
  switch (role) {
    case 'main':
      return { tag: 'main', score: 100 };
    case 'primary':
      return { tag: 'primary', score: 75 };
    case 'side':
      return { tag: 'side', score: 50 };
    default:
      return { tag: 'appears', score: 25 };
  }
}
|
||
|
||
/**
 * Translates a role string (compared trimmed and case-insensitively) into a
 * dictionary role: `MAIN` → main, `SUPPORTING` → primary, and `BACKGROUND` as
 * well as any other or missing value → side.
 */
function mapRole(input: string | null | undefined): CharacterDictionaryRole {
  const key = (input || '').trim().toUpperCase();
  switch (key) {
    case 'MAIN':
      return 'main';
    case 'SUPPORTING':
      return 'primary';
    case 'BACKGROUND':
    default:
      return 'side';
  }
}
|
||
|
||
/**
 * Returns the human-readable label for a role: main → `Protagonist`,
 * primary → `Main Character`, side → `Side Character`, otherwise `Minor Role`.
 */
function roleLabel(role: CharacterDictionaryRole): string {
  switch (role) {
    case 'main':
      return 'Protagonist';
    case 'primary':
      return 'Main Character';
    case 'side':
      return 'Side Character';
    default:
      return 'Minor Role';
  }
}
|
||
|
||
/**
 * Picks a file extension from a content-type string.
 *
 * @param contentType Content type (any case), or null.
 * @returns `png`, `gif` or `webp` when the content type mentions one of them
 *   (checked in that order); `jpg` otherwise.
 */
function inferImageExt(contentType: string | null): string {
  const lowered = (contentType || '').toLowerCase();
  for (const extension of ['png', 'gif', 'webp']) {
    if (lowered.includes(extension)) {
      return extension;
    }
  }
  return 'jpg';
}
|
||
|
||
/**
 * Creates `dirPath` (including missing parents) unless something already
 * exists at that path.
 */
function ensureDir(dirPath: string): void {
  if (!fs.existsSync(dirPath)) {
    fs.mkdirSync(dirPath, { recursive: true });
  }
}
|
||
|
||
/**
 * Expands a leading `~` to the current user's home directory.
 *
 * Only a bare `~` or a `~` followed by a path separator is expanded. Inputs
 * such as `~otheruser/dir` or a file literally named `~backup` are returned
 * untouched instead of being redirected into the current user's home.
 *
 * @param input Path as typed by the user.
 * @returns The path with the home directory substituted, or `input` unchanged.
 */
function expandUserPath(input: string): string {
  const isHomeRelative =
    input === '~' ||
    input.startsWith('~/') ||
    // A backslash is a separator only on Windows; elsewhere it is an ordinary file-name character.
    (path.sep === '\\' && input.startsWith('~\\'));

  if (!isHomeRelative) {
    return input;
  }
  return path.join(os.homedir(), input.slice(1));
}
|
||
|
||
/** Tells whether the extension of `filePath` is accepted by `hasVideoExtension`. */
function isVideoFile(filePath: string): boolean {
  const extension = path.extname(filePath);
  return hasVideoExtension(extension);
}
|
||
|
||
/**
 * Searches a directory tree breadth-first for a video file.
 *
 * Within each directory, entries are visited in `localeCompare` name order and
 * the first regular file with a video extension is returned. Sub-directories
 * whose name starts with `.` are not descended into, and directories that
 * cannot be read are skipped.
 *
 * @param directoryPath Root of the search.
 * @returns Path of the first video file found, or null when there is none.
 */
function findFirstVideoFileInDirectory(directoryPath: string): string | null {
  const pending: string[] = [directoryPath];

  // `pending` doubles as the FIFO queue; `cursor` marks the next directory to visit.
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const directory = pending[cursor]!;

    let listing: fs.Dirent[];
    try {
      listing = fs.readdirSync(directory, { withFileTypes: true });
    } catch {
      continue;
    }
    listing.sort((left, right) => left.name.localeCompare(right.name));

    const video = listing.find(
      (item) => item.isFile() && isVideoFile(path.join(directory, item.name)),
    );
    if (video) {
      return path.join(directory, video.name);
    }

    for (const item of listing) {
      if (item.isDirectory() && !item.name.startsWith('.')) {
        pending.push(path.join(directory, item.name));
      }
    }
  }

  return null;
}
|
||
|
||
/**
 * Turns a user-supplied target path into the media path and title used for
 * guessing the series.
 *
 * A file is used as-is. For a directory, the first video file found inside it
 * is used, falling back to the directory itself. The title is always the base
 * name of the chosen path.
 *
 * @param targetPath File or directory path; `~` expansion and relative paths are supported.
 * @throws Error when the path is blank, does not exist, or is neither a file
 *   nor a directory.
 */
function resolveDictionaryGuessInputs(targetPath: string): {
  mediaPath: string;
  mediaTitle: string | null;
} {
  const describe = (chosen: string): { mediaPath: string; mediaTitle: string | null } => ({
    mediaPath: chosen,
    mediaTitle: path.basename(chosen),
  });

  const cleaned = targetPath.trim();
  if (cleaned.length === 0) {
    throw new Error('Dictionary target path is empty.');
  }

  const absolute = path.resolve(expandUserPath(cleaned));
  let info: fs.Stats;
  try {
    info = fs.statSync(absolute);
  } catch {
    throw new Error(`Dictionary target path not found: ${targetPath}`);
  }

  if (info.isFile()) {
    return describe(absolute);
  }
  if (!info.isDirectory()) {
    throw new Error(`Dictionary target must be a file or directory path: ${targetPath}`);
  }

  const firstVideo = findFirstVideoFileInDirectory(absolute);
  return describe(firstVideo ? firstVideo : absolute);
}
|
||
|
||
/** Returns the `snapshots` sub-directory of `outputDir`. */
function getSnapshotsDir(outputDir: string): string {
  const folderName = 'snapshots';
  return path.join(outputDir, folderName);
}
|
||
|
||
/** Returns the snapshot file path for a media id: `<snapshots dir>/anilist-<mediaId>.json`. */
function getSnapshotPath(outputDir: string, mediaId: number): string {
  const fileName = `anilist-${mediaId}.json`;
  return path.join(getSnapshotsDir(outputDir), fileName);
}
|
||
|
||
/** Returns the path of the merged archive, `merged.zip`, inside `outputDir`. */
function getMergedZipPath(outputDir: string): string {
  const fileName = 'merged.zip';
  return path.join(outputDir, fileName);
}
|
||
|
||
/**
 * Loads and validates a snapshot file.
 *
 * @param snapshotPath JSON file to read.
 * @returns The snapshot, or null when the file cannot be read or parsed, is
 *   not an object, has a different `formatVersion`, or is missing any
 *   correctly-typed top-level field. Array elements are not validated.
 */
function readSnapshot(snapshotPath: string): CharacterDictionarySnapshot | null {
  let data: Partial<CharacterDictionarySnapshot> | null;
  try {
    data = JSON.parse(
      fs.readFileSync(snapshotPath, 'utf8'),
    ) as Partial<CharacterDictionarySnapshot> | null;
  } catch {
    // Missing, unreadable or malformed file: treated as "no snapshot".
    return null;
  }

  if (data === null || typeof data !== 'object') {
    return null;
  }

  const { formatVersion, mediaId, mediaTitle, entryCount, updatedAt, termEntries, images } = data;

  if (formatVersion !== CHARACTER_DICTIONARY_FORMAT_VERSION) return null;
  if (typeof mediaId !== 'number' || typeof mediaTitle !== 'string') return null;
  if (typeof entryCount !== 'number' || typeof updatedAt !== 'number') return null;
  if (!Array.isArray(termEntries) || !Array.isArray(images)) return null;

  return {
    formatVersion,
    mediaId,
    mediaTitle,
    entryCount,
    updatedAt,
    termEntries: termEntries as CharacterDictionaryTermEntry[],
    images: images as CharacterDictionarySnapshotImage[],
  };
}
|
||
|
||
/**
 * Writes a snapshot as 2-space-indented JSON (UTF-8), creating the parent
 * directory first when needed.
 */
function writeSnapshot(snapshotPath: string, snapshot: CharacterDictionarySnapshot): void {
  const parentDir = path.dirname(snapshotPath);
  ensureDir(parentDir);

  const serialized = JSON.stringify(snapshot, null, 2);
  fs.writeFileSync(snapshotPath, serialized, 'utf8');
}
|
||
|
||
/**
 * Returns the inline style for a character's role badge.
 *
 * Every role shares the same shape and typography; only the background
 * colour differs. Any role other than `main`, `primary` or `side` gets the
 * grey fallback.
 */
function roleBadgeStyle(role: CharacterDictionaryRole): Record<string, string> {
  let backgroundColor: string;
  switch (role) {
    case 'main':
      backgroundColor = '#4CAF50';
      break;
    case 'primary':
      backgroundColor = '#2196F3';
      break;
    case 'side':
      backgroundColor = '#FF9800';
      break;
    default:
      backgroundColor = '#9E9E9E';
      break;
  }

  return {
    borderRadius: '4px',
    padding: '0.15em 0.5em',
    fontSize: '0.8em',
    fontWeight: 'bold',
    color: '#fff',
    backgroundColor,
  };
}
|
||
|
||
/**
 * Builds a `details` structured-content node with a bold `summary` title
 * followed by a `div` wrapping `body`.
 *
 * @param title Text shown in the summary line.
 * @param open  Value of the `open` property on the `details` node.
 * @param body  Content placed inside the section's `div`.
 */
function buildCollapsibleSection(
  title: string,
  open: boolean,
  body: Array<string | Record<string, unknown>> | string | Record<string, unknown>,
): Record<string, unknown> {
  const summaryNode = {
    tag: 'summary',
    style: { fontWeight: 'bold', fontSize: '0.95em', cursor: 'pointer' },
    content: title,
  };
  const bodyNode = {
    tag: 'div',
    style: { padding: '0.25em 0 0 0.4em', fontSize: '0.9em' },
    content: body,
  };

  return {
    tag: 'details',
    open,
    style: { marginTop: '0.4em' },
    content: [summaryNode, bodyNode],
  };
}
|
||
|
||
/**
 * Builds the structured content shown inside the "Voiced by" section.
 *
 * - Exactly one voice actor with a known image path: a one-row table with the
 *   portrait on the left and the label on the right.
 * - Exactly one voice actor without an image: a plain `div` with the label.
 * - Any other count (including zero): a bulleted list of labels, no images.
 *
 * A label is `native (full)` when both names exist, otherwise whichever name
 * is present.
 */
function buildVoicedByContent(
  voiceActors: VoiceActorRecord[],
  vaImagePaths: Map<number, string>,
): Record<string, unknown> {
  const describeActor = (actor: VoiceActorRecord): string => {
    if (!actor.nativeName) {
      return actor.fullName;
    }
    return actor.fullName ? `${actor.nativeName} (${actor.fullName})` : actor.nativeName;
  };

  if (voiceActors.length !== 1) {
    const listItems: Array<Record<string, unknown>> = voiceActors.map((actor) => ({
      tag: 'li',
      content: describeActor(actor),
    }));
    return { tag: 'ul', style: { marginTop: '0.15em' }, content: listItems };
  }

  const soleActor = voiceActors[0]!;
  const label = describeActor(soleActor);
  const portraitPath = vaImagePaths.get(soleActor.id);

  if (!portraitPath) {
    return { tag: 'div', content: label };
  }

  const portraitCell = {
    tag: 'td',
    style: {
      verticalAlign: 'top',
      padding: '0',
      paddingRight: '0.4em',
      borderWidth: '0',
    },
    content: {
      tag: 'img',
      path: portraitPath,
      width: 3,
      height: 3,
      sizeUnits: 'em',
      title: label,
      alt: label,
      collapsed: false,
      collapsible: false,
      background: true,
    },
  };
  const labelCell = {
    tag: 'td',
    style: { verticalAlign: 'middle', padding: '0', borderWidth: '0' },
    content: label,
  };

  return {
    tag: 'table',
    content: {
      tag: 'tr',
      content: [portraitCell, labelCell],
    },
  };
}
|
||
|
||
/**
 * Builds the glossary (a single `structured-content` entry) describing one
 * character.
 *
 * Layout, top to bottom: display name, optional secondary name, optional
 * portrait, "From: <media title>", role badge, then up to three collapsible
 * sections ("Description", "Character Information", "Voiced by"). A section
 * is emitted only when it has content.
 *
 * @param character   Character record to describe.
 * @param mediaTitle  Title of the media the character belongs to.
 * @param imagePath   Dictionary-relative portrait path, or `null` for none.
 * @param vaImagePaths Dictionary-relative image paths keyed by voice actor ID.
 * @param getCollapsibleSectionOpenState Returns whether a given section
 *   should be rendered open.
 */
function createDefinitionGlossary(
  character: CharacterRecord,
  mediaTitle: string,
  imagePath: string | null,
  vaImagePaths: Map<number, string>,
  getCollapsibleSectionOpenState: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean,
): CharacterDictionaryGlossaryEntry[] {
  const primaryName = character.nativeName || character.fullName || `Character ${character.id}`;
  // The full name is shown as a subtitle only when it adds information.
  const showsFullNameSubtitle = Boolean(
    character.nativeName && character.fullName && character.fullName !== character.nativeName,
  );
  const parsedDescription = parseCharacterDescription(character.description);

  const nodes: Array<string | Record<string, unknown>> = [];

  nodes.push({
    tag: 'div',
    style: { fontWeight: 'bold', fontSize: '1.1em', marginBottom: '0.1em' },
    content: primaryName,
  });

  if (showsFullNameSubtitle) {
    nodes.push({
      tag: 'div',
      style: { fontSize: '0.85em', fontStyle: 'italic', color: '#b0b0b0', marginBottom: '0.2em' },
      content: character.fullName,
    });
  }

  if (imagePath) {
    const portrait = {
      tag: 'img',
      path: imagePath,
      width: 8,
      height: 11,
      sizeUnits: 'em',
      title: primaryName,
      alt: primaryName,
      description: `${primaryName} · ${mediaTitle}`,
      collapsed: false,
      collapsible: false,
      background: true,
    };
    nodes.push({
      tag: 'div',
      style: { marginTop: '0.3em', marginBottom: '0.3em' },
      content: portrait,
    });
  }

  nodes.push({
    tag: 'div',
    style: { fontSize: '0.8em', color: '#999', marginBottom: '0.2em' },
    content: `From: ${mediaTitle}`,
  });

  nodes.push({
    tag: 'div',
    style: { marginBottom: '0.15em' },
    content: {
      tag: 'span',
      style: roleBadgeStyle(character.role),
      content: roleLabel(character.role),
    },
  });

  const statsSummary = formatCharacterStats(character);

  if (parsedDescription.text) {
    const descriptionOpen = getCollapsibleSectionOpenState('description');
    nodes.push(buildCollapsibleSection('Description', descriptionOpen, parsedDescription.text));
  }

  // The stats line (bold) comes first, followed by the parsed "key: value" fields.
  const infoItems: Array<Record<string, unknown>> = [];
  if (statsSummary) {
    infoItems.push({
      tag: 'li',
      style: { fontWeight: 'bold' },
      content: statsSummary,
    });
  }
  for (const field of parsedDescription.fields) {
    infoItems.push({
      tag: 'li',
      content: `${field.key}: ${field.value}`,
    });
  }
  if (infoItems.length > 0) {
    const informationOpen = getCollapsibleSectionOpenState('characterInformation');
    nodes.push(
      buildCollapsibleSection('Character Information', informationOpen, {
        tag: 'ul',
        style: { marginTop: '0.15em' },
        content: infoItems,
      }),
    );
  }

  if (character.voiceActors.length > 0) {
    const voicedByOpen = getCollapsibleSectionOpenState('voicedBy');
    nodes.push(
      buildCollapsibleSection(
        'Voiced by',
        voicedByOpen,
        buildVoicedByContent(character.voiceActors, vaImagePaths),
      ),
    );
  }

  return [
    {
      type: 'structured-content',
      content: { tag: 'div', content: nodes },
    },
  ];
}
|
||
|
||
/**
 * Returns the dictionary-relative path of a character portrait:
 * `img/m<mediaId>-c<charId>.<ext>`.
 */
function buildSnapshotImagePath(mediaId: number, charId: number, ext: string): string {
  const fileName = `m${mediaId}-c${charId}.${ext}`;
  return `img/${fileName}`;
}
|
||
|
||
/**
 * Returns the dictionary-relative path of a voice actor image:
 * `img/m<mediaId>-va<vaId>.<ext>`.
 */
function buildVaImagePath(mediaId: number, vaId: number, ext: string): string {
  const fileName = `m${mediaId}-va${vaId}.${ext}`;
  return `img/${fileName}`;
}
|
||
|
||
/**
 * Assembles one term-bank row for a character name.
 *
 * The third slot is `name <role tag>` and the fifth is the role's score, both
 * taken from `roleInfo(role)`. The fourth and eighth slots are empty strings
 * and the seventh is `0`.
 * NOTE(review): the slot layout appears to follow the Yomitan term bank v3
 * format — confirm against the schema before reordering.
 */
function buildTermEntry(
  term: string,
  reading: string,
  role: CharacterDictionaryRole,
  glossary: CharacterDictionaryGlossaryEntry[],
): CharacterDictionaryTermEntry {
  const info = roleInfo(role);
  const definitionTags = `name ${info.tag}`;
  const entry: CharacterDictionaryTermEntry = [
    term,
    reading,
    definitionTags,
    '',
    info.score,
    glossary,
    0,
    '',
  ];
  return entry;
}
|
||
|
||
/**
 * 256-entry lookup table for the table-driven CRC-32 below, built from the
 * reflected polynomial 0xEDB88320 (one entry per possible byte value).
 */
const CRC32_TABLE = Uint32Array.from({ length: 256 }, (_unused, byteValue) => {
  let remainder = byteValue;
  for (let bit = 0; bit < 8; bit += 1) {
    const shifted = remainder >>> 1;
    remainder = (remainder & 1) === 1 ? 0xedb88320 ^ shifted : shifted;
  }
  // Normalise to an unsigned 32-bit value.
  return remainder >>> 0;
});

/**
 * Computes the CRC-32 checksum of `data`, returned as an unsigned 32-bit
 * integer. An empty buffer yields `0`.
 */
function crc32(data: Buffer): number {
  let state = 0xffffffff;
  for (let index = 0; index < data.length; index += 1) {
    const tableIndex = (state ^ data[index]!) & 0xff;
    state = CRC32_TABLE[tableIndex]! ^ (state >>> 8);
  }
  // Final XOR, then `>>> 0` so the result is never negative.
  return (state ^ 0xffffffff) >>> 0;
}
|
||
|
||
/**
 * Serialises `files` into an in-memory ZIP archive without compression
 * (method 0, "stored").
 *
 * Output layout: for each file a 30-byte local header + name + raw data, then
 * one 46-byte central directory record + name per file, then the 22-byte
 * end-of-central-directory record. Entries keep the order of `files`.
 *
 * Header buffers come from `Buffer.alloc`, which zero-fills, so only the
 * non-zero fields are written explicitly. Flags, compression method,
 * modification time/date, extra/comment lengths, disk numbers and file
 * attributes are all left at zero.
 */
function createStoredZip(files: Array<{ name: string; data: Buffer }>): Buffer {
  const localParts: Buffer[] = [];
  const centralParts: Buffer[] = [];
  let bytesWritten = 0;

  for (const { name, data } of files) {
    const nameBytes = Buffer.from(name, 'utf8');
    const checksum = crc32(data);

    // Local file header.
    const localHeader = Buffer.alloc(30 + nameBytes.length);
    localHeader.writeUInt32LE(0x04034b50, 0); // signature
    localHeader.writeUInt16LE(20, 4); // version needed to extract
    localHeader.writeUInt32LE(checksum, 14);
    localHeader.writeUInt32LE(data.length, 18); // compressed size (same as raw: stored)
    localHeader.writeUInt32LE(data.length, 22); // uncompressed size
    localHeader.writeUInt16LE(nameBytes.length, 26);
    nameBytes.copy(localHeader, 30);

    // Matching central directory record, pointing back at the local header.
    const centralHeader = Buffer.alloc(46 + nameBytes.length);
    centralHeader.writeUInt32LE(0x02014b50, 0); // signature
    centralHeader.writeUInt16LE(20, 4); // version made by
    centralHeader.writeUInt16LE(20, 6); // version needed to extract
    centralHeader.writeUInt32LE(checksum, 16);
    centralHeader.writeUInt32LE(data.length, 20); // compressed size
    centralHeader.writeUInt32LE(data.length, 24); // uncompressed size
    centralHeader.writeUInt16LE(nameBytes.length, 28);
    centralHeader.writeUInt32LE(bytesWritten, 42); // offset of the local header
    nameBytes.copy(centralHeader, 46);

    localParts.push(localHeader, data);
    centralParts.push(centralHeader);
    bytesWritten += localHeader.length + data.length;
  }

  const centralDirectoryOffset = bytesWritten;
  const centralDirectorySize = centralParts.reduce((total, part) => total + part.length, 0);

  // End-of-central-directory record.
  const endRecord = Buffer.alloc(22);
  endRecord.writeUInt32LE(0x06054b50, 0); // signature
  endRecord.writeUInt16LE(files.length, 8); // entries on this disk
  endRecord.writeUInt16LE(files.length, 10); // entries in total
  endRecord.writeUInt32LE(centralDirectorySize, 12);
  endRecord.writeUInt32LE(centralDirectoryOffset, 16);

  return Buffer.concat([...localParts, ...centralParts, endRecord]);
}
|
||
|
||
/**
 * Sends one GraphQL request to AniList and returns its `data` payload.
 *
 * @param query         GraphQL query text.
 * @param variables     Variables for the query.
 * @param beforeRequest Optional hook awaited before the request is sent
 *   (callers use it to pace requests).
 * @throws Error when the HTTP status is not OK, when the response carries an
 *   error with a non-empty message, or when `data` is missing.
 */
async function fetchAniList<T>(
  query: string,
  variables: Record<string, unknown>,
  beforeRequest?: () => Promise<void>,
): Promise<T> {
  if (beforeRequest) {
    await beforeRequest();
  }

  const requestBody = JSON.stringify({ query, variables });
  const response = await fetch(ANILIST_GRAPHQL_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: requestBody,
  });
  if (!response.ok) {
    throw new Error(`AniList request failed (${response.status})`);
  }

  const payload = (await response.json()) as {
    data?: T;
    errors?: Array<{ message?: string }>;
  };

  // Surface the first GraphQL error that carries a string message.
  const reportedError = payload.errors?.find(
    (candidate) => candidate && typeof candidate.message === 'string',
  );
  const reportedMessage = reportedError?.message;
  if (reportedMessage) {
    throw new Error(reportedMessage);
  }

  const { data } = payload;
  if (!data) {
    throw new Error('AniList response missing data');
  }
  return data;
}
|
||
|
||
/**
 * Searches AniList for anime matching `guess.title` (top 10 results) and
 * lets `pickAniListSearchResult` choose among them using the guessed title
 * and episode.
 *
 * @param guess         Title/episode guess for the current media.
 * @param beforeRequest Optional hook awaited before the AniList request.
 * @throws Error when no search result is selected.
 */
async function resolveAniListMediaIdFromGuess(
  guess: AnilistMediaGuess,
  beforeRequest?: () => Promise<void>,
): Promise<ResolvedAniListMedia> {
  const searchQuery = `
      query($search: String!) {
        Page(perPage: 10) {
          media(search: $search, type: ANIME, sort: [SEARCH_MATCH, POPULARITY_DESC]) {
            id
            episodes
            title {
              romaji
              english
              native
            }
          }
        }
      }
    `;

  const searchResponse = await fetchAniList<AniListSearchResponse>(
    searchQuery,
    { search: guess.title },
    beforeRequest,
  );

  const candidates = searchResponse.Page?.media ?? [];
  const match = pickAniListSearchResult(guess.title, guess.episode, candidates);
  if (match) {
    return match;
  }
  throw new Error(`No AniList media match found for "${guess.title}".`);
}
|
||
|
||
/**
 * Downloads every character of an AniList anime, following pagination
 * (50 characters per page) until `hasNextPage` is false.
 *
 * Characters without a numeric ID or without a native name are skipped.
 * Voice actors are limited to Japanese ones by the query, and those without a
 * numeric ID or any name are dropped.
 *
 * @param mediaId       AniList media ID.
 * @param beforeRequest Optional hook awaited before each page request.
 * @param onPageFetched Optional callback invoked with the page number after
 *   each page has been downloaded.
 * @returns The media title (english, then romaji, then native, then
 *   `AniList <id>`) and the collected characters.
 * @throws Error when AniList returns no media for `mediaId`.
 */
async function fetchCharactersForMedia(
  mediaId: number,
  beforeRequest?: () => Promise<void>,
  onPageFetched?: (page: number) => void,
): Promise<{
  mediaTitle: string;
  characters: CharacterRecord[];
}> {
  const characterPageQuery = `
        query($id: Int!, $page: Int!) {
          Media(id: $id, type: ANIME) {
            title {
              romaji
              english
              native
            }
            characters(page: $page, perPage: 50, sort: [ROLE, RELEVANCE, ID]) {
              pageInfo {
                hasNextPage
              }
              edges {
                role
                voiceActors(language: JAPANESE) {
                  id
                  name {
                    full
                    native
                  }
                  image {
                    medium
                  }
                }
                node {
                  id
                  description(asHtml: false)
                  gender
                  age
                  dateOfBirth {
                    month
                    day
                  }
                  bloodType
                  image {
                    large
                    medium
                  }
                  name {
                    first
                    full
                    last
                    native
                    alternative
                  }
                }
              }
            }
          }
        }
      `;

  const collected: CharacterRecord[] = [];
  let resolvedTitle = '';

  for (let page = 1; ; page += 1) {
    const pageResponse = await fetchAniList<AniListCharacterPageResponse>(
      characterPageQuery,
      { id: mediaId, page },
      beforeRequest,
    );
    onPageFetched?.(page);

    const media = pageResponse.Media;
    if (!media) {
      throw new Error(`AniList media ${mediaId} not found.`);
    }

    // The title is resolved once, from the first page that yields a non-empty one.
    if (!resolvedTitle) {
      const titles = media.title;
      resolvedTitle =
        titles?.english?.trim() ||
        titles?.romaji?.trim() ||
        titles?.native?.trim() ||
        `AniList ${mediaId}`;
    }

    for (const edge of media.characters?.edges ?? []) {
      const node = edge?.node;
      if (!node || typeof node.id !== 'number') {
        continue;
      }

      const names = node.name;
      const firstNameHint = names?.first?.trim() || '';
      const fullName = names?.full?.trim() || '';
      const lastNameHint = names?.last?.trim() || '';
      const nativeName = names?.native?.trim() || '';

      // Trimmed, non-empty, de-duplicated aliases in their original order.
      const uniqueAliases = new Set<string>();
      for (const alias of names?.alternative ?? []) {
        if (typeof alias !== 'string') continue;
        const trimmedAlias = alias.trim();
        if (trimmedAlias.length > 0) {
          uniqueAliases.add(trimmedAlias);
        }
      }
      const alternativeNames = [...uniqueAliases];

      // The native name is required to generate terms for the character.
      if (!nativeName) {
        continue;
      }

      const voiceActors: VoiceActorRecord[] = [];
      for (const actor of edge?.voiceActors ?? []) {
        if (!actor || typeof actor.id !== 'number') continue;
        const actorFullName = actor.name?.full?.trim() || '';
        const actorNativeName = actor.name?.native?.trim() || '';
        if (!actorFullName && !actorNativeName) continue;
        voiceActors.push({
          id: actor.id,
          fullName: actorFullName,
          nativeName: actorNativeName,
          imageUrl: actor.image?.medium || null,
        });
      }

      const birth = node.dateOfBirth;
      const hasBirthday = typeof birth?.month === 'number' && typeof birth?.day === 'number';

      // `age` is handled both as a string and as a number.
      let age = '';
      if (typeof node.age === 'string') {
        age = node.age.trim();
      } else if (typeof node.age === 'number') {
        age = String(node.age);
      }

      collected.push({
        id: node.id,
        role: mapRole(edge?.role),
        firstNameHint,
        fullName,
        lastNameHint,
        nativeName,
        alternativeNames,
        bloodType: node.bloodType?.trim() || '',
        birthday: hasBirthday ? [node.dateOfBirth.month, node.dateOfBirth.day] : null,
        description: node.description || '',
        imageUrl: node.image?.large || node.image?.medium || null,
        age,
        sex: node.gender?.trim() || '',
        voiceActors,
      });
    }

    if (!media.characters?.pageInfo?.hasNextPage) {
      break;
    }
  }

  return {
    mediaTitle: resolvedTitle,
    characters: collected,
  };
}
|
||
|
||
/**
 * Downloads one image, best effort.
 *
 * @param imageUrl URL of the image.
 * @param charId   ID used to build the returned `filename` (`c<charId>.<ext>`).
 * @returns The image bytes together with the extension that `inferImageExt`
 *   derives from the `content-type` header, or `null` when the request fails,
 *   the status is not OK, or the body is empty. Errors are never thrown.
 */
async function downloadCharacterImage(
  imageUrl: string,
  charId: number,
): Promise<{
  filename: string;
  ext: string;
  bytes: Buffer;
} | null> {
  try {
    const response = await fetch(imageUrl);
    if (!response.ok) {
      return null;
    }

    const body = await response.arrayBuffer();
    if (body.byteLength === 0) {
      return null;
    }

    const bytes = Buffer.from(body);
    const contentType = response.headers.get('content-type');
    const ext = inferImageExt(contentType);
    return { filename: `c${charId}.${ext}`, ext, bytes };
  } catch {
    // A missing image must not abort dictionary generation.
    return null;
  }
}
|
||
|
||
/**
 * Returns the display title of the single-media dictionary for `mediaId`.
 */
function buildDictionaryTitle(mediaId: number): string {
  const sourceLabel = `AniList ${mediaId}`;
  return `SubMiner Character Dictionary (${sourceLabel})`;
}
|
||
|
||
/**
 * Builds the dictionary index object (title, revision, format 3, author and
 * description). Property order is kept stable so the serialised JSON stays
 * the same across runs.
 */
function createIndex(
  dictionaryTitle: string,
  description: string,
  revision: string,
): Record<string, unknown> {
  const indexFormatVersion = 3;
  const authorName = 'SubMiner';

  const index: Record<string, unknown> = {
    title: dictionaryTitle,
    revision,
    format: indexFormatVersion,
    author: authorName,
    description,
  };
  return index;
}
|
||
|
||
/**
 * Returns the tag bank rows: the generic `name` tag followed by one tag per
 * character role. Each row is `[tag, category, 0, notes, 0]`.
 */
function createTagBank(): Array<[string, string, number, string, number]> {
  const roleTags: Array<[string, string]> = [
    ['main', 'Protagonist'],
    ['primary', 'Main character'],
    ['side', 'Side character'],
    ['appears', 'Minor appearance'],
  ];

  const rows: Array<[string, string, number, string, number]> = [
    ['name', 'partOfSpeech', 0, 'Character name', 0],
  ];
  for (const [tag, notes] of roleTags) {
    rows.push([tag, 'name', 0, notes, 0]);
  }
  return rows;
}
|
||
|
||
/**
 * Turns fetched character records plus their downloaded images into a
 * snapshot.
 *
 * For every character one glossary is built and shared by all of that
 * character's terms. Each distinct term from `buildNameTerms` becomes one
 * term entry with its own reading. Terms are de-duplicated per character, not
 * across characters.
 *
 * @param updatedAt Timestamp stored on the snapshot.
 * @param getCollapsibleSectionOpenState Returns whether a glossary section
 *   should be rendered open.
 * @throws Error when no term entry could be generated at all.
 */
function buildSnapshotFromCharacters(
  mediaId: number,
  mediaTitle: string,
  characters: CharacterRecord[],
  imagesByCharacterId: Map<number, CharacterDictionarySnapshotImage>,
  imagesByVaId: Map<number, CharacterDictionarySnapshotImage>,
  updatedAt: number,
  getCollapsibleSectionOpenState: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean,
): CharacterDictionarySnapshot {
  const termEntries: CharacterDictionaryTermEntry[] = [];

  for (const character of characters) {
    const portraitPath = imagesByCharacterId.get(character.id)?.path ?? null;

    // Only voice actors whose image was actually downloaded get a path.
    const vaImagePaths = new Map<number, string>();
    character.voiceActors.forEach((actor) => {
      const actorImage = imagesByVaId.get(actor.id);
      if (actorImage) {
        vaImagePaths.set(actor.id, actorImage.path);
      }
    });

    const glossary = createDefinitionGlossary(
      character,
      mediaTitle,
      portraitPath,
      vaImagePaths,
      getCollapsibleSectionOpenState,
    );
    const candidateTerms = buildNameTerms(character);
    const nameParts = splitJapaneseName(
      character.nativeName,
      character.firstNameHint,
      character.lastNameHint,
    );
    const readings = generateNameReadings(
      character.nativeName,
      character.fullName,
      character.firstNameHint,
      character.lastNameHint,
    );

    // A Set keeps first-seen order while dropping repeated terms.
    const distinctTerms = new Set<string>(candidateTerms);
    for (const term of distinctTerms) {
      const reading = buildReadingForTerm(term, character, readings, nameParts);
      termEntries.push(buildTermEntry(term, reading, character.role, glossary));
    }
  }

  if (termEntries.length === 0) {
    throw new Error('No dictionary entries generated from AniList character data.');
  }

  const images = [...imagesByCharacterId.values(), ...imagesByVaId.values()];
  return {
    formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION,
    mediaId,
    mediaTitle,
    entryCount: termEntries.length,
    updatedAt,
    termEntries,
    images,
  };
}
|
||
|
||
/**
 * Maps a collapsible section's summary title back to its section key.
 * The match is exact and case-sensitive; unknown titles return `null`.
 */
function getCollapsibleSectionKeyFromTitle(
  title: string,
): AnilistCharacterDictionaryCollapsibleSectionKey | null {
  switch (title) {
    case 'Description':
      return 'description';
    case 'Character Information':
      return 'characterInformation';
    case 'Voiced by':
      return 'voicedBy';
    default:
      return null;
  }
}
|
||
|
||
/**
 * Returns a deep copy of a structured-content value in which every
 * recognised `details` node has its `open` flag recomputed.
 *
 * Arrays and objects are copied recursively; primitives are returned as-is.
 * A `details` node is recognised when the first element of its `content`
 * array is an object whose `content` is a string that
 * `getCollapsibleSectionKeyFromTitle` maps to a section key. All other nodes
 * keep their properties unchanged. The input is never mutated.
 */
function applyCollapsibleOpenStatesToStructuredValue(
  value: unknown,
  getCollapsibleSectionOpenState: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean,
): unknown {
  if (Array.isArray(value)) {
    return value.map((element) =>
      applyCollapsibleOpenStatesToStructuredValue(element, getCollapsibleSectionOpenState),
    );
  }
  if (!value || typeof value !== 'object') {
    return value;
  }

  const source = value as Record<string, unknown>;
  const copy: Record<string, unknown> = {};
  for (const key of Object.keys(source)) {
    copy[key] = applyCollapsibleOpenStatesToStructuredValue(
      source[key],
      getCollapsibleSectionOpenState,
    );
  }

  if (source.tag !== 'details') {
    return copy;
  }

  // The section title lives in the first child (the summary node).
  const summary = Array.isArray(source.content) ? source.content[0] : undefined;
  if (!summary || typeof summary !== 'object' || Array.isArray(summary)) {
    return copy;
  }

  const summaryText = (summary as Record<string, unknown>).content;
  if (typeof summaryText !== 'string') {
    return copy;
  }

  const section = getCollapsibleSectionKeyFromTitle(summaryText);
  if (section) {
    copy.open = getCollapsibleSectionOpenState(section);
  }
  return copy;
}
|
||
|
||
/**
 * Returns copies of `termEntries` whose glossary (tuple index 5) has the
 * collapsible `open` flags recomputed. All other tuple slots are carried over
 * unchanged and the input entries are not mutated.
 */
function applyCollapsibleOpenStatesToTermEntries(
  termEntries: CharacterDictionaryTermEntry[],
  getCollapsibleSectionOpenState: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean,
): CharacterDictionaryTermEntry[] {
  const glossaryIndex = 5;

  return termEntries.map((entry) => {
    const refreshedGlossary = entry[glossaryIndex].map((glossaryItem) =>
      applyCollapsibleOpenStatesToStructuredValue(glossaryItem, getCollapsibleSectionOpenState),
    ) as CharacterDictionaryGlossaryEntry[];

    const updatedEntry = [...entry] as CharacterDictionaryTermEntry;
    updatedEntry[glossaryIndex] = refreshedGlossary;
    return updatedEntry;
  });
}
|
||
|
||
/**
 * Writes a dictionary ZIP archive to `outputPath`.
 *
 * Archive contents, in order: `index.json`, `tag_bank_1.json`, every image
 * (decoded from base64) at its stored path, then the term entries split into
 * `term_bank_<n>.json` files of at most 10,000 entries each. No term bank is
 * written when `termEntries` is empty.
 *
 * @returns The archive path and the number of term entries written.
 */
function buildDictionaryZip(
  outputPath: string,
  dictionaryTitle: string,
  description: string,
  revision: string,
  termEntries: CharacterDictionaryTermEntry[],
  images: CharacterDictionarySnapshotImage[],
): { zipPath: string; entryCount: number } {
  const entriesPerBank = 10_000;
  const toJsonBuffer = (value: unknown, indent?: number): Buffer =>
    Buffer.from(JSON.stringify(value, null, indent), 'utf8');

  const indexFile = {
    name: 'index.json',
    data: toJsonBuffer(createIndex(dictionaryTitle, description, revision), 2),
  };
  const tagBankFile = {
    name: 'tag_bank_1.json',
    data: toJsonBuffer(createTagBank()),
  };

  const imageFiles = images.map((image) => ({
    name: image.path,
    data: Buffer.from(image.dataBase64, 'base64'),
  }));

  const termBankFiles: Array<{ name: string; data: Buffer }> = [];
  for (let start = 0, bank = 1; start < termEntries.length; start += entriesPerBank, bank += 1) {
    termBankFiles.push({
      name: `term_bank_${bank}.json`,
      data: toJsonBuffer(termEntries.slice(start, start + entriesPerBank)),
    });
  }

  const zipFiles: Array<{ name: string; data: Buffer }> = [
    indexFile,
    tagBankFile,
    ...imageFiles,
    ...termBankFiles,
  ];

  ensureDir(path.dirname(outputPath));
  fs.writeFileSync(outputPath, createStoredZip(zipFiles));
  return { zipPath: outputPath, entryCount: termEntries.length };
}
|
||
|
||
/**
 * Derives the merged dictionary's revision: the first 12 hex characters of
 * the SHA-1 of a JSON fingerprint made of the media ID list and each
 * snapshot's `mediaId`, `updatedAt` and `entryCount`.
 *
 * The revision changes whenever the set/order of media or any of those
 * snapshot fields changes.
 */
function buildMergedRevision(mediaIds: number[], snapshots: CharacterDictionarySnapshot[]): string {
  const fingerprint = {
    mediaIds,
    snapshots: snapshots.map(({ mediaId, updatedAt, entryCount }) => ({
      mediaId,
      updatedAt,
      entryCount,
    })),
  };

  const digest = createHash('sha1').update(JSON.stringify(fingerprint)).digest('hex');
  return digest.slice(0, 12);
}
|
||
|
||
/**
 * Normalises a list of AniList media IDs for the merged dictionary.
 *
 * Non-finite values are dropped, the rest are floored to integers, anything
 * that is not a positive integer after flooring is dropped, duplicates are
 * removed, and the result is sorted ascending. The input array is not
 * modified.
 *
 * Flooring happens before the positivity check so that a fractional value
 * such as `0.5` cannot slip through as the invalid ID `0`.
 */
function normalizeMergedMediaIds(mediaIds: number[]): number[] {
  const uniqueIds = new Set<number>();
  for (const mediaId of mediaIds) {
    if (!Number.isFinite(mediaId)) {
      continue;
    }
    const wholeId = Math.floor(mediaId);
    if (wholeId > 0) {
      uniqueIds.add(wholeId);
    }
  }
  return [...uniqueIds].sort((left, right) => left - right);
}
|
||
|
||
/**
 * Creates the character dictionary runtime service.
 *
 * All files are stored under `<deps.userDataPath>/character-dictionaries`:
 * one JSON snapshot per AniList media, one ZIP per media, and one merged ZIP.
 *
 * Every operation paces its AniList requests: the first request of an
 * operation is sent immediately and each later one waits
 * `ANILIST_REQUEST_DELAY_MS`. Image downloads are spaced by
 * `CHARACTER_IMAGE_DOWNLOAD_DELAY_MS`.
 *
 * Returned operations:
 * - `getOrCreateCurrentSnapshot`: resolves the current (or given) media on
 *   AniList and returns its snapshot summary, generating the snapshot on a
 *   cache miss.
 * - `buildMergedDictionary`: ensures a snapshot exists for every given media
 *   ID and writes the merged ZIP from them.
 * - `generateForCurrentMedia`: like `getOrCreateCurrentSnapshot`, and also
 *   writes the per-media ZIP.
 */
export function createCharacterDictionaryRuntimeService(deps: CharacterDictionaryRuntimeDeps): {
  getOrCreateCurrentSnapshot: (
    targetPath?: string,
    progress?: CharacterDictionarySnapshotProgressCallbacks,
  ) => Promise<CharacterDictionarySnapshotResult>;
  buildMergedDictionary: (mediaIds: number[]) => Promise<MergedCharacterDictionaryBuildResult>;
  generateForCurrentMedia: (
    targetPath?: string,
    options?: CharacterDictionaryGenerateOptions,
  ) => Promise<CharacterDictionaryBuildResult>;
} {
  const outputDir = path.join(deps.userDataPath, 'character-dictionaries');
  const sleepMs = deps.sleep ?? sleep;
  const getCollapsibleSectionOpenState = deps.getCollapsibleSectionOpenState ?? (() => false);

  /**
   * Returns a fresh `beforeRequest` hook for one operation: the first call
   * resolves immediately, every later call waits `ANILIST_REQUEST_DELAY_MS`.
   * The hook assumes requests are issued one after another, not in parallel.
   */
  const createAniListRequestThrottle = (): (() => Promise<void>) => {
    let hasAniListRequest = false;
    return async (): Promise<void> => {
      if (!hasAniListRequest) {
        hasAniListRequest = true;
        return;
      }
      await sleepMs(ANILIST_REQUEST_DELAY_MS);
    };
  };

  /** Guesses the anime from the target (or current) media and matches it on AniList. */
  const resolveCurrentMedia = async (
    targetPath?: string,
    beforeRequest?: () => Promise<void>,
  ): Promise<ResolvedAniListMedia> => {
    deps.logInfo?.('[dictionary] resolving current anime for character dictionary generation');
    const dictionaryTarget = targetPath?.trim() || '';
    // An explicit target wins; otherwise fall back to whatever is playing now.
    const guessInput =
      dictionaryTarget.length > 0
        ? resolveDictionaryGuessInputs(dictionaryTarget)
        : {
            mediaPath: deps.getCurrentMediaPath(),
            mediaTitle: deps.getCurrentMediaTitle(),
          };
    const mediaPathForGuess = deps.resolveMediaPathForJimaku(guessInput.mediaPath);
    const mediaTitle = guessInput.mediaTitle;
    const guessed = await deps.guessAnilistMediaInfo(mediaPathForGuess, mediaTitle);
    if (!guessed || !guessed.title.trim()) {
      throw new Error('Unable to resolve current anime from media path/title.');
    }
    deps.logInfo?.(
      `[dictionary] current anime guess: ${guessed.title.trim()}${
        typeof guessed.episode === 'number' && guessed.episode > 0
          ? ` (episode ${guessed.episode})`
          : ''
      }`,
    );
    const resolved = await resolveAniListMediaIdFromGuess(guessed, beforeRequest);
    deps.logInfo?.(`[dictionary] AniList match: ${resolved.title} -> AniList ${resolved.id}`);
    return resolved;
  };

  /**
   * Returns the snapshot summary for `mediaId`, reading it from disk when a
   * valid snapshot exists and otherwise fetching characters and images from
   * AniList, building the snapshot and storing it.
   */
  const getOrCreateSnapshot = async (
    mediaId: number,
    mediaTitleHint?: string,
    beforeRequest?: () => Promise<void>,
    progress?: CharacterDictionarySnapshotProgressCallbacks,
  ): Promise<CharacterDictionarySnapshotResult> => {
    const snapshotPath = getSnapshotPath(outputDir, mediaId);
    const cachedSnapshot = readSnapshot(snapshotPath);
    if (cachedSnapshot) {
      deps.logInfo?.(`[dictionary] snapshot hit for AniList ${mediaId}`);
      return {
        mediaId: cachedSnapshot.mediaId,
        mediaTitle: cachedSnapshot.mediaTitle,
        entryCount: cachedSnapshot.entryCount,
        fromCache: true,
        updatedAt: cachedSnapshot.updatedAt,
      };
    }

    progress?.onGenerating?.({
      mediaId,
      mediaTitle: mediaTitleHint || `AniList ${mediaId}`,
    });
    deps.logInfo?.(`[dictionary] snapshot miss for AniList ${mediaId}, fetching characters`);

    const { mediaTitle: fetchedMediaTitle, characters } = await fetchCharactersForMedia(
      mediaId,
      beforeRequest,
      (page) => {
        deps.logInfo?.(
          `[dictionary] downloaded AniList character page ${page} for AniList ${mediaId}`,
        );
      },
    );
    if (characters.length === 0) {
      throw new Error(`No characters returned for AniList media ${mediaId}.`);
    }

    const imagesByCharacterId = new Map<number, CharacterDictionarySnapshotImage>();
    const imagesByVaId = new Map<number, CharacterDictionarySnapshotImage>();
    const allImageUrls: Array<{ id: number; url: string; kind: 'character' | 'va' }> = [];
    // A voice actor can voice several characters; download each image once.
    const seenVaIds = new Set<number>();
    for (const character of characters) {
      if (character.imageUrl) {
        allImageUrls.push({ id: character.id, url: character.imageUrl, kind: 'character' });
      }
      for (const va of character.voiceActors) {
        if (va.imageUrl && !seenVaIds.has(va.id)) {
          seenVaIds.add(va.id);
          allImageUrls.push({ id: va.id, url: va.imageUrl, kind: 'va' });
        }
      }
    }
    if (allImageUrls.length > 0) {
      deps.logInfo?.(
        `[dictionary] downloading ${allImageUrls.length} images for AniList ${mediaId}`,
      );
    }
    let hasAttemptedImageDownload = false;
    for (const entry of allImageUrls) {
      // Pause between downloads, but not before the first one.
      if (hasAttemptedImageDownload) {
        await sleepMs(CHARACTER_IMAGE_DOWNLOAD_DELAY_MS);
      }
      hasAttemptedImageDownload = true;
      const image = await downloadCharacterImage(entry.url, entry.id);
      if (!image) continue;
      if (entry.kind === 'character') {
        imagesByCharacterId.set(entry.id, {
          path: buildSnapshotImagePath(mediaId, entry.id, image.ext),
          dataBase64: image.bytes.toString('base64'),
        });
      } else {
        imagesByVaId.set(entry.id, {
          path: buildVaImagePath(mediaId, entry.id, image.ext),
          dataBase64: image.bytes.toString('base64'),
        });
      }
    }

    const snapshot = buildSnapshotFromCharacters(
      mediaId,
      fetchedMediaTitle || mediaTitleHint || `AniList ${mediaId}`,
      characters,
      imagesByCharacterId,
      imagesByVaId,
      deps.now(),
      getCollapsibleSectionOpenState,
    );
    writeSnapshot(snapshotPath, snapshot);
    deps.logInfo?.(
      `[dictionary] stored snapshot for AniList ${mediaId}: ${snapshot.entryCount} terms`,
    );

    return {
      mediaId: snapshot.mediaId,
      mediaTitle: snapshot.mediaTitle,
      entryCount: snapshot.entryCount,
      fromCache: false,
      updatedAt: snapshot.updatedAt,
    };
  };

  return {
    getOrCreateCurrentSnapshot: async (
      targetPath?: string,
      progress?: CharacterDictionarySnapshotProgressCallbacks,
    ) => {
      const waitForAniListRequestSlot = createAniListRequestThrottle();
      const resolvedMedia = await resolveCurrentMedia(targetPath, waitForAniListRequestSlot);
      progress?.onChecking?.({
        mediaId: resolvedMedia.id,
        mediaTitle: resolvedMedia.title,
      });
      return getOrCreateSnapshot(
        resolvedMedia.id,
        resolvedMedia.title,
        waitForAniListRequestSlot,
        progress,
      );
    },
    buildMergedDictionary: async (mediaIds: number[]) => {
      const normalizedMediaIds = normalizeMergedMediaIds(mediaIds);
      // Snapshots are resolved one media at a time behind a shared throttle so
      // that cache misses never hit AniList in parallel or without pacing.
      const waitForAniListRequestSlot = createAniListRequestThrottle();
      const snapshots: CharacterDictionarySnapshot[] = [];
      for (const requestedMediaId of normalizedMediaIds) {
        const { mediaId } = await getOrCreateSnapshot(
          requestedMediaId,
          undefined,
          waitForAniListRequestSlot,
        );
        const snapshot = readSnapshot(getSnapshotPath(outputDir, mediaId));
        if (!snapshot) {
          throw new Error(`Missing character dictionary snapshot for AniList ${mediaId}.`);
        }
        snapshots.push(snapshot);
      }
      const revision = buildMergedRevision(normalizedMediaIds, snapshots);
      const description =
        snapshots.length === 1
          ? `Character names from ${snapshots[0]!.mediaTitle}`
          : `Character names from ${snapshots.length} recent anime`;
      const { zipPath, entryCount } = buildDictionaryZip(
        getMergedZipPath(outputDir),
        CHARACTER_DICTIONARY_MERGED_TITLE,
        description,
        revision,
        // Stored snapshots may carry stale open/closed flags; apply current ones.
        applyCollapsibleOpenStatesToTermEntries(
          snapshots.flatMap((snapshot) => snapshot.termEntries),
          getCollapsibleSectionOpenState,
        ),
        snapshots.flatMap((snapshot) => snapshot.images),
      );
      deps.logInfo?.(
        `[dictionary] rebuilt merged dictionary: ${normalizedMediaIds.join(', ') || '<empty>'} -> ${zipPath}`,
      );
      return {
        zipPath,
        revision,
        dictionaryTitle: CHARACTER_DICTIONARY_MERGED_TITLE,
        entryCount,
      };
    },
    generateForCurrentMedia: async (
      targetPath?: string,
      _options?: CharacterDictionaryGenerateOptions,
    ) => {
      const waitForAniListRequestSlot = createAniListRequestThrottle();
      const resolvedMedia = await resolveCurrentMedia(targetPath, waitForAniListRequestSlot);
      const snapshot = await getOrCreateSnapshot(
        resolvedMedia.id,
        resolvedMedia.title,
        waitForAniListRequestSlot,
      );
      const storedSnapshot = readSnapshot(getSnapshotPath(outputDir, resolvedMedia.id));
      if (!storedSnapshot) {
        throw new Error(`Snapshot missing after generation for AniList ${resolvedMedia.id}.`);
      }
      const revision = String(storedSnapshot.updatedAt);
      const dictionaryTitle = buildDictionaryTitle(resolvedMedia.id);
      const description = `Character names from ${storedSnapshot.mediaTitle} [AniList media ID ${resolvedMedia.id}]`;
      const zipPath = path.join(outputDir, `anilist-${resolvedMedia.id}.zip`);
      deps.logInfo?.(`[dictionary] building ZIP for AniList ${resolvedMedia.id}`);
      buildDictionaryZip(
        zipPath,
        dictionaryTitle,
        description,
        revision,
        applyCollapsibleOpenStatesToTermEntries(
          storedSnapshot.termEntries,
          getCollapsibleSectionOpenState,
        ),
        storedSnapshot.images,
      );
      deps.logInfo?.(
        `[dictionary] generated AniList ${resolvedMedia.id}: ${storedSnapshot.entryCount} terms -> ${zipPath}`,
      );
      return {
        zipPath,
        fromCache: snapshot.fromCache,
        mediaId: resolvedMedia.id,
        mediaTitle: storedSnapshot.mediaTitle,
        entryCount: storedSnapshot.entryCount,
        dictionaryTitle,
        revision,
      };
    },
  };
}
|