diff --git a/docs-site/architecture.md b/docs-site/architecture.md index 97a01ac..9708608 100644 --- a/docs-site/architecture.md +++ b/docs-site/architecture.md @@ -39,6 +39,7 @@ src/ types.ts # Shared type definitions main/ # Main-process composition/runtime adapters app-lifecycle.ts # App lifecycle + app-ready runtime runner factories + character-dictionary-runtime.ts # Character-dictionary orchestration/public runtime API cli-runtime.ts # CLI command runtime service adapters config-validation.ts # Startup/hot-reload config error formatting and fail-fast helpers dependencies.ts # Shared dependency builders for IPC/runtime services @@ -53,6 +54,7 @@ src/ startup-lifecycle.ts # Lifecycle runtime runner adapter state.ts # Application runtime state container + reducer transitions subsync-runtime.ts # Subsync command runtime adapter + character-dictionary-runtime/ # Character-dictionary fetch/build/cache modules + focused tests runtime/ composers/ # High-level composition clusters used by main.ts domains/ # Domain barrel exports (startup/overlay/mpv/jellyfin/...) diff --git a/docs/architecture/domains.md b/docs/architecture/domains.md index 0a910ac..9f4a7e6 100644 --- a/docs/architecture/domains.md +++ b/docs/architecture/domains.md @@ -3,7 +3,7 @@ # Domain Ownership Status: active -Last verified: 2026-03-13 +Last verified: 2026-03-26 Owner: Kyle Yasuda Read when: you need to find the owner module for a behavior or test surface @@ -23,17 +23,28 @@ Read when: you need to find the owner module for a behavior or test surface - Anki workflow: `src/anki-integration/`, `src/core/services/anki-jimaku*.ts` - Immersion tracking: `src/core/services/immersion-tracker/` Includes stats storage/query schema such as `imm_videos`, `imm_media_art`, and `imm_youtube_videos` for per-video and YouTube-specific library metadata. 
-- AniList tracking: `src/core/services/anilist/`, `src/main/runtime/composers/anilist-*` +- AniList tracking + character dictionary: `src/core/services/anilist/`, `src/main/runtime/composers/anilist-*`, `src/main/character-dictionary-runtime.ts`, `src/main/character-dictionary-runtime/` - Jellyfin integration: `src/core/services/jellyfin*.ts`, `src/main/runtime/composers/jellyfin-*` - Window trackers: `src/window-trackers/` - Stats app: `stats/` - Public docs site: `docs-site/` +## Shared Contract Entry Points + +- Config + app-state contracts: `src/types/config.ts` +- Subtitle/token/media annotation contracts: `src/types/subtitle.ts` +- Runtime/window/controller/Electron bridge contracts: `src/types/runtime.ts` +- Anki-specific contracts: `src/types/anki.ts` +- External integration contracts: `src/types/integrations.ts` +- Runtime-option contracts: `src/types/runtime-options.ts` +- Compatibility-only barrel: `src/types.ts` + ## Ownership Heuristics - Runtime wiring or dependency setup: start in `src/main/` - Business logic or service behavior: start in `src/core/services/` - UI interaction or overlay DOM behavior: start in `src/renderer/` - Command parsing or mpv launch flow: start in `launcher/` +- Shared contract changes: add or edit the narrowest `src/types/<domain>.ts` entrypoint; only touch `src/types.ts` for compatibility exports. 
- User-facing docs: `docs-site/` - Internal process/docs: `docs/` diff --git a/src/main/character-dictionary-runtime.ts b/src/main/character-dictionary-runtime.ts index dc21b3b..3480500 100644 --- a/src/main/character-dictionary-runtime.ts +++ b/src/main/character-dictionary-runtime.ts @@ -1,1028 +1,60 @@ import * as fs from 'fs'; import * as os from 'os'; import * as path from 'path'; -import { createHash } from 'node:crypto'; -import type { AnilistMediaGuess } from '../core/services/anilist/anilist-updater'; -import type { AnilistCharacterDictionaryCollapsibleSectionKey } from '../types'; import { hasVideoExtension } from '../shared/video-extensions'; +import { + applyCollapsibleOpenStatesToTermEntries, + buildDictionaryTitle, + buildDictionaryZip, + buildSnapshotFromCharacters, + buildSnapshotImagePath, + buildVaImagePath, +} from './character-dictionary-runtime/build'; +import { + buildMergedRevision, + getMergedZipPath, + getSnapshotPath, + normalizeMergedMediaIds, + readSnapshot, + writeSnapshot, +} from './character-dictionary-runtime/cache'; +import { + ANILIST_REQUEST_DELAY_MS, + CHARACTER_DICTIONARY_MERGED_TITLE, + CHARACTER_IMAGE_DOWNLOAD_DELAY_MS, +} from './character-dictionary-runtime/constants'; +import { + downloadCharacterImage, + fetchCharactersForMedia, + resolveAniListMediaIdFromGuess, +} from './character-dictionary-runtime/fetch'; +import type { + CharacterDictionaryBuildResult, + CharacterDictionaryGenerateOptions, + CharacterDictionaryRuntimeDeps, + CharacterDictionarySnapshotImage, + CharacterDictionarySnapshotProgress, + CharacterDictionarySnapshotProgressCallbacks, + CharacterDictionarySnapshotResult, + MergedCharacterDictionaryBuildResult, + ResolvedAniListMedia, +} from './character-dictionary-runtime/types'; -const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co'; -const ANILIST_REQUEST_DELAY_MS = 2000; -const CHARACTER_IMAGE_DOWNLOAD_DELAY_MS = 250; -const HONORIFIC_SUFFIXES = [ - { term: 'さん', reading: 'さん' }, - { term: '様', 
reading: 'さま' }, - { term: '先生', reading: 'せんせい' }, - { term: '先輩', reading: 'せんぱい' }, - { term: '後輩', reading: 'こうはい' }, - { term: '氏', reading: 'し' }, - { term: '君', reading: 'くん' }, - { term: 'くん', reading: 'くん' }, - { term: 'ちゃん', reading: 'ちゃん' }, - { term: 'たん', reading: 'たん' }, - { term: '坊', reading: 'ぼう' }, - { term: '殿', reading: 'どの' }, - { term: '博士', reading: 'はかせ' }, - { term: '社長', reading: 'しゃちょう' }, - { term: '部長', reading: 'ぶちょう' }, -] as const; -type CharacterDictionaryRole = 'main' | 'primary' | 'side' | 'appears'; - -type CharacterDictionaryGlossaryEntry = string | Record; -type CharacterDictionaryTermEntry = [ - string, - string, - string, - string, - number, - CharacterDictionaryGlossaryEntry[], - number, - string, -]; - -type CharacterDictionarySnapshotImage = { - path: string; - dataBase64: string; -}; - -type CharacterBirthday = [number, number]; - -type JapaneseNameParts = { - hasSpace: boolean; - original: string; - combined: string; - family: string | null; - given: string | null; -}; - -type NameReadings = { - hasSpace: boolean; - original: string; - full: string; - family: string; - given: string; -}; - -export type CharacterDictionarySnapshot = { - formatVersion: number; - mediaId: number; - mediaTitle: string; - entryCount: number; - updatedAt: number; - termEntries: CharacterDictionaryTermEntry[]; - images: CharacterDictionarySnapshotImage[]; -}; - -const CHARACTER_DICTIONARY_FORMAT_VERSION = 15; -const CHARACTER_DICTIONARY_MERGED_TITLE = 'SubMiner Character Dictionary'; - -type AniListSearchResponse = { - Page?: { - media?: Array<{ - id: number; - episodes?: number | null; - title?: { - romaji?: string | null; - english?: string | null; - native?: string | null; - }; - }>; - }; -}; - -type AniListCharacterPageResponse = { - Media?: { - title?: { - romaji?: string | null; - english?: string | null; - native?: string | null; - }; - characters?: { - pageInfo?: { - hasNextPage?: boolean | null; - }; - edges?: Array<{ - role?: string | 
null; - voiceActors?: Array<{ - id: number; - name?: { - full?: string | null; - native?: string | null; - } | null; - image?: { - large?: string | null; - medium?: string | null; - } | null; - }> | null; - node?: { - id: number; - description?: string | null; - image?: { - large?: string | null; - medium?: string | null; - } | null; - gender?: string | null; - age?: string | number | null; - dateOfBirth?: { - month?: number | null; - day?: number | null; - } | null; - bloodType?: string | null; - name?: { - first?: string | null; - full?: string | null; - last?: string | null; - native?: string | null; - alternative?: Array | null; - } | null; - } | null; - } | null>; - } | null; - } | null; -}; - -type VoiceActorRecord = { - id: number; - fullName: string; - nativeName: string; - imageUrl: string | null; -}; - -type CharacterRecord = { - id: number; - role: CharacterDictionaryRole; - firstNameHint: string; - fullName: string; - lastNameHint: string; - nativeName: string; - alternativeNames: string[]; - bloodType: string; - birthday: CharacterBirthday | null; - description: string; - imageUrl: string | null; - age: string; - sex: string; - voiceActors: VoiceActorRecord[]; -}; - -type ZipEntry = { - name: string; - data: Buffer; - crc32: number; - localHeaderOffset: number; -}; - -export type CharacterDictionaryBuildResult = { - zipPath: string; - fromCache: boolean; - mediaId: number; - mediaTitle: string; - entryCount: number; - dictionaryTitle?: string; - revision?: string; -}; - -export type CharacterDictionaryGenerateOptions = { - refreshTtlMs?: number; -}; - -export type CharacterDictionarySnapshotResult = { - mediaId: number; - mediaTitle: string; - entryCount: number; - fromCache: boolean; - updatedAt: number; -}; - -export type CharacterDictionarySnapshotProgress = { - mediaId: number; - mediaTitle: string; -}; - -export type CharacterDictionarySnapshotProgressCallbacks = { - onChecking?: (progress: CharacterDictionarySnapshotProgress) => void; - 
onGenerating?: (progress: CharacterDictionarySnapshotProgress) => void; -}; - -export type MergedCharacterDictionaryBuildResult = { - zipPath: string; - revision: string; - dictionaryTitle: string; - entryCount: number; -}; - -export interface CharacterDictionaryRuntimeDeps { - userDataPath: string; - getCurrentMediaPath: () => string | null; - getCurrentMediaTitle: () => string | null; - resolveMediaPathForJimaku: (mediaPath: string | null) => string | null; - guessAnilistMediaInfo: ( - mediaPath: string | null, - mediaTitle: string | null, - ) => Promise; - now: () => number; - sleep?: (ms: number) => Promise; - logInfo?: (message: string) => void; - logWarn?: (message: string) => void; - getCollapsibleSectionOpenState?: ( - section: AnilistCharacterDictionaryCollapsibleSectionKey, - ) => boolean; -} - -type ResolvedAniListMedia = { - id: number; - title: string; -}; +export type { + CharacterDictionaryBuildResult, + CharacterDictionaryGenerateOptions, + CharacterDictionaryRuntimeDeps, + CharacterDictionarySnapshot, + CharacterDictionarySnapshotProgress, + CharacterDictionarySnapshotProgressCallbacks, + CharacterDictionarySnapshotResult, + MergedCharacterDictionaryBuildResult, +} from './character-dictionary-runtime/types'; function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } -function normalizeTitle(value: string): string { - return value.trim().toLowerCase().replace(/\s+/g, ' '); -} - -function pickAniListSearchResult( - title: string, - episode: number | null, - media: Array<{ - id: number; - episodes?: number | null; - title?: { - romaji?: string | null; - english?: string | null; - native?: string | null; - }; - }>, -): ResolvedAniListMedia | null { - if (media.length === 0) return null; - - const episodeFiltered = - typeof episode === 'number' && episode > 0 - ? media.filter((entry) => entry.episodes == null || entry.episodes >= episode) - : media; - const candidates = episodeFiltered.length > 0 ? 
episodeFiltered : media; - const normalizedInput = normalizeTitle(title); - const exact = candidates.find((entry) => { - const candidateTitles = [entry.title?.romaji, entry.title?.english, entry.title?.native] - .filter((value): value is string => typeof value === 'string' && value.trim().length > 0) - .map((value) => normalizeTitle(value)); - return candidateTitles.includes(normalizedInput); - }); - const selected = exact ?? candidates[0]!; - const selectedTitle = - selected.title?.english?.trim() || - selected.title?.romaji?.trim() || - selected.title?.native?.trim() || - title; - return { - id: selected.id, - title: selectedTitle, - }; -} - -function hasKanaOnly(value: string): boolean { - return /^[\u3040-\u309f\u30a0-\u30ffー]+$/.test(value); -} - -function katakanaToHiragana(value: string): string { - let output = ''; - for (const char of value) { - const code = char.charCodeAt(0); - if (code >= 0x30a1 && code <= 0x30f6) { - output += String.fromCharCode(code - 0x60); - continue; - } - output += char; - } - return output; -} - -function buildReading(term: string): string { - const compact = term.replace(/\s+/g, '').trim(); - if (!compact || !hasKanaOnly(compact)) { - return ''; - } - return katakanaToHiragana(compact); -} - -function containsKanji(value: string): boolean { - for (const char of value) { - const code = char.charCodeAt(0); - if ((code >= 0x4e00 && code <= 0x9fff) || (code >= 0x3400 && code <= 0x4dbf)) { - return true; - } - } - return false; -} - -function isRomanizedName(value: string): boolean { - return /^[A-Za-zĀĪŪĒŌÂÊÎÔÛāīūēōâêîôû'’.\-\s]+$/.test(value); -} - -function normalizeRomanizedName(value: string): string { - return value - .normalize('NFKC') - .toLowerCase() - .replace(/[’']/g, '') - .replace(/[.\-]/g, ' ') - .replace(/ā|â/g, 'aa') - .replace(/ī|î/g, 'ii') - .replace(/ū|û/g, 'uu') - .replace(/ē|ê/g, 'ei') - .replace(/ō|ô/g, 'ou') - .replace(/\s+/g, ' ') - .trim(); -} - -const ROMANIZED_KANA_DIGRAPHS: ReadonlyArray<[string, string]> 
= [ - ['kya', 'キャ'], - ['kyu', 'キュ'], - ['kyo', 'キョ'], - ['gya', 'ギャ'], - ['gyu', 'ギュ'], - ['gyo', 'ギョ'], - ['sha', 'シャ'], - ['shu', 'シュ'], - ['sho', 'ショ'], - ['sya', 'シャ'], - ['syu', 'シュ'], - ['syo', 'ショ'], - ['ja', 'ジャ'], - ['ju', 'ジュ'], - ['jo', 'ジョ'], - ['jya', 'ジャ'], - ['jyu', 'ジュ'], - ['jyo', 'ジョ'], - ['cha', 'チャ'], - ['chu', 'チュ'], - ['cho', 'チョ'], - ['tya', 'チャ'], - ['tyu', 'チュ'], - ['tyo', 'チョ'], - ['cya', 'チャ'], - ['cyu', 'チュ'], - ['cyo', 'チョ'], - ['nya', 'ニャ'], - ['nyu', 'ニュ'], - ['nyo', 'ニョ'], - ['hya', 'ヒャ'], - ['hyu', 'ヒュ'], - ['hyo', 'ヒョ'], - ['bya', 'ビャ'], - ['byu', 'ビュ'], - ['byo', 'ビョ'], - ['pya', 'ピャ'], - ['pyu', 'ピュ'], - ['pyo', 'ピョ'], - ['mya', 'ミャ'], - ['myu', 'ミュ'], - ['myo', 'ミョ'], - ['rya', 'リャ'], - ['ryu', 'リュ'], - ['ryo', 'リョ'], - ['fa', 'ファ'], - ['fi', 'フィ'], - ['fe', 'フェ'], - ['fo', 'フォ'], - ['fyu', 'フュ'], - ['fyo', 'フョ'], - ['fya', 'フャ'], - ['va', 'ヴァ'], - ['vi', 'ヴィ'], - ['vu', 'ヴ'], - ['ve', 'ヴェ'], - ['vo', 'ヴォ'], - ['she', 'シェ'], - ['che', 'チェ'], - ['je', 'ジェ'], - ['tsi', 'ツィ'], - ['tse', 'ツェ'], - ['tsa', 'ツァ'], - ['tso', 'ツォ'], - ['thi', 'ティ'], - ['thu', 'テュ'], - ['dhi', 'ディ'], - ['dhu', 'デュ'], - ['wi', 'ウィ'], - ['we', 'ウェ'], - ['wo', 'ウォ'], -]; - -const ROMANIZED_KANA_MONOGRAPHS: ReadonlyArray<[string, string]> = [ - ['a', 'ア'], - ['i', 'イ'], - ['u', 'ウ'], - ['e', 'エ'], - ['o', 'オ'], - ['ka', 'カ'], - ['ki', 'キ'], - ['ku', 'ク'], - ['ke', 'ケ'], - ['ko', 'コ'], - ['ga', 'ガ'], - ['gi', 'ギ'], - ['gu', 'グ'], - ['ge', 'ゲ'], - ['go', 'ゴ'], - ['sa', 'サ'], - ['shi', 'シ'], - ['si', 'シ'], - ['su', 'ス'], - ['se', 'セ'], - ['so', 'ソ'], - ['za', 'ザ'], - ['ji', 'ジ'], - ['zi', 'ジ'], - ['zu', 'ズ'], - ['ze', 'ゼ'], - ['zo', 'ゾ'], - ['ta', 'タ'], - ['chi', 'チ'], - ['ti', 'チ'], - ['tsu', 'ツ'], - ['tu', 'ツ'], - ['te', 'テ'], - ['to', 'ト'], - ['da', 'ダ'], - ['de', 'デ'], - ['do', 'ド'], - ['na', 'ナ'], - ['ni', 'ニ'], - ['nu', 'ヌ'], - ['ne', 'ネ'], - ['no', 'ノ'], - ['ha', 'ハ'], - ['hi', 'ヒ'], - ['fu', 'フ'], - ['hu', 'フ'], - ['he', 'ヘ'], - ['ho', 'ホ'], - ['ba', 
'バ'], - ['bi', 'ビ'], - ['bu', 'ブ'], - ['be', 'ベ'], - ['bo', 'ボ'], - ['pa', 'パ'], - ['pi', 'ピ'], - ['pu', 'プ'], - ['pe', 'ペ'], - ['po', 'ポ'], - ['ma', 'マ'], - ['mi', 'ミ'], - ['mu', 'ム'], - ['me', 'メ'], - ['mo', 'モ'], - ['ya', 'ヤ'], - ['yu', 'ユ'], - ['yo', 'ヨ'], - ['ra', 'ラ'], - ['ri', 'リ'], - ['ru', 'ル'], - ['re', 'レ'], - ['ro', 'ロ'], - ['wa', 'ワ'], - ['w', 'ウ'], - ['wo', 'ヲ'], - ['n', 'ン'], -]; - -function romanizedTokenToKatakana(token: string): string | null { - const normalized = normalizeRomanizedName(token).replace(/\s+/g, ''); - if (!normalized || !/^[a-z]+$/.test(normalized)) { - return null; - } - - let output = ''; - for (let i = 0; i < normalized.length; ) { - const current = normalized[i]!; - const next = normalized[i + 1] ?? ''; - - if ( - i + 1 < normalized.length && - current === next && - current !== 'n' && - !'aeiou'.includes(current) - ) { - output += 'ッ'; - i += 1; - continue; - } - - if (current === 'n' && next.length > 0 && next !== 'y' && !'aeiou'.includes(next)) { - output += 'ン'; - i += 1; - continue; - } - - const digraph = ROMANIZED_KANA_DIGRAPHS.find(([romaji]) => normalized.startsWith(romaji, i)); - if (digraph) { - output += digraph[1]; - i += digraph[0].length; - continue; - } - - const monograph = ROMANIZED_KANA_MONOGRAPHS.find(([romaji]) => - normalized.startsWith(romaji, i), - ); - if (monograph) { - output += monograph[1]; - i += monograph[0].length; - continue; - } - - return null; - } - - return output.length > 0 ? output : null; -} - -function buildReadingFromRomanized(value: string): string { - const katakana = romanizedTokenToKatakana(value); - return katakana ? 
katakanaToHiragana(katakana) : ''; -} - -function buildReadingFromHint(value: string): string { - return buildReading(value) || buildReadingFromRomanized(value); -} - -function scoreJapaneseNamePartLength(length: number): number { - if (length === 2) return 3; - if (length === 1 || length === 3) return 2; - if (length === 4) return 1; - return 0; -} - -function inferJapaneseNameSplitIndex( - nameOriginal: string, - firstNameHint: string, - lastNameHint: string, -): number | null { - const chars = [...nameOriginal]; - if (chars.length < 2) return null; - - const familyHintLength = [...buildReadingFromHint(lastNameHint)].length; - const givenHintLength = [...buildReadingFromHint(firstNameHint)].length; - const totalHintLength = familyHintLength + givenHintLength; - const defaultBoundary = Math.round(chars.length / 2); - let bestIndex: number | null = null; - let bestScore = Number.NEGATIVE_INFINITY; - - for (let index = 1; index < chars.length; index += 1) { - const familyLength = index; - const givenLength = chars.length - index; - let score = - scoreJapaneseNamePartLength(familyLength) + scoreJapaneseNamePartLength(givenLength); - - if (chars.length >= 4 && familyLength >= 2 && givenLength >= 2) { - score += 1; - } - - if (totalHintLength > 0) { - const expectedFamilyLength = (chars.length * familyHintLength) / totalHintLength; - score -= Math.abs(familyLength - expectedFamilyLength) * 1.5; - } else { - score -= Math.abs(familyLength - defaultBoundary) * 0.5; - } - - if (familyLength === givenLength) { - score += 0.25; - } - - if (score > bestScore) { - bestScore = score; - bestIndex = index; - } - } - - return bestIndex; -} - -function addRomanizedKanaAliases(values: Iterable): string[] { - const aliases = new Set(); - for (const value of values) { - const trimmed = value.trim(); - if (!trimmed || !isRomanizedName(trimmed)) continue; - const katakana = romanizedTokenToKatakana(trimmed); - if (katakana) { - aliases.add(katakana); - } - } - return [...aliases]; -} - 
-function splitJapaneseName( - nameOriginal: string, - firstNameHint?: string, - lastNameHint?: string, -): JapaneseNameParts { - const trimmed = nameOriginal.trim(); - if (!trimmed) { - return { - hasSpace: false, - original: '', - combined: '', - family: null, - given: null, - }; - } - - const normalizedSpace = trimmed.replace(/[\s\u3000]+/g, ' ').trim(); - const spaceParts = normalizedSpace.split(' ').filter((part) => part.length > 0); - if (spaceParts.length === 2) { - const family = spaceParts[0]!; - const given = spaceParts[1]!; - return { - hasSpace: true, - original: normalizedSpace, - combined: `${family}${given}`, - family, - given, - }; - } - - const middleDotParts = trimmed - .split(/[・・·•]/) - .map((part) => part.trim()) - .filter((part) => part.length > 0); - if (middleDotParts.length === 2) { - const family = middleDotParts[0]!; - const given = middleDotParts[1]!; - return { - hasSpace: true, - original: trimmed, - combined: `${family}${given}`, - family, - given, - }; - } - - const hintedFirst = firstNameHint?.trim() || ''; - const hintedLast = lastNameHint?.trim() || ''; - if (hintedFirst && hintedLast) { - const familyGiven = `${hintedLast}${hintedFirst}`; - if (trimmed === familyGiven) { - return { - hasSpace: true, - original: trimmed, - combined: familyGiven, - family: hintedLast, - given: hintedFirst, - }; - } - - const givenFamily = `${hintedFirst}${hintedLast}`; - if (trimmed === givenFamily) { - return { - hasSpace: true, - original: trimmed, - combined: givenFamily, - family: hintedFirst, - given: hintedLast, - }; - } - } - - if (hintedFirst && hintedLast && containsKanji(trimmed)) { - const splitIndex = inferJapaneseNameSplitIndex(trimmed, hintedFirst, hintedLast); - if (splitIndex != null) { - const chars = [...trimmed]; - const family = chars.slice(0, splitIndex).join(''); - const given = chars.slice(splitIndex).join(''); - if (family && given) { - return { - hasSpace: true, - original: trimmed, - combined: trimmed, - family, - given, - 
}; - } - } - } - - return { - hasSpace: false, - original: trimmed, - combined: trimmed, - family: null, - given: null, - }; -} - -function generateNameReadings( - nameOriginal: string, - romanizedName: string, - firstNameHint?: string, - lastNameHint?: string, -): NameReadings { - const trimmed = nameOriginal.trim(); - if (!trimmed) { - return { - hasSpace: false, - original: '', - full: '', - family: '', - given: '', - }; - } - - const nameParts = splitJapaneseName(trimmed, firstNameHint, lastNameHint); - if (!nameParts.hasSpace || !nameParts.family || !nameParts.given) { - const full = containsKanji(trimmed) - ? buildReadingFromRomanized(romanizedName) - : buildReading(trimmed); - return { - hasSpace: false, - original: trimmed, - full, - family: full, - given: full, - }; - } - - const romanizedParts = romanizedName - .trim() - .split(/\s+/) - .filter((part) => part.length > 0); - const familyFromHints = buildReadingFromHint(lastNameHint || ''); - const givenFromHints = buildReadingFromHint(firstNameHint || ''); - const familyRomajiFallback = romanizedParts[0] || ''; - const givenRomajiFallback = romanizedParts.slice(1).join(' '); - const family = - familyFromHints || - (containsKanji(nameParts.family) - ? buildReadingFromRomanized(familyRomajiFallback) - : buildReading(nameParts.family)); - const given = - givenFromHints || - (containsKanji(nameParts.given) - ? 
buildReadingFromRomanized(givenRomajiFallback) - : buildReading(nameParts.given)); - const full = - `${family}${given}` || buildReading(trimmed) || buildReadingFromRomanized(romanizedName); - - return { - hasSpace: true, - original: nameParts.original, - full, - family, - given, - }; -} - -function expandRawNameVariants(rawName: string): string[] { - const trimmed = rawName.trim(); - if (!trimmed) return []; - - const variants = new Set([trimmed]); - const outer = trimmed - .replace(/[((][^()()]+[))]/g, ' ') - .replace(/\s+/g, ' ') - .trim(); - if (outer && outer !== trimmed) { - variants.add(outer); - } - - for (const match of trimmed.matchAll(/[((]([^()()]+)[))]/g)) { - const inner = match[1]?.trim() || ''; - if (inner) { - variants.add(inner); - } - } - - return [...variants]; -} - -function buildNameTerms(character: CharacterRecord): string[] { - const base = new Set(); - const rawNames = [character.nativeName, character.fullName, ...character.alternativeNames]; - for (const rawName of rawNames) { - for (const name of expandRawNameVariants(rawName)) { - base.add(name); - - const compact = name.replace(/[\s\u3000]+/g, ''); - if (compact && compact !== name) { - base.add(compact); - } - - const noMiddleDots = compact.replace(/[・・·•]/g, ''); - if (noMiddleDots && noMiddleDots !== compact) { - base.add(noMiddleDots); - } - - const split = name.split(/[\s\u3000]+/).filter((part) => part.trim().length > 0); - if (split.length === 2) { - base.add(split[0]!); - base.add(split[1]!); - } - - const splitByMiddleDot = name - .split(/[・・·•]/) - .map((part) => part.trim()) - .filter((part) => part.length > 0); - if (splitByMiddleDot.length >= 2) { - for (const part of splitByMiddleDot) { - base.add(part); - } - } - } - } - - const nativeParts = splitJapaneseName( - character.nativeName, - character.firstNameHint, - character.lastNameHint, - ); - if (nativeParts.family) { - base.add(nativeParts.family); - } - if (nativeParts.given) { - base.add(nativeParts.given); - } - - 
const withHonorifics = new Set(); - for (const entry of base) { - withHonorifics.add(entry); - for (const suffix of HONORIFIC_SUFFIXES) { - withHonorifics.add(`${entry}${suffix.term}`); - } - } - - for (const alias of addRomanizedKanaAliases(withHonorifics)) { - withHonorifics.add(alias); - for (const suffix of HONORIFIC_SUFFIXES) { - withHonorifics.add(`${alias}${suffix.term}`); - } - } - - return [...withHonorifics].filter((entry) => entry.trim().length > 0); -} - -const MONTH_NAMES: ReadonlyArray<[number, string]> = [ - [1, 'January'], - [2, 'February'], - [3, 'March'], - [4, 'April'], - [5, 'May'], - [6, 'June'], - [7, 'July'], - [8, 'August'], - [9, 'September'], - [10, 'October'], - [11, 'November'], - [12, 'December'], -]; - -const SEX_DISPLAY: ReadonlyArray<[string, string]> = [ - ['m', '♂ Male'], - ['f', '♀ Female'], - ['male', '♂ Male'], - ['female', '♀ Female'], -]; - -function formatBirthday(birthday: CharacterBirthday | null): string { - if (!birthday) return ''; - const [month, day] = birthday; - const monthName = MONTH_NAMES.find(([m]) => m === month)?.[1] || 'Unknown'; - return `${monthName} ${day}`; -} - -function formatCharacterStats(character: CharacterRecord): string { - const parts: string[] = []; - const normalizedSex = character.sex.trim().toLowerCase(); - const sexDisplay = SEX_DISPLAY.find(([key]) => key === normalizedSex)?.[1]; - if (sexDisplay) parts.push(sexDisplay); - if (character.age.trim()) parts.push(`${character.age.trim()} years`); - if (character.bloodType.trim()) parts.push(`Blood Type ${character.bloodType.trim()}`); - const birthday = formatBirthday(character.birthday); - if (birthday) parts.push(`Birthday: ${birthday}`); - return parts.join(' • '); -} - -function buildReadingForTerm( - term: string, - character: CharacterRecord, - readings: NameReadings, - nameParts: JapaneseNameParts, -): string { - for (const suffix of HONORIFIC_SUFFIXES) { - if (term.endsWith(suffix.term) && term.length > suffix.term.length) { - const 
baseTerm = term.slice(0, -suffix.term.length); - const baseReading = buildReadingForTerm(baseTerm, character, readings, nameParts); - return baseReading ? `${baseReading}${suffix.reading}` : ''; - } - } - - const compactNative = character.nativeName.replace(/[\s\u3000]+/g, ''); - const noMiddleDotsNative = compactNative.replace(/[・・·•]/g, ''); - if ( - term === character.nativeName || - term === compactNative || - term === noMiddleDotsNative || - term === nameParts.original || - term === nameParts.combined - ) { - return readings.full; - } - - const familyCompact = nameParts.family?.replace(/[・・·•]/g, '') || ''; - if (nameParts.family && (term === nameParts.family || term === familyCompact)) { - return readings.family; - } - - const givenCompact = nameParts.given?.replace(/[・・·•]/g, '') || ''; - if (nameParts.given && (term === nameParts.given || term === givenCompact)) { - return readings.given; - } - - const compact = term.replace(/[\s\u3000]+/g, ''); - if (hasKanaOnly(compact)) { - return buildReading(compact); - } - - if (isRomanizedName(term)) { - return buildReadingFromRomanized(term) || readings.full; - } - - return ''; -} - -function parseCharacterDescription(raw: string): { - fields: Array<{ key: string; value: string }>; - text: string; -} { - const cleaned = raw.replace(//gi, '\n').replace(/<[^>]+>/g, ' '); - const lines = cleaned.split(/\n/); - const fields: Array<{ key: string; value: string }> = []; - const textLines: string[] = []; - - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed) continue; - const match = trimmed.match(/^__([^_]+):__\s*(.+)$/); - if (match) { - const value = match[2]! 
- .replace(/__([^_]+)__/g, '$1') - .replace(/\*\*([^*]+)\*\*/g, '$1') - .replace(/_([^_]+)_/g, '$1') - .replace(/\*([^*]+)\*/g, '$1') - .trim(); - fields.push({ key: match[1]!.trim(), value }); - } else { - textLines.push(trimmed); - } - } - - const text = textLines - .join(' ') - .replace(/\[([^\]]+)\]\((https?:\/\/[^)\s]+)\)/g, '$1') - .replace(/https?:\/\/\S+/g, '') - .replace(/__([^_]+)__/g, '$1') - .replace(/\*\*([^*]+)\*\*/g, '$1') - .replace(/~!/g, '') - .replace(/!~/g, '') - .replace(/\s+/g, ' ') - .trim(); - - return { fields, text }; -} - -function roleInfo(role: CharacterDictionaryRole): { tag: string; score: number } { - if (role === 'main') return { tag: 'main', score: 100 }; - if (role === 'primary') return { tag: 'primary', score: 75 }; - if (role === 'side') return { tag: 'side', score: 50 }; - return { tag: 'appears', score: 25 }; -} - -function mapRole(input: string | null | undefined): CharacterDictionaryRole { - const value = (input || '').trim().toUpperCase(); - if (value === 'MAIN') return 'main'; - if (value === 'SUPPORTING') return 'primary'; - if (value === 'BACKGROUND') return 'side'; - return 'side'; -} - -function roleLabel(role: CharacterDictionaryRole): string { - if (role === 'main') return 'Protagonist'; - if (role === 'primary') return 'Main Character'; - if (role === 'side') return 'Side Character'; - return 'Minor Role'; -} - -function inferImageExt(contentType: string | null): string { - const normalized = (contentType || '').toLowerCase(); - if (normalized.includes('png')) return 'png'; - if (normalized.includes('gif')) return 'gif'; - if (normalized.includes('webp')) return 'webp'; - return 'jpg'; -} - -function ensureDir(dirPath: string): void { - if (fs.existsSync(dirPath)) return; - fs.mkdirSync(dirPath, { recursive: true }); -} - function expandUserPath(input: string): string { if (input.startsWith('~')) { return path.join(os.homedir(), input.slice(1)); @@ -1098,898 +130,6 @@ function 
resolveDictionaryGuessInputs(targetPath: string): { throw new Error(`Dictionary target must be a file or directory path: ${targetPath}`); } -function getSnapshotsDir(outputDir: string): string { - return path.join(outputDir, 'snapshots'); -} - -function getSnapshotPath(outputDir: string, mediaId: number): string { - return path.join(getSnapshotsDir(outputDir), `anilist-${mediaId}.json`); -} - -function getMergedZipPath(outputDir: string): string { - return path.join(outputDir, 'merged.zip'); -} - -function readSnapshot(snapshotPath: string): CharacterDictionarySnapshot | null { - try { - const raw = fs.readFileSync(snapshotPath, 'utf8'); - const parsed = JSON.parse(raw) as Partial; - if (!parsed || typeof parsed !== 'object') { - return null; - } - if ( - parsed.formatVersion !== CHARACTER_DICTIONARY_FORMAT_VERSION || - typeof parsed.mediaId !== 'number' || - typeof parsed.mediaTitle !== 'string' || - typeof parsed.entryCount !== 'number' || - typeof parsed.updatedAt !== 'number' || - !Array.isArray(parsed.termEntries) || - !Array.isArray(parsed.images) - ) { - return null; - } - return { - formatVersion: parsed.formatVersion, - mediaId: parsed.mediaId, - mediaTitle: parsed.mediaTitle, - entryCount: parsed.entryCount, - updatedAt: parsed.updatedAt, - termEntries: parsed.termEntries as CharacterDictionaryTermEntry[], - images: parsed.images as CharacterDictionarySnapshotImage[], - }; - } catch { - return null; - } -} - -function writeSnapshot(snapshotPath: string, snapshot: CharacterDictionarySnapshot): void { - ensureDir(path.dirname(snapshotPath)); - fs.writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2), 'utf8'); -} - -function roleBadgeStyle(role: CharacterDictionaryRole): Record { - const base = { - borderRadius: '4px', - padding: '0.15em 0.5em', - fontSize: '0.8em', - fontWeight: 'bold', - color: '#fff', - }; - if (role === 'main') return { ...base, backgroundColor: '#4CAF50' }; - if (role === 'primary') return { ...base, backgroundColor: '#2196F3' 
}; - if (role === 'side') return { ...base, backgroundColor: '#FF9800' }; - return { ...base, backgroundColor: '#9E9E9E' }; -} - -function buildCollapsibleSection( - title: string, - open: boolean, - body: Array> | string | Record, -): Record { - return { - tag: 'details', - open, - style: { marginTop: '0.4em' }, - content: [ - { - tag: 'summary', - style: { fontWeight: 'bold', fontSize: '0.95em', cursor: 'pointer' }, - content: title, - }, - { - tag: 'div', - style: { padding: '0.25em 0 0 0.4em', fontSize: '0.9em' }, - content: body, - }, - ], - }; -} - -function buildVoicedByContent( - voiceActors: VoiceActorRecord[], - vaImagePaths: Map, -): Record { - if (voiceActors.length === 1) { - const va = voiceActors[0]!; - const vaImgPath = vaImagePaths.get(va.id); - const vaLabel = va.nativeName - ? va.fullName - ? `${va.nativeName} (${va.fullName})` - : va.nativeName - : va.fullName; - - if (vaImgPath) { - return { - tag: 'table', - content: { - tag: 'tr', - content: [ - { - tag: 'td', - style: { - verticalAlign: 'top', - padding: '0', - paddingRight: '0.4em', - borderWidth: '0', - }, - content: { - tag: 'img', - path: vaImgPath, - width: 3, - height: 3, - sizeUnits: 'em', - title: vaLabel, - alt: vaLabel, - collapsed: false, - collapsible: false, - background: true, - }, - }, - { - tag: 'td', - style: { verticalAlign: 'middle', padding: '0', borderWidth: '0' }, - content: vaLabel, - }, - ], - }, - }; - } - - return { tag: 'div', content: vaLabel }; - } - - const items: Array> = []; - for (const va of voiceActors) { - const vaLabel = va.nativeName - ? va.fullName - ? 
`${va.nativeName} (${va.fullName})` - : va.nativeName - : va.fullName; - items.push({ tag: 'li', content: vaLabel }); - } - return { tag: 'ul', style: { marginTop: '0.15em' }, content: items }; -} - -function createDefinitionGlossary( - character: CharacterRecord, - mediaTitle: string, - imagePath: string | null, - vaImagePaths: Map, - getCollapsibleSectionOpenState: ( - section: AnilistCharacterDictionaryCollapsibleSectionKey, - ) => boolean, -): CharacterDictionaryGlossaryEntry[] { - const displayName = character.nativeName || character.fullName || `Character ${character.id}`; - const secondaryName = - character.nativeName && character.fullName && character.fullName !== character.nativeName - ? character.fullName - : null; - const { fields, text: descriptionText } = parseCharacterDescription(character.description); - - const content: Array> = [ - { - tag: 'div', - style: { fontWeight: 'bold', fontSize: '1.1em', marginBottom: '0.1em' }, - content: displayName, - }, - ]; - - if (secondaryName) { - content.push({ - tag: 'div', - style: { fontSize: '0.85em', fontStyle: 'italic', color: '#b0b0b0', marginBottom: '0.2em' }, - content: secondaryName, - }); - } - - if (imagePath) { - content.push({ - tag: 'div', - style: { marginTop: '0.3em', marginBottom: '0.3em' }, - content: { - tag: 'img', - path: imagePath, - width: 8, - height: 11, - sizeUnits: 'em', - title: displayName, - alt: displayName, - description: `${displayName} · ${mediaTitle}`, - collapsed: false, - collapsible: false, - background: true, - }, - }); - } - - content.push({ - tag: 'div', - style: { fontSize: '0.8em', color: '#999', marginBottom: '0.2em' }, - content: `From: ${mediaTitle}`, - }); - - content.push({ - tag: 'div', - style: { marginBottom: '0.15em' }, - content: { - tag: 'span', - style: roleBadgeStyle(character.role), - content: roleLabel(character.role), - }, - }); - - const statsLine = formatCharacterStats(character); - if (descriptionText) { - content.push( - buildCollapsibleSection( - 
'Description', - getCollapsibleSectionOpenState('description'), - descriptionText, - ), - ); - } - - const fieldItems: Array> = []; - if (statsLine) { - fieldItems.push({ - tag: 'li', - style: { fontWeight: 'bold' }, - content: statsLine, - }); - } - fieldItems.push( - ...fields.map((f) => ({ - tag: 'li', - content: `${f.key}: ${f.value}`, - })), - ); - if (fieldItems.length > 0) { - content.push( - buildCollapsibleSection( - 'Character Information', - getCollapsibleSectionOpenState('characterInformation'), - { - tag: 'ul', - style: { marginTop: '0.15em' }, - content: fieldItems, - }, - ), - ); - } - - if (character.voiceActors.length > 0) { - content.push( - buildCollapsibleSection( - 'Voiced by', - getCollapsibleSectionOpenState('voicedBy'), - buildVoicedByContent(character.voiceActors, vaImagePaths), - ), - ); - } - - return [ - { - type: 'structured-content', - content: { tag: 'div', content }, - }, - ]; -} - -function buildSnapshotImagePath(mediaId: number, charId: number, ext: string): string { - return `img/m${mediaId}-c${charId}.${ext}`; -} - -function buildVaImagePath(mediaId: number, vaId: number, ext: string): string { - return `img/m${mediaId}-va${vaId}.${ext}`; -} - -function buildTermEntry( - term: string, - reading: string, - role: CharacterDictionaryRole, - glossary: CharacterDictionaryGlossaryEntry[], -): CharacterDictionaryTermEntry { - const { tag, score } = roleInfo(role); - return [term, reading, `name ${tag}`, '', score, glossary, 0, '']; -} - -const CRC32_TABLE = (() => { - const table = new Uint32Array(256); - for (let i = 0; i < 256; i += 1) { - let crc = i; - for (let j = 0; j < 8; j += 1) { - crc = (crc & 1) !== 0 ? 0xedb88320 ^ (crc >>> 1) : crc >>> 1; - } - table[i] = crc >>> 0; - } - return table; -})(); - -function crc32(data: Buffer): number { - let crc = 0xffffffff; - for (const byte of data) { - crc = CRC32_TABLE[(crc ^ byte) & 0xff]! 
^ (crc >>> 8); - } - return (crc ^ 0xffffffff) >>> 0; -} - -function createStoredZip(files: Array<{ name: string; data: Buffer }>): Buffer { - const chunks: Buffer[] = []; - const entries: ZipEntry[] = []; - let offset = 0; - - for (const file of files) { - const fileName = Buffer.from(file.name, 'utf8'); - const fileData = file.data; - const fileCrc32 = crc32(fileData); - const local = Buffer.alloc(30 + fileName.length); - let cursor = 0; - local.writeUInt32LE(0x04034b50, cursor); - cursor += 4; - local.writeUInt16LE(20, cursor); - cursor += 2; - local.writeUInt16LE(0, cursor); - cursor += 2; - local.writeUInt16LE(0, cursor); - cursor += 2; - local.writeUInt16LE(0, cursor); - cursor += 2; - local.writeUInt16LE(0, cursor); - cursor += 2; - local.writeUInt32LE(fileCrc32, cursor); - cursor += 4; - local.writeUInt32LE(fileData.length, cursor); - cursor += 4; - local.writeUInt32LE(fileData.length, cursor); - cursor += 4; - local.writeUInt16LE(fileName.length, cursor); - cursor += 2; - local.writeUInt16LE(0, cursor); - cursor += 2; - fileName.copy(local, cursor); - - chunks.push(local, fileData); - entries.push({ - name: file.name, - data: fileData, - crc32: fileCrc32, - localHeaderOffset: offset, - }); - offset += local.length + fileData.length; - } - - const centralStart = offset; - const centralChunks: Buffer[] = []; - for (const entry of entries) { - const fileName = Buffer.from(entry.name, 'utf8'); - const central = Buffer.alloc(46 + fileName.length); - let cursor = 0; - central.writeUInt32LE(0x02014b50, cursor); - cursor += 4; - central.writeUInt16LE(20, cursor); - cursor += 2; - central.writeUInt16LE(20, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt32LE(entry.crc32, cursor); - cursor += 4; - central.writeUInt32LE(entry.data.length, cursor); - cursor += 4; - 
central.writeUInt32LE(entry.data.length, cursor); - cursor += 4; - central.writeUInt16LE(fileName.length, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt16LE(0, cursor); - cursor += 2; - central.writeUInt32LE(0, cursor); - cursor += 4; - central.writeUInt32LE(entry.localHeaderOffset, cursor); - cursor += 4; - fileName.copy(central, cursor); - centralChunks.push(central); - offset += central.length; - } - - const centralSize = offset - centralStart; - const end = Buffer.alloc(22); - let cursor = 0; - end.writeUInt32LE(0x06054b50, cursor); - cursor += 4; - end.writeUInt16LE(0, cursor); - cursor += 2; - end.writeUInt16LE(0, cursor); - cursor += 2; - end.writeUInt16LE(entries.length, cursor); - cursor += 2; - end.writeUInt16LE(entries.length, cursor); - cursor += 2; - end.writeUInt32LE(centralSize, cursor); - cursor += 4; - end.writeUInt32LE(centralStart, cursor); - cursor += 4; - end.writeUInt16LE(0, cursor); - - return Buffer.concat([...chunks, ...centralChunks, end]); -} - -async function fetchAniList( - query: string, - variables: Record, - beforeRequest?: () => Promise, -): Promise { - if (beforeRequest) { - await beforeRequest(); - } - const response = await fetch(ANILIST_GRAPHQL_URL, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - query, - variables, - }), - }); - if (!response.ok) { - throw new Error(`AniList request failed (${response.status})`); - } - const payload = (await response.json()) as { - data?: T; - errors?: Array<{ message?: string }>; - }; - const firstError = payload.errors?.find((entry) => entry && typeof entry.message === 'string'); - if (firstError?.message) { - throw new Error(firstError.message); - } - if (!payload.data) { - throw new Error('AniList response missing data'); - } - return payload.data; -} - -async function 
resolveAniListMediaIdFromGuess( - guess: AnilistMediaGuess, - beforeRequest?: () => Promise, -): Promise { - const data = await fetchAniList( - ` - query($search: String!) { - Page(perPage: 10) { - media(search: $search, type: ANIME, sort: [SEARCH_MATCH, POPULARITY_DESC]) { - id - episodes - title { - romaji - english - native - } - } - } - } - `, - { - search: guess.title, - }, - beforeRequest, - ); - - const media = data.Page?.media ?? []; - const resolved = pickAniListSearchResult(guess.title, guess.episode, media); - if (!resolved) { - throw new Error(`No AniList media match found for "${guess.title}".`); - } - return resolved; -} - -async function fetchCharactersForMedia( - mediaId: number, - beforeRequest?: () => Promise, - onPageFetched?: (page: number) => void, -): Promise<{ - mediaTitle: string; - characters: CharacterRecord[]; -}> { - const characters: CharacterRecord[] = []; - let page = 1; - let mediaTitle = ''; - for (;;) { - const data = await fetchAniList( - ` - query($id: Int!, $page: Int!) { - Media(id: $id, type: ANIME) { - title { - romaji - english - native - } - characters(page: $page, perPage: 50, sort: [ROLE, RELEVANCE, ID]) { - pageInfo { - hasNextPage - } - edges { - role - voiceActors(language: JAPANESE) { - id - name { - full - native - } - image { - medium - } - } - node { - id - description(asHtml: false) - gender - age - dateOfBirth { - month - day - } - bloodType - image { - large - medium - } - name { - first - full - last - native - alternative - } - } - } - } - } - } - `, - { - id: mediaId, - page, - }, - beforeRequest, - ); - onPageFetched?.(page); - - const media = data.Media; - if (!media) { - throw new Error(`AniList media ${mediaId} not found.`); - } - if (!mediaTitle) { - mediaTitle = - media.title?.english?.trim() || - media.title?.romaji?.trim() || - media.title?.native?.trim() || - `AniList ${mediaId}`; - } - - const edges = media.characters?.edges ?? 
[]; - for (const edge of edges) { - const node = edge?.node; - if (!node || typeof node.id !== 'number') continue; - const firstNameHint = node.name?.first?.trim() || ''; - const fullName = node.name?.full?.trim() || ''; - const lastNameHint = node.name?.last?.trim() || ''; - const nativeName = node.name?.native?.trim() || ''; - const alternativeNames = [ - ...new Set( - (node.name?.alternative ?? []) - .filter((value): value is string => typeof value === 'string') - .map((value) => value.trim()) - .filter((value) => value.length > 0), - ), - ]; - if (!nativeName) continue; - const voiceActors: VoiceActorRecord[] = []; - for (const va of edge?.voiceActors ?? []) { - if (!va || typeof va.id !== 'number') continue; - const vaFull = va.name?.full?.trim() || ''; - const vaNative = va.name?.native?.trim() || ''; - if (!vaFull && !vaNative) continue; - voiceActors.push({ - id: va.id, - fullName: vaFull, - nativeName: vaNative, - imageUrl: va.image?.medium || null, - }); - } - characters.push({ - id: node.id, - role: mapRole(edge?.role), - firstNameHint, - fullName, - lastNameHint, - nativeName, - alternativeNames, - bloodType: node.bloodType?.trim() || '', - birthday: - typeof node.dateOfBirth?.month === 'number' && typeof node.dateOfBirth?.day === 'number' - ? [node.dateOfBirth.month, node.dateOfBirth.day] - : null, - description: node.description || '', - imageUrl: node.image?.large || node.image?.medium || null, - age: - typeof node.age === 'string' - ? node.age.trim() - : typeof node.age === 'number' - ? 
String(node.age) - : '', - sex: node.gender?.trim() || '', - voiceActors, - }); - } - - const hasNextPage = Boolean(media.characters?.pageInfo?.hasNextPage); - if (!hasNextPage) { - break; - } - page += 1; - } - - return { - mediaTitle, - characters, - }; -} - -async function downloadCharacterImage( - imageUrl: string, - charId: number, -): Promise<{ - filename: string; - ext: string; - bytes: Buffer; -} | null> { - try { - const response = await fetch(imageUrl); - if (!response.ok) return null; - const bytes = Buffer.from(await response.arrayBuffer()); - if (bytes.length === 0) return null; - const ext = inferImageExt(response.headers.get('content-type')); - return { - filename: `c${charId}.${ext}`, - ext, - bytes, - }; - } catch { - return null; - } -} - -function buildDictionaryTitle(mediaId: number): string { - return `SubMiner Character Dictionary (AniList ${mediaId})`; -} - -function createIndex( - dictionaryTitle: string, - description: string, - revision: string, -): Record { - return { - title: dictionaryTitle, - revision, - format: 3, - author: 'SubMiner', - description, - }; -} - -function createTagBank(): Array<[string, string, number, string, number]> { - return [ - ['name', 'partOfSpeech', 0, 'Character name', 0], - ['main', 'name', 0, 'Protagonist', 0], - ['primary', 'name', 0, 'Main character', 0], - ['side', 'name', 0, 'Side character', 0], - ['appears', 'name', 0, 'Minor appearance', 0], - ]; -} - -function buildSnapshotFromCharacters( - mediaId: number, - mediaTitle: string, - characters: CharacterRecord[], - imagesByCharacterId: Map, - imagesByVaId: Map, - updatedAt: number, - getCollapsibleSectionOpenState: ( - section: AnilistCharacterDictionaryCollapsibleSectionKey, - ) => boolean, -): CharacterDictionarySnapshot { - const termEntries: CharacterDictionaryTermEntry[] = []; - - for (const character of characters) { - const seenTerms = new Set(); - const imagePath = imagesByCharacterId.get(character.id)?.path ?? 
null; - const vaImagePaths = new Map(); - for (const va of character.voiceActors) { - const vaImg = imagesByVaId.get(va.id); - if (vaImg) vaImagePaths.set(va.id, vaImg.path); - } - const glossary = createDefinitionGlossary( - character, - mediaTitle, - imagePath, - vaImagePaths, - getCollapsibleSectionOpenState, - ); - const candidateTerms = buildNameTerms(character); - const nameParts = splitJapaneseName( - character.nativeName, - character.firstNameHint, - character.lastNameHint, - ); - const readings = generateNameReadings( - character.nativeName, - character.fullName, - character.firstNameHint, - character.lastNameHint, - ); - for (const term of candidateTerms) { - if (seenTerms.has(term)) continue; - seenTerms.add(term); - const reading = buildReadingForTerm(term, character, readings, nameParts); - termEntries.push(buildTermEntry(term, reading, character.role, glossary)); - } - } - - if (termEntries.length === 0) { - throw new Error('No dictionary entries generated from AniList character data.'); - } - - return { - formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION, - mediaId, - mediaTitle, - entryCount: termEntries.length, - updatedAt, - termEntries, - images: [...imagesByCharacterId.values(), ...imagesByVaId.values()], - }; -} - -function getCollapsibleSectionKeyFromTitle( - title: string, -): AnilistCharacterDictionaryCollapsibleSectionKey | null { - if (title === 'Description') return 'description'; - if (title === 'Character Information') return 'characterInformation'; - if (title === 'Voiced by') return 'voicedBy'; - return null; -} - -function applyCollapsibleOpenStatesToStructuredValue( - value: unknown, - getCollapsibleSectionOpenState: ( - section: AnilistCharacterDictionaryCollapsibleSectionKey, - ) => boolean, -): unknown { - if (Array.isArray(value)) { - return value.map((item) => - applyCollapsibleOpenStatesToStructuredValue(item, getCollapsibleSectionOpenState), - ); - } - if (!value || typeof value !== 'object') { - return value; - } - - const 
record = value as Record; - const next: Record = {}; - for (const [key, child] of Object.entries(record)) { - next[key] = applyCollapsibleOpenStatesToStructuredValue(child, getCollapsibleSectionOpenState); - } - - if (record.tag === 'details') { - const content = Array.isArray(record.content) ? record.content : []; - const summary = content[0]; - if (summary && typeof summary === 'object' && !Array.isArray(summary)) { - const summaryContent = (summary as Record).content; - if (typeof summaryContent === 'string') { - const section = getCollapsibleSectionKeyFromTitle(summaryContent); - if (section) { - next.open = getCollapsibleSectionOpenState(section); - } - } - } - } - - return next; -} - -function applyCollapsibleOpenStatesToTermEntries( - termEntries: CharacterDictionaryTermEntry[], - getCollapsibleSectionOpenState: ( - section: AnilistCharacterDictionaryCollapsibleSectionKey, - ) => boolean, -): CharacterDictionaryTermEntry[] { - return termEntries.map((entry) => { - const glossary = entry[5].map((item) => - applyCollapsibleOpenStatesToStructuredValue(item, getCollapsibleSectionOpenState), - ) as CharacterDictionaryGlossaryEntry[]; - return [...entry.slice(0, 5), glossary, ...entry.slice(6)] as CharacterDictionaryTermEntry; - }); -} - -function buildDictionaryZip( - outputPath: string, - dictionaryTitle: string, - description: string, - revision: string, - termEntries: CharacterDictionaryTermEntry[], - images: CharacterDictionarySnapshotImage[], -): { zipPath: string; entryCount: number } { - const zipFiles: Array<{ name: string; data: Buffer }> = [ - { - name: 'index.json', - data: Buffer.from( - JSON.stringify(createIndex(dictionaryTitle, description, revision), null, 2), - 'utf8', - ), - }, - { - name: 'tag_bank_1.json', - data: Buffer.from(JSON.stringify(createTagBank()), 'utf8'), - }, - ]; - - for (const image of images) { - zipFiles.push({ - name: image.path, - data: Buffer.from(image.dataBase64, 'base64'), - }); - } - - const entriesPerBank = 10_000; - 
for (let i = 0; i < termEntries.length; i += entriesPerBank) { - zipFiles.push({ - name: `term_bank_${Math.floor(i / entriesPerBank) + 1}.json`, - data: Buffer.from(JSON.stringify(termEntries.slice(i, i + entriesPerBank)), 'utf8'), - }); - } - - ensureDir(path.dirname(outputPath)); - fs.writeFileSync(outputPath, createStoredZip(zipFiles)); - return { zipPath: outputPath, entryCount: termEntries.length }; -} - -function buildMergedRevision(mediaIds: number[], snapshots: CharacterDictionarySnapshot[]): string { - const hash = createHash('sha1'); - hash.update( - JSON.stringify({ - mediaIds, - snapshots: snapshots.map((snapshot) => ({ - mediaId: snapshot.mediaId, - updatedAt: snapshot.updatedAt, - entryCount: snapshot.entryCount, - })), - }), - ); - return hash.digest('hex').slice(0, 12); -} - -function normalizeMergedMediaIds(mediaIds: number[]): number[] { - return [ - ...new Set( - mediaIds - .filter((mediaId) => Number.isFinite(mediaId) && mediaId > 0) - .map((mediaId) => Math.floor(mediaId)), - ), - ].sort((left, right) => left - right); -} - export function createCharacterDictionaryRuntimeService(deps: CharacterDictionaryRuntimeDeps): { getOrCreateCurrentSnapshot: ( targetPath?: string, diff --git a/src/main/character-dictionary-runtime/build.test.ts b/src/main/character-dictionary-runtime/build.test.ts new file mode 100644 index 0000000..caf0755 --- /dev/null +++ b/src/main/character-dictionary-runtime/build.test.ts @@ -0,0 +1,58 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; +import { applyCollapsibleOpenStatesToTermEntries } from './build'; +import type { CharacterDictionaryTermEntry } from './types'; + +test('applyCollapsibleOpenStatesToTermEntries reapplies configured details open states', () => { + const termEntries: CharacterDictionaryTermEntry[] = [ + [ + 'アルファ', + 'あるふぁ', + '', + '', + 0, + [ + { + type: 'structured-content', + content: { + tag: 'div', + content: [ + { + tag: 'details', + open: false, + content: [ + { tag: 
'summary', content: 'Description' }, + { tag: 'div', content: 'body' }, + ], + }, + { + tag: 'details', + open: false, + content: [ + { tag: 'summary', content: 'Voiced by' }, + { tag: 'div', content: 'cv' }, + ], + }, + ], + }, + }, + ], + 0, + 'name', + ], + ]; + + const [entry] = applyCollapsibleOpenStatesToTermEntries( + termEntries, + (section) => section === 'description', + ); + assert.ok(entry); + const glossaryEntry = entry[5][0] as { + content: { + content: Array<{ open?: boolean }>; + }; + }; + + assert.equal(glossaryEntry.content.content[0]?.open, true); + assert.equal(glossaryEntry.content.content[1]?.open, false); +}); diff --git a/src/main/character-dictionary-runtime/build.ts b/src/main/character-dictionary-runtime/build.ts new file mode 100644 index 0000000..2992eb8 --- /dev/null +++ b/src/main/character-dictionary-runtime/build.ts @@ -0,0 +1,7 @@ +export { + applyCollapsibleOpenStatesToTermEntries, + buildSnapshotFromCharacters, + buildSnapshotImagePath, + buildVaImagePath, +} from './snapshot'; +export { buildDictionaryTitle, buildDictionaryZip } from './zip'; diff --git a/src/main/character-dictionary-runtime/cache.test.ts b/src/main/character-dictionary-runtime/cache.test.ts new file mode 100644 index 0000000..6f739b0 --- /dev/null +++ b/src/main/character-dictionary-runtime/cache.test.ts @@ -0,0 +1,54 @@ +import assert from 'node:assert/strict'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import test from 'node:test'; +import { getSnapshotPath, readSnapshot, writeSnapshot } from './cache'; +import { CHARACTER_DICTIONARY_FORMAT_VERSION } from './constants'; +import type { CharacterDictionarySnapshot } from './types'; + +function makeTempDir(): string { + return fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-character-dictionary-cache-')); +} + +function createSnapshot(): CharacterDictionarySnapshot { + return { + formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION, + mediaId: 130298, + mediaTitle: 'The 
Eminence in Shadow', + entryCount: 1, + updatedAt: 1_700_000_000_000, + termEntries: [['アルファ', 'あるふぁ', '', '', 0, ['Alpha'], 0, 'name']], + images: [ + { + path: 'img/m130298-c1.png', + dataBase64: + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+nmX8AAAAASUVORK5CYII=', + }, + ], + }; +} + +test('writeSnapshot persists and readSnapshot restores current-format snapshots', () => { + const outputDir = makeTempDir(); + const snapshotPath = getSnapshotPath(outputDir, 130298); + const snapshot = createSnapshot(); + + writeSnapshot(snapshotPath, snapshot); + + assert.deepEqual(readSnapshot(snapshotPath), snapshot); +}); + +test('readSnapshot ignores snapshots written with an older format version', () => { + const outputDir = makeTempDir(); + const snapshotPath = getSnapshotPath(outputDir, 130298); + const staleSnapshot = { + ...createSnapshot(), + formatVersion: CHARACTER_DICTIONARY_FORMAT_VERSION - 1, + }; + + fs.mkdirSync(path.dirname(snapshotPath), { recursive: true }); + fs.writeFileSync(snapshotPath, JSON.stringify(staleSnapshot), 'utf8'); + + assert.equal(readSnapshot(snapshotPath), null); +}); diff --git a/src/main/character-dictionary-runtime/cache.ts b/src/main/character-dictionary-runtime/cache.ts new file mode 100644 index 0000000..db5d57a --- /dev/null +++ b/src/main/character-dictionary-runtime/cache.ts @@ -0,0 +1,87 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import { createHash } from 'node:crypto'; +import { CHARACTER_DICTIONARY_FORMAT_VERSION } from './constants'; +import { ensureDir } from './fs-utils'; +import type { + CharacterDictionarySnapshot, + CharacterDictionarySnapshotImage, + CharacterDictionaryTermEntry, +} from './types'; + +function getSnapshotsDir(outputDir: string): string { + return path.join(outputDir, 'snapshots'); +} + +export function getSnapshotPath(outputDir: string, mediaId: number): string { + return path.join(getSnapshotsDir(outputDir), `anilist-${mediaId}.json`); +} + +export function 
getMergedZipPath(outputDir: string): string { + return path.join(outputDir, 'merged.zip'); +} + +export function readSnapshot(snapshotPath: string): CharacterDictionarySnapshot | null { + try { + const raw = fs.readFileSync(snapshotPath, 'utf8'); + const parsed = JSON.parse(raw) as Partial; + if (!parsed || typeof parsed !== 'object') { + return null; + } + if ( + parsed.formatVersion !== CHARACTER_DICTIONARY_FORMAT_VERSION || + typeof parsed.mediaId !== 'number' || + typeof parsed.mediaTitle !== 'string' || + typeof parsed.entryCount !== 'number' || + typeof parsed.updatedAt !== 'number' || + !Array.isArray(parsed.termEntries) || + !Array.isArray(parsed.images) + ) { + return null; + } + return { + formatVersion: parsed.formatVersion, + mediaId: parsed.mediaId, + mediaTitle: parsed.mediaTitle, + entryCount: parsed.entryCount, + updatedAt: parsed.updatedAt, + termEntries: parsed.termEntries as CharacterDictionaryTermEntry[], + images: parsed.images as CharacterDictionarySnapshotImage[], + }; + } catch { + return null; + } +} + +export function writeSnapshot(snapshotPath: string, snapshot: CharacterDictionarySnapshot): void { + ensureDir(path.dirname(snapshotPath)); + fs.writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2), 'utf8'); +} + +export function buildMergedRevision( + mediaIds: number[], + snapshots: CharacterDictionarySnapshot[], +): string { + const hash = createHash('sha1'); + hash.update( + JSON.stringify({ + mediaIds, + snapshots: snapshots.map((snapshot) => ({ + mediaId: snapshot.mediaId, + updatedAt: snapshot.updatedAt, + entryCount: snapshot.entryCount, + })), + }), + ); + return hash.digest('hex').slice(0, 12); +} + +export function normalizeMergedMediaIds(mediaIds: number[]): number[] { + return [ + ...new Set( + mediaIds + .filter((mediaId) => Number.isFinite(mediaId) && mediaId > 0) + .map((mediaId) => Math.floor(mediaId)), + ), + ].sort((left, right) => left - right); +} diff --git a/src/main/character-dictionary-runtime/constants.ts 
b/src/main/character-dictionary-runtime/constants.ts new file mode 100644 index 0000000..c4bef98 --- /dev/null +++ b/src/main/character-dictionary-runtime/constants.ts @@ -0,0 +1,23 @@ +export const ANILIST_GRAPHQL_URL = 'https://graphql.anilist.co'; +export const ANILIST_REQUEST_DELAY_MS = 2000; +export const CHARACTER_IMAGE_DOWNLOAD_DELAY_MS = 250; +export const CHARACTER_DICTIONARY_FORMAT_VERSION = 15; +export const CHARACTER_DICTIONARY_MERGED_TITLE = 'SubMiner Character Dictionary'; + +export const HONORIFIC_SUFFIXES = [ + { term: 'さん', reading: 'さん' }, + { term: '様', reading: 'さま' }, + { term: '先生', reading: 'せんせい' }, + { term: '先輩', reading: 'せんぱい' }, + { term: '後輩', reading: 'こうはい' }, + { term: '氏', reading: 'し' }, + { term: '君', reading: 'くん' }, + { term: 'くん', reading: 'くん' }, + { term: 'ちゃん', reading: 'ちゃん' }, + { term: 'たん', reading: 'たん' }, + { term: '坊', reading: 'ぼう' }, + { term: '殿', reading: 'どの' }, + { term: '博士', reading: 'はかせ' }, + { term: '社長', reading: 'しゃちょう' }, + { term: '部長', reading: 'ぶちょう' }, +] as const; diff --git a/src/main/character-dictionary-runtime/description.ts b/src/main/character-dictionary-runtime/description.ts new file mode 100644 index 0000000..7510a8c --- /dev/null +++ b/src/main/character-dictionary-runtime/description.ts @@ -0,0 +1,82 @@ +import type { CharacterBirthday, CharacterRecord } from './types'; + +const MONTH_NAMES: ReadonlyArray<[number, string]> = [ + [1, 'January'], + [2, 'February'], + [3, 'March'], + [4, 'April'], + [5, 'May'], + [6, 'June'], + [7, 'July'], + [8, 'August'], + [9, 'September'], + [10, 'October'], + [11, 'November'], + [12, 'December'], +]; + +const SEX_DISPLAY: ReadonlyArray<[string, string]> = [ + ['m', '♂ Male'], + ['f', '♀ Female'], + ['male', '♂ Male'], + ['female', '♀ Female'], +]; + +function formatBirthday(birthday: CharacterBirthday | null): string { + if (!birthday) return ''; + const [month, day] = birthday; + const monthName = MONTH_NAMES.find(([m]) => m === month)?.[1] || 
'Unknown'; + return `${monthName} ${day}`; +} + +export function formatCharacterStats(character: CharacterRecord): string { + const parts: string[] = []; + const normalizedSex = character.sex.trim().toLowerCase(); + const sexDisplay = SEX_DISPLAY.find(([key]) => key === normalizedSex)?.[1]; + if (sexDisplay) parts.push(sexDisplay); + if (character.age.trim()) parts.push(`${character.age.trim()} years`); + if (character.bloodType.trim()) parts.push(`Blood Type ${character.bloodType.trim()}`); + const birthday = formatBirthday(character.birthday); + if (birthday) parts.push(`Birthday: ${birthday}`); + return parts.join(' • '); +} + +export function parseCharacterDescription(raw: string): { + fields: Array<{ key: string; value: string }>; + text: string; +} { + const cleaned = raw.replace(//gi, '\n').replace(/<[^>]+>/g, ' '); + const lines = cleaned.split(/\n/); + const fields: Array<{ key: string; value: string }> = []; + const textLines: string[] = []; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + const match = trimmed.match(/^__([^_]+):__\s*(.+)$/); + if (match) { + const value = match[2]! 
+ .replace(/__([^_]+)__/g, '$1') + .replace(/\*\*([^*]+)\*\*/g, '$1') + .replace(/_([^_]+)_/g, '$1') + .replace(/\*([^*]+)\*/g, '$1') + .trim(); + fields.push({ key: match[1]!.trim(), value }); + } else { + textLines.push(trimmed); + } + } + + const text = textLines + .join(' ') + .replace(/\[([^\]]+)\]\((https?:\/\/[^)\s]+)\)/g, '$1') + .replace(/https?:\/\/\S+/g, '') + .replace(/__([^_]+)__/g, '$1') + .replace(/\*\*([^*]+)\*\*/g, '$1') + .replace(/~!/g, '') + .replace(/!~/g, '') + .replace(/\s+/g, ' ') + .trim(); + + return { fields, text }; +} diff --git a/src/main/character-dictionary-runtime/fetch.ts b/src/main/character-dictionary-runtime/fetch.ts new file mode 100644 index 0000000..61ba245 --- /dev/null +++ b/src/main/character-dictionary-runtime/fetch.ts @@ -0,0 +1,386 @@ +import type { AnilistMediaGuess } from '../../core/services/anilist/anilist-updater'; +import { ANILIST_GRAPHQL_URL } from './constants'; +import type { + CharacterDictionaryRole, + CharacterRecord, + ResolvedAniListMedia, + VoiceActorRecord, +} from './types'; + +type AniListSearchResponse = { + Page?: { + media?: Array<{ + id: number; + episodes?: number | null; + title?: { + romaji?: string | null; + english?: string | null; + native?: string | null; + }; + }>; + }; +}; + +type AniListCharacterPageResponse = { + Media?: { + title?: { + romaji?: string | null; + english?: string | null; + native?: string | null; + }; + characters?: { + pageInfo?: { + hasNextPage?: boolean | null; + }; + edges?: Array<{ + role?: string | null; + voiceActors?: Array<{ + id: number; + name?: { + full?: string | null; + native?: string | null; + } | null; + image?: { + large?: string | null; + medium?: string | null; + } | null; + }> | null; + node?: { + id: number; + description?: string | null; + image?: { + large?: string | null; + medium?: string | null; + } | null; + gender?: string | null; + age?: string | number | null; + dateOfBirth?: { + month?: number | null; + day?: number | null; + } | null; + 
bloodType?: string | null; + name?: { + first?: string | null; + full?: string | null; + last?: string | null; + native?: string | null; + alternative?: Array | null; + } | null; + } | null; + } | null>; + } | null; + } | null; +}; + +function normalizeTitle(value: string): string { + return value.trim().toLowerCase().replace(/\s+/g, ' '); +} + +function pickAniListSearchResult( + title: string, + episode: number | null, + media: Array<{ + id: number; + episodes?: number | null; + title?: { + romaji?: string | null; + english?: string | null; + native?: string | null; + }; + }>, +): ResolvedAniListMedia | null { + if (media.length === 0) return null; + + const episodeFiltered = + episode && episode > 0 + ? media.filter((entry) => { + const totalEpisodes = entry.episodes; + return ( + typeof totalEpisodes !== 'number' || totalEpisodes <= 0 || episode <= totalEpisodes + ); + }) + : media; + const candidates = episodeFiltered.length > 0 ? episodeFiltered : media; + const normalizedTitle = normalizeTitle(title); + + const exact = candidates.find((entry) => { + const titles = [entry.title?.english, entry.title?.romaji, entry.title?.native] + .filter((value): value is string => typeof value === 'string') + .map((value) => normalizeTitle(value)); + return titles.includes(normalizedTitle); + }); + const selected = exact ?? candidates[0] ?? 
media[0]; + if (!selected) return null; + + const selectedTitle = + selected.title?.english?.trim() || + selected.title?.romaji?.trim() || + selected.title?.native?.trim() || + title.trim(); + return { + id: selected.id, + title: selectedTitle, + }; +} + +async function fetchAniList( + query: string, + variables: Record, + beforeRequest?: () => Promise, +): Promise { + if (beforeRequest) { + await beforeRequest(); + } + const response = await fetch(ANILIST_GRAPHQL_URL, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query, + variables, + }), + }); + if (!response.ok) { + throw new Error(`AniList request failed (${response.status})`); + } + const payload = (await response.json()) as { + data?: T; + errors?: Array<{ message?: string }>; + }; + const firstError = payload.errors?.find((entry) => entry && typeof entry.message === 'string'); + if (firstError?.message) { + throw new Error(firstError.message); + } + if (!payload.data) { + throw new Error('AniList response missing data'); + } + return payload.data; +} + +function mapRole(input: string | null | undefined): CharacterDictionaryRole { + const value = (input || '').trim().toUpperCase(); + if (value === 'MAIN') return 'main'; + if (value === 'SUPPORTING') return 'primary'; + if (value === 'BACKGROUND') return 'side'; + return 'side'; +} + +function inferImageExt(contentType: string | null): string { + const normalized = (contentType || '').toLowerCase(); + if (normalized.includes('png')) return 'png'; + if (normalized.includes('gif')) return 'gif'; + if (normalized.includes('webp')) return 'webp'; + return 'jpg'; +} + +export async function resolveAniListMediaIdFromGuess( + guess: AnilistMediaGuess, + beforeRequest?: () => Promise, +): Promise { + const data = await fetchAniList( + ` + query($search: String!) 
/**
 * Downloads every character edge of one AniList media entry and converts the
 * edges into `CharacterRecord`s.
 *
 * Pages of 50 edges are requested one after another until the response
 * reports `hasNextPage` as falsy.
 *
 * @param mediaId - AniList media id sent as the `$id` query variable.
 * @param beforeRequest - Optional hook forwarded to `fetchAniList` for every page.
 * @param onPageFetched - Optional callback invoked with the page number after
 *   each page response arrives (before the response is inspected).
 * @returns The media title (english, then romaji, then native, then
 *   `AniList <id>`) and the collected characters.
 * @throws Error when a page response contains no `Media` object; errors from
 *   `fetchAniList` propagate unchanged.
 */
export async function fetchCharactersForMedia(
  mediaId: number,
  beforeRequest?: () => Promise,
  onPageFetched?: (page: number) => void,
): Promise<{
  mediaTitle: string;
  characters: CharacterRecord[];
}> {
  const characters: CharacterRecord[] = [];
  let page = 1;
  let mediaTitle = '';
  // Loop exits only through the `hasNextPage` check at the bottom or a throw.
  for (;;) {
    const data = await fetchAniList(
      `
        query($id: Int!, $page: Int!) {
          Media(id: $id, type: ANIME) {
            title {
              romaji
              english
              native
            }
            characters(page: $page, perPage: 50, sort: [ROLE, RELEVANCE, ID]) {
              pageInfo {
                hasNextPage
              }
              edges {
                role
                voiceActors(language: JAPANESE) {
                  id
                  name {
                    full
                    native
                  }
                  image {
                    medium
                  }
                }
                node {
                  id
                  description(asHtml: false)
                  gender
                  age
                  dateOfBirth {
                    month
                    day
                  }
                  bloodType
                  image {
                    large
                    medium
                  }
                  name {
                    first
                    full
                    last
                    native
                    alternative
                  }
                }
              }
            }
          }
        }
      `,
      {
        id: mediaId,
        page,
      },
      beforeRequest,
    );
    onPageFetched?.(page);

    const media = data.Media;
    if (!media) {
      throw new Error(`AniList media ${mediaId} not found.`);
    }
    // The title is resolved once, from the first page that yields one.
    if (!mediaTitle) {
      mediaTitle =
        media.title?.english?.trim() ||
        media.title?.romaji?.trim() ||
        media.title?.native?.trim() ||
        `AniList ${mediaId}`;
    }

    const edges = media.characters?.edges ?? [];
    for (const edge of edges) {
      const node = edge?.node;
      // Edges without a numeric character id are ignored.
      if (!node || typeof node.id !== 'number') continue;
      const firstNameHint = node.name?.first?.trim() || '';
      const fullName = node.name?.full?.trim() || '';
      const lastNameHint = node.name?.last?.trim() || '';
      const nativeName = node.name?.native?.trim() || '';
      // Alternative names: strings only, trimmed, non-empty, de-duplicated.
      const alternativeNames = [
        ...new Set(
          (node.name?.alternative ?? [])
            .filter((value): value is string => typeof value === 'string')
            .map((value) => value.trim())
            .filter((value) => value.length > 0),
        ),
      ];
      // Characters without a native name are dropped entirely.
      if (!nativeName) continue;
      const voiceActors: VoiceActorRecord[] = [];
      for (const va of edge?.voiceActors ?? []) {
        if (!va || typeof va.id !== 'number') continue;
        const vaFull = va.name?.full?.trim() || '';
        const vaNative = va.name?.native?.trim() || '';
        // A voice actor needs at least one usable name.
        if (!vaFull && !vaNative) continue;
        voiceActors.push({
          id: va.id,
          fullName: vaFull,
          nativeName: vaNative,
          imageUrl: va.image?.medium || null,
        });
      }
      characters.push({
        id: node.id,
        role: mapRole(edge?.role),
        firstNameHint,
        fullName,
        lastNameHint,
        nativeName,
        alternativeNames,
        bloodType: node.bloodType?.trim() || '',
        // Birthday is kept only when both month and day are numbers.
        birthday:
          typeof node.dateOfBirth?.month === 'number' && typeof node.dateOfBirth?.day === 'number'
            ? [node.dateOfBirth.month, node.dateOfBirth.day]
            : null,
        description: node.description || '',
        imageUrl: node.image?.large || node.image?.medium || null,
        // Age is stored as text whether it arrives as a string or a number.
        age:
          typeof node.age === 'string'
            ? node.age.trim()
            : typeof node.age === 'number'
              ? String(node.age)
              : '',
        sex: node.gender?.trim() || '',
        voiceActors,
      });
    }

    const hasNextPage = Boolean(media.characters?.pageInfo?.hasNextPage);
    if (!hasNextPage) {
      break;
    }
    page += 1;
  }

  return {
    mediaTitle,
    characters,
  };
}
/**
 * Makes sure `dirPath` exists as a directory, creating missing parents.
 *
 * `mkdirSync` with `recursive: true` is already a no-op for an existing
 * directory, so no separate existence check is needed (and none can race with
 * a concurrent creator). Unlike an `existsSync` pre-check, this also fails
 * loudly when the path is occupied by a regular file.
 *
 * @param dirPath - Directory to create.
 * @throws The underlying `fs` error when the directory cannot be created.
 */
export function ensureDir(dirPath: string): void {
  fs.mkdirSync(dirPath, { recursive: true });
}
/**
 * Wraps `body` in a `details` element with a bold `summary` title.
 *
 * @param title - Text placed in the `summary` node.
 * @param open - Initial value of the `open` attribute.
 * @param body - Content placed in the inner `div`.
 * @returns A structured-content `details` node.
 */
function buildCollapsibleSection(
  title: string,
  open: boolean,
  body: Array<Record<string, unknown>> | string | Record<string, unknown>,
): Record<string, unknown> {
  return {
    tag: 'details',
    open,
    style: { marginTop: '0.4em' },
    content: [
      {
        tag: 'summary',
        style: { fontWeight: 'bold', fontSize: '0.95em', cursor: 'pointer' },
        content: title,
      },
      {
        tag: 'div',
        style: { padding: '0.25em 0 0 0.4em', fontSize: '0.9em' },
        content: body,
      },
    ],
  };
}

/** Label for one voice actor: `native (full)` when both names exist, otherwise whichever is set. */
function formatVoiceActorLabel(va: VoiceActorRecord): string {
  if (!va.nativeName) return va.fullName;
  return va.fullName ? `${va.nativeName} (${va.fullName})` : va.nativeName;
}

/**
 * Builds the body of the "Voiced by" section.
 *
 * A single voice actor is rendered as a `div` with the label, or as a
 * one-row table with portrait and label when an image path is known for the
 * actor's id. Any other number of voice actors is rendered as a `ul` of
 * labels; images are not used in that form.
 *
 * @param voiceActors - Voice actors of the character.
 * @param vaImagePaths - Image path per voice-actor id.
 * @returns A structured-content node.
 */
function buildVoicedByContent(
  voiceActors: VoiceActorRecord[],
  vaImagePaths: Map<number, string>,
): Record<string, unknown> {
  if (voiceActors.length === 1) {
    const va = voiceActors[0]!;
    const vaImgPath = vaImagePaths.get(va.id);
    const vaLabel = formatVoiceActorLabel(va);

    if (vaImgPath) {
      return {
        tag: 'table',
        content: {
          tag: 'tr',
          content: [
            {
              tag: 'td',
              style: {
                verticalAlign: 'top',
                padding: '0',
                paddingRight: '0.4em',
                borderWidth: '0',
              },
              content: {
                tag: 'img',
                path: vaImgPath,
                width: 3,
                height: 3,
                sizeUnits: 'em',
                title: vaLabel,
                alt: vaLabel,
                collapsed: false,
                collapsible: false,
                background: true,
              },
            },
            {
              tag: 'td',
              style: { verticalAlign: 'middle', padding: '0', borderWidth: '0' },
              content: vaLabel,
            },
          ],
        },
      };
    }

    return { tag: 'div', content: vaLabel };
  }

  const items: Array<Record<string, unknown>> = voiceActors.map((va) => ({
    tag: 'li',
    content: formatVoiceActorLabel(va),
  }));
  return { tag: 'ul', style: { marginTop: '0.15em' }, content: items };
}
/** True when `value` is non-empty and consists solely of hiragana, katakana, or the long-vowel mark. */
export function hasKanaOnly(value: string): boolean {
  const kanaOnly = /^[\u3040-\u309f\u30a0-\u30ffー]+$/;
  return kanaOnly.test(value);
}

/** Shifts katakana in the U+30A1–U+30F6 range down to hiragana; every other character is kept. */
function katakanaToHiragana(value: string): string {
  return Array.from(value, (char) => {
    const code = char.charCodeAt(0);
    const isKatakana = code >= 0x30a1 && code <= 0x30f6;
    return isKatakana ? String.fromCharCode(code - 0x60) : char;
  }).join('');
}

/**
 * Produces a hiragana reading for a term that is already written in kana.
 * Whitespace is removed first; anything that is not kana-only yields `''`.
 */
export function buildReading(term: string): string {
  const compact = term.replace(/\s+/g, '').trim();
  if (compact && hasKanaOnly(compact)) {
    return katakanaToHiragana(compact);
  }
  return '';
}
/** True when `value` is non-empty and uses only Latin letters (incl. macron/circumflex vowels), apostrophes, dots, hyphens and whitespace. */
export function isRomanizedName(value: string): boolean {
  const romanizedPattern = /^[A-Za-zĀĪŪĒŌÂÊÎÔÛāīūēōâêîôû'’.\-\s]+$/;
  return romanizedPattern.test(value);
}

/**
 * Canonicalizes a romanized name for kana conversion: NFKC, lower case,
 * apostrophes removed, dots/hyphens turned into spaces, long-vowel marks
 * expanded (`ā`→`aa`, `ī`→`ii`, `ū`→`uu`, `ē`→`ei`, `ō`→`ou`), whitespace
 * collapsed and trimmed.
 */
function normalizeRomanizedName(value: string): string {
  // Applied strictly in this order.
  const rewrites: ReadonlyArray<[RegExp, string]> = [
    [/[’']/g, ''],
    [/[.\-]/g, ' '],
    [/ā|â/g, 'aa'],
    [/ī|î/g, 'ii'],
    [/ū|û/g, 'uu'],
    [/ē|ê/g, 'ei'],
    [/ō|ô/g, 'ou'],
    [/\s+/g, ' '],
  ];
  let text = value.normalize('NFKC').toLowerCase();
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
/**
 * Converts a romanized token to katakana by scanning it left to right.
 *
 * The token is first passed through `normalizeRomanizedName` and stripped of
 * whitespace; anything that is then not purely `a`–`z` is rejected.
 *
 * @param token - Romanized text.
 * @returns The katakana string, or `null` when the token is empty, contains
 *   other characters, or some position matches no table entry.
 */
function romanizedTokenToKatakana(token: string): string | null {
  const normalized = normalizeRomanizedName(token).replace(/\s+/g, '');
  if (!normalized || !/^[a-z]+$/.test(normalized)) {
    return null;
  }

  let output = '';
  // `i` is advanced inside the body by the length of whatever was consumed.
  for (let i = 0; i < normalized.length; ) {
    const current = normalized[i]!;
    const next = normalized[i + 1] ?? '';

    // Doubled consonant (other than `n`): emit a small tsu and consume one letter.
    if (
      i + 1 < normalized.length &&
      current === next &&
      current !== 'n' &&
      !'aeiou'.includes(current)
    ) {
      output += 'ッ';
      i += 1;
      continue;
    }

    // `n` followed by a letter that is neither a vowel nor `y` is the moraic n.
    if (current === 'n' && next.length > 0 && next !== 'y' && !'aeiou'.includes(next)) {
      output += 'ン';
      i += 1;
      continue;
    }

    // The digraph table is consulted before the monograph table; within a
    // table the first entry (in table order) that matches at `i` wins.
    const digraph = ROMANIZED_KANA_DIGRAPHS.find(([romaji]) => normalized.startsWith(romaji, i));
    if (digraph) {
      output += digraph[1];
      i += digraph[0].length;
      continue;
    }

    const monograph = ROMANIZED_KANA_MONOGRAPHS.find(([romaji]) =>
      normalized.startsWith(romaji, i),
    );
    if (monograph) {
      output += monograph[1];
      i += monograph[0].length;
      continue;
    }

    // Nothing matched at this position: the whole token is unconvertible.
    return null;
  }

  return output.length > 0 ? output : null;
}
/**
 * Plausibility score for a name part of `length` characters:
 * 2 characters score 3, 1 or 3 score 2, 4 scores 1, anything else 0.
 */
function scoreJapaneseNamePartLength(length: number): number {
  switch (length) {
    case 2:
      return 3;
    case 1:
    case 3:
      return 2;
    case 4:
      return 1;
    default:
      return 0;
  }
}
/**
 * Splits a native-script name into two parts labelled `family` and `given`.
 *
 * Strategies are tried in order and the first that yields exactly two parts wins:
 *  1. whitespace (including the ideographic space),
 *  2. a middle-dot style separator,
 *  3. the name being the literal concatenation of both hints (either order;
 *     the leading part is reported as `family`),
 *  4. for names containing kanji with both hints present, a boundary chosen
 *     by `inferJapaneseNameSplitIndex`.
 * When nothing applies, `family` and `given` are `null` and `hasSpace` is false.
 *
 * @param nameOriginal - Name in native script.
 * @param firstNameHint - Optional given-name hint.
 * @param lastNameHint - Optional family-name hint.
 */
export function splitJapaneseName(
  nameOriginal: string,
  firstNameHint?: string,
  lastNameHint?: string,
): JapaneseNameParts {
  const name = nameOriginal.trim();
  if (name.length === 0) {
    return {
      hasSpace: false,
      original: '',
      combined: '',
      family: null,
      given: null,
    };
  }

  const asSplit = (
    original: string,
    family: string,
    given: string,
    combined: string = `${family}${given}`,
  ): JapaneseNameParts => ({
    hasSpace: true,
    original,
    combined,
    family,
    given,
  });

  // 1. Whitespace-separated.
  const spaced = name.replace(/[\s\u3000]+/g, ' ').trim();
  const bySpace = spaced.split(' ').filter((piece) => piece.length > 0);
  if (bySpace.length === 2) {
    return asSplit(spaced, bySpace[0]!, bySpace[1]!);
  }

  // 2. Separated by a middle dot.
  const byDot = name
    .split(/[・・·•]/)
    .map((piece) => piece.trim())
    .filter((piece) => piece.length > 0);
  if (byDot.length === 2) {
    return asSplit(name, byDot[0]!, byDot[1]!);
  }

  const first = firstNameHint?.trim() || '';
  const last = lastNameHint?.trim() || '';
  if (first && last) {
    // 3. Literal concatenation of the hints.
    if (name === `${last}${first}`) {
      return asSplit(name, last, first);
    }
    if (name === `${first}${last}`) {
      return asSplit(name, first, last);
    }

    // 4. Heuristic boundary for kanji names.
    if (containsKanji(name)) {
      const boundary = inferJapaneseNameSplitIndex(name, first, last);
      if (boundary != null) {
        const chars = [...name];
        const family = chars.slice(0, boundary).join('');
        const given = chars.slice(boundary).join('');
        if (family && given) {
          return asSplit(name, family, given, name);
        }
      }
    }
  }

  return {
    hasSpace: false,
    original: name,
    combined: name,
    family: null,
    given: null,
  };
}
/**
 * Appends every honorific suffix term from `HONORIFIC_SUFFIXES` to `value`.
 *
 * @param value - Base name.
 * @returns One alias per suffix, in the order of `HONORIFIC_SUFFIXES`.
 */
export function buildHonorificAliases(value: string): string[] {
  const aliases: string[] = [];
  for (const suffix of HONORIFIC_SUFFIXES) {
    aliases.push(`${value}${suffix.term}`);
  }
  return aliases;
}
/** Archive-relative path of a character portrait: `img/m<mediaId>-c<charId>.<ext>`. */
export function buildSnapshotImagePath(mediaId: number, charId: number, ext: string): string {
  const stem = `m${mediaId}-c${charId}`;
  return `img/${stem}.${ext}`;
}

/** Archive-relative path of a voice-actor portrait: `img/m<mediaId>-va<vaId>.<ext>`. */
export function buildVaImagePath(mediaId: number, vaId: number, ext: string): string {
  const stem = `m${mediaId}-va${vaId}`;
  return `img/${stem}.${ext}`;
}
/** Maps the visible title of a collapsible section to its settings key, or `null` for unknown titles. */
function getCollapsibleSectionKeyFromTitle(
  title: string,
): AnilistCharacterDictionaryCollapsibleSectionKey | null {
  if (title === 'Description') return 'description';
  if (title === 'Character Information') return 'characterInformation';
  if (title === 'Voiced by') return 'voicedBy';
  return null;
}

/**
 * Returns a deep copy of a structured-content value in which every `details`
 * node whose first child carries a recognized string title has its `open`
 * flag replaced by the current setting.
 *
 * Arrays and plain objects are copied recursively; primitives and `null` are
 * returned as-is. `details` nodes with an unrecognized title keep their
 * existing `open` value.
 *
 * @param value - Structured-content node, array of nodes, or primitive.
 * @param getCollapsibleSectionOpenState - Lookup for the desired open state.
 */
function applyCollapsibleOpenStatesToStructuredValue(
  value: unknown,
  getCollapsibleSectionOpenState: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean,
): unknown {
  if (Array.isArray(value)) {
    return value.map((item) =>
      applyCollapsibleOpenStatesToStructuredValue(item, getCollapsibleSectionOpenState),
    );
  }
  if (!value || typeof value !== 'object') {
    return value;
  }

  const record = value as Record<string, unknown>;
  const next: Record<string, unknown> = {};
  for (const [key, child] of Object.entries(record)) {
    next[key] = applyCollapsibleOpenStatesToStructuredValue(child, getCollapsibleSectionOpenState);
  }

  if (record.tag === 'details') {
    // The section title lives in the first child (the `summary` node).
    const content = Array.isArray(record.content) ? record.content : [];
    const summary: unknown = content[0];
    if (summary && typeof summary === 'object' && !Array.isArray(summary)) {
      const summaryContent = (summary as Record<string, unknown>).content;
      if (typeof summaryContent === 'string') {
        const section = getCollapsibleSectionKeyFromTitle(summaryContent);
        if (section) {
          next.open = getCollapsibleSectionOpenState(section);
        }
      }
    }
  }

  return next;
}

/**
 * Re-applies the collapsible-section settings to the glossary (tuple index 5)
 * of every term entry, leaving all other tuple positions untouched.
 *
 * @param termEntries - Entries to transform; the input is not mutated.
 * @param getCollapsibleSectionOpenState - Lookup for the desired open state.
 * @returns New entries with rewritten glossaries.
 */
export function applyCollapsibleOpenStatesToTermEntries(
  termEntries: CharacterDictionaryTermEntry[],
  getCollapsibleSectionOpenState: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean,
): CharacterDictionaryTermEntry[] {
  return termEntries.map((entry) => {
    const glossary = entry[5].map((item) =>
      applyCollapsibleOpenStatesToStructuredValue(item, getCollapsibleSectionOpenState),
    ) as CharacterDictionaryGlossaryEntry[];
    return [...entry.slice(0, 5), glossary, ...entry.slice(6)] as CharacterDictionaryTermEntry;
  });
}
/**
 * Collects every surface form under which a character should be findable.
 *
 * For the native name, the full name and each alternative name (each expanded
 * by `expandRawNameVariants`) this adds: the name itself, the name without
 * whitespace, that form without middle dots, both halves of a two-part
 * whitespace split, and all parts of a middle-dot split. The family/given
 * parts found by `splitJapaneseName` for the native name are added as well.
 * Every form is then also emitted with each honorific suffix, and the same is
 * done for the katakana aliases derived from romanized forms.
 *
 * @param character - Character whose names are expanded.
 * @returns De-duplicated terms in insertion order, blank entries removed.
 */
export function buildNameTerms(character: CharacterRecord): string[] {
  const base = new Set<string>();
  const rawNames = [character.nativeName, character.fullName, ...character.alternativeNames];
  for (const rawName of rawNames) {
    for (const name of expandRawNameVariants(rawName)) {
      base.add(name);

      const compact = name.replace(/[\s\u3000]+/g, '');
      if (compact && compact !== name) {
        base.add(compact);
      }

      const noMiddleDots = compact.replace(/[・・·•]/g, '');
      if (noMiddleDots && noMiddleDots !== compact) {
        base.add(noMiddleDots);
      }

      // Only an unambiguous two-part split contributes the individual parts.
      const split = name.split(/[\s\u3000]+/).filter((part) => part.trim().length > 0);
      if (split.length === 2) {
        base.add(split[0]!);
        base.add(split[1]!);
      }

      const splitByMiddleDot = name
        .split(/[・・·•]/)
        .map((part) => part.trim())
        .filter((part) => part.length > 0);
      if (splitByMiddleDot.length >= 2) {
        for (const part of splitByMiddleDot) {
          base.add(part);
        }
      }
    }
  }

  const nativeParts = splitJapaneseName(
    character.nativeName,
    character.firstNameHint,
    character.lastNameHint,
  );
  if (nativeParts.family) {
    base.add(nativeParts.family);
  }
  if (nativeParts.given) {
    base.add(nativeParts.given);
  }

  const withHonorifics = new Set<string>();
  for (const entry of base) {
    withHonorifics.add(entry);
    for (const suffix of HONORIFIC_SUFFIXES) {
      withHonorifics.add(`${entry}${suffix.term}`);
    }
  }

  // The alias list is materialized before the loop, so adding to the set
  // while iterating the aliases is safe.
  for (const alias of addRomanizedKanaAliases(withHonorifics)) {
    withHonorifics.add(alias);
    for (const suffix of HONORIFIC_SUFFIXES) {
      withHonorifics.add(`${alias}${suffix.term}`);
    }
  }

  return [...withHonorifics].filter((entry) => entry.trim().length > 0);
}
/** Tag and score for a role; any role other than `main`, `primary` or `side` is treated as `appears`. */
function roleInfo(role: CharacterDictionaryRole): { tag: string; score: number } {
  switch (role) {
    case 'main':
      return { tag: 'main', score: 100 };
    case 'primary':
      return { tag: 'primary', score: 75 };
    case 'side':
      return { tag: 'side', score: 50 };
    default:
      return { tag: 'appears', score: 25 };
  }
}

/**
 * Assembles one term-bank row for a character name.
 *
 * @param term - Surface form.
 * @param reading - Kana reading (may be empty).
 * @param role - Role that determines the tag and score.
 * @param glossary - Glossary entries shared by all terms of the character.
 */
export function buildTermEntry(
  term: string,
  reading: string,
  role: CharacterDictionaryRole,
  glossary: CharacterDictionaryGlossaryEntry[],
): CharacterDictionaryTermEntry {
  const info = roleInfo(role);
  const tags = `name ${info.tag}`;
  return [term, reading, tags, '', info.score, glossary, 0, ''];
}
/**
 * One row of the generated term bank, stored as a positional tuple.
 *
 * Positions as filled by `buildTermEntry`:
 *  0. term (surface form)
 *  1. reading
 *  2. tag string of the form `name <role tag>`
 *  3. always `''`
 *  4. score derived from the character role
 *  5. glossary entries
 *  6. always `0`
 *  7. always `''`
 *
 * NOTE(review): the meaning of positions 3, 6 and 7 is not visible here —
 * presumably fixed by the target dictionary format; confirm before relying on them.
 */
export type CharacterDictionaryTermEntry = [
  string,
  string,
  string,
  string,
  number,
  CharacterDictionaryGlossaryEntry[],
  number,
  string,
];
/**
 * Options accepted when generating a character dictionary.
 * The `export` modifier for this declaration is the final token of the preceding line.
 */
type CharacterDictionaryGenerateOptions = {
  // presumably a refresh time-to-live in milliseconds — confirm against the caller
  refreshTtlMs?: number;
};

/** Summary of a snapshot lookup/generation for one media id. */
export type CharacterDictionarySnapshotResult = {
  mediaId: number;
  mediaTitle: string;
  entryCount: number;
  fromCache: boolean;
  updatedAt: number;
};

/** Payload handed to the snapshot progress callbacks. */
export type CharacterDictionarySnapshotProgress = {
  mediaId: number;
  mediaTitle: string;
};

/** Optional progress hooks; both callbacks receive the same progress payload shape. */
export type CharacterDictionarySnapshotProgressCallbacks = {
  onChecking?: (progress: CharacterDictionarySnapshotProgress) => void;
  onGenerating?: (progress: CharacterDictionarySnapshotProgress) => void;
};

/** Result of building a merged dictionary archive. */
export type MergedCharacterDictionaryBuildResult = {
  zipPath: string;
  revision: string;
  dictionaryTitle: string;
  entryCount: number;
};

/** Dependencies injected into the character-dictionary runtime. */
export interface CharacterDictionaryRuntimeDeps {
  userDataPath: string;
  getCurrentMediaPath: () => string | null;
  getCurrentMediaTitle: () => string | null;
  resolveMediaPathForJimaku: (mediaPath: string | null) => string | null;
  // NOTE(review): `Promise` has no type argument in this view (looks like lost text);
  // confirm the resolved type before relying on it.
  guessAnilistMediaInfo: (
    mediaPath: string | null,
    mediaTitle: string | null,
  ) => Promise;
  now: () => number;
  // NOTE(review): same missing `Promise` type argument as above — confirm.
  sleep?: (ms: number) => Promise;
  logInfo?: (message: string) => void;
  logWarn?: (message: string) => void;
  getCollapsibleSectionOpenState?: (
    section: AnilistCharacterDictionaryCollapsibleSectionKey,
  ) => boolean;
}

/** Minimal AniList media identity: numeric id plus title. */
export type ResolvedAniListMedia = {
  id: number;
  title: string;
};

// ---------------------------------------------------------------------------
// diff --git a/src/main/character-dictionary-runtime/zip.ts b/src/main/character-dictionary-runtime/zip.ts
// new file mode 100644
// index 0000000..5b31ce8
// --- /dev/null
// +++ b/src/main/character-dictionary-runtime/zip.ts
// @@ -0,0 +1,222 @@
// ---------------------------------------------------------------------------
import * as fs from 'fs';
import * as path from 'path';
import { ensureDir } from './fs-utils';
import type { CharacterDictionarySnapshotImage, CharacterDictionaryTermEntry } from './types';

/** Bookkeeping for one archived file, kept so the central directory can be written after all local records. */
type ZipEntry = {
  name: string;
  data: Buffer;
  crc32: number;
  localHeaderOffset: number;
};

/** General-purpose flag bit 11: marks the entry name as UTF-8 encoded. */
const ZIP_FLAG_UTF8_NAME = 0x0800;

/**
 * Writes `value` (coerced to an unsigned 32-bit integer) into `buffer` at `offset`,
 * least-significant byte first.
 *
 * @returns the offset just past the four bytes written
 */
function writeUint32LE(buffer: Buffer, value: number, offset: number): number {
  const normalized = value >>> 0;
  for (let index = 0; index < 4; index += 1) {
    buffer[offset + index] = (normalized >>> (8 * index)) & 0xff;
  }
  return offset + 4;
}

/** Returns the dictionary title used for the given AniList media id. */
export function buildDictionaryTitle(mediaId: number): string {
  return `SubMiner Character Dictionary (AniList ${mediaId})`;
}

/** Builds the object serialized as `index.json` (format 3, author `SubMiner`). */
function createIndex(
  dictionaryTitle: string,
  description: string,
  revision: string,
): Record<string, unknown> {
  return {
    title: dictionaryTitle,
    revision,
    format: 3,
    author: 'SubMiner',
    description,
  };
}

/** Builds the rows serialized as `tag_bank_1.json`: the `name` tag plus one tag per role. */
function createTagBank(): Array<[string, string, number, string, number]> {
  return [
    ['name', 'partOfSpeech', 0, 'Character name', 0],
    ['main', 'name', 0, 'Protagonist', 0],
    ['primary', 'name', 0, 'Main character', 0],
    ['side', 'name', 0, 'Side character', 0],
    ['appears', 'name', 0, 'Minor appearance', 0],
  ];
}

/** 256-entry lookup table for the reflected CRC-32 polynomial 0xEDB88320. */
const CRC32_TABLE = (() => {
  const table = new Uint32Array(256);
  for (let i = 0; i < 256; i += 1) {
    let crc = i;
    for (let j = 0; j < 8; j += 1) {
      crc = (crc & 1) !== 0 ? 0xedb88320 ^ (crc >>> 1) : crc >>> 1;
    }
    table[i] = crc >>> 0;
  }
  return table;
})();

/** Computes the table-driven CRC-32 of `data` as an unsigned 32-bit number. */
function crc32(data: Buffer): number {
  let crc = 0xffffffff;
  for (const byte of data) {
    crc = CRC32_TABLE[(crc ^ byte) & 0xff]! ^ (crc >>> 8);
  }
  return (crc ^ 0xffffffff) >>> 0;
}

/**
 * Picks the general-purpose flags for an entry name.
 *
 * Names are always encoded as UTF-8. The ZIP format treats names as CP437 unless
 * bit 11 is set, so the bit is raised whenever the encoded name contains a
 * non-ASCII byte. ASCII-only names keep flags at 0, which leaves their archive
 * bytes identical to what was produced before this flag was introduced.
 */
function zipNameFlags(encodedName: Buffer): number {
  return encodedName.some((byte) => byte > 0x7f) ? ZIP_FLAG_UTF8_NAME : 0;
}

/**
 * Serializes `files` into an uncompressed ("stored") ZIP archive held in memory.
 *
 * Layout: one local header + raw data per file, then one central-directory
 * record per file, then the end-of-central-directory record. Timestamps are
 * written as 0, and no ZIP64 structures are emitted.
 *
 * @param files entry names (encoded as UTF-8) and their raw bytes, in archive order
 * @returns the complete archive bytes
 */
function createStoredZip(files: Array<{ name: string; data: Buffer }>): Buffer {
  const localChunks: Buffer[] = [];
  const entries: ZipEntry[] = [];
  let offset = 0;

  for (const file of files) {
    const encodedName = Buffer.from(file.name, 'utf8');
    const checksum = crc32(file.data);
    const size = file.data.length;

    const local = Buffer.alloc(30 + encodedName.length);
    // Each write returns the offset just past what it wrote, which drives the cursor.
    let cursor = writeUint32LE(local, 0x04034b50, 0); // local file header signature
    cursor = local.writeUInt16LE(20, cursor); // version needed to extract
    cursor = local.writeUInt16LE(zipNameFlags(encodedName), cursor); // general purpose flags
    cursor = local.writeUInt16LE(0, cursor); // compression method: 0 = stored
    cursor = local.writeUInt16LE(0, cursor); // last modified time
    cursor = local.writeUInt16LE(0, cursor); // last modified date
    cursor = writeUint32LE(local, checksum, cursor);
    cursor = writeUint32LE(local, size, cursor); // compressed size (same as raw: stored)
    cursor = writeUint32LE(local, size, cursor); // uncompressed size
    cursor = local.writeUInt16LE(encodedName.length, cursor);
    cursor = local.writeUInt16LE(0, cursor); // extra field length
    encodedName.copy(local, cursor);

    localChunks.push(local, file.data);
    entries.push({
      name: file.name,
      data: file.data,
      crc32: checksum,
      localHeaderOffset: offset,
    });
    offset += local.length + size;
  }

  const centralStart = offset;
  const centralChunks: Buffer[] = [];
  for (const entry of entries) {
    const encodedName = Buffer.from(entry.name, 'utf8');
    const size = entry.data.length;

    const central = Buffer.alloc(46 + encodedName.length);
    let cursor = writeUint32LE(central, 0x02014b50, 0); // central directory signature
    cursor = central.writeUInt16LE(20, cursor); // version made by
    cursor = central.writeUInt16LE(20, cursor); // version needed to extract
    cursor = central.writeUInt16LE(zipNameFlags(encodedName), cursor); // must match local header
    cursor = central.writeUInt16LE(0, cursor); // compression method: 0 = stored
    cursor = central.writeUInt16LE(0, cursor); // last modified time
    cursor = central.writeUInt16LE(0, cursor); // last modified date
    cursor = writeUint32LE(central, entry.crc32, cursor);
    cursor = writeUint32LE(central, size, cursor); // compressed size
    cursor = writeUint32LE(central, size, cursor); // uncompressed size
    cursor = central.writeUInt16LE(encodedName.length, cursor);
    cursor = central.writeUInt16LE(0, cursor); // extra field length
    cursor = central.writeUInt16LE(0, cursor); // file comment length
    cursor = central.writeUInt16LE(0, cursor); // disk number start
    cursor = central.writeUInt16LE(0, cursor); // internal file attributes
    cursor = writeUint32LE(central, 0, cursor); // external file attributes
    cursor = writeUint32LE(central, entry.localHeaderOffset, cursor);
    encodedName.copy(central, cursor);

    centralChunks.push(central);
    offset += central.length;
  }

  const centralSize = offset - centralStart;
  const end = Buffer.alloc(22);
  let cursor = writeUint32LE(end, 0x06054b50, 0); // end of central directory signature
  cursor = end.writeUInt16LE(0, cursor); // number of this disk
  cursor = end.writeUInt16LE(0, cursor); // disk holding the central directory
  cursor = end.writeUInt16LE(entries.length, cursor); // entries on this disk
  cursor = end.writeUInt16LE(entries.length, cursor); // entries in total
  cursor = writeUint32LE(end, centralSize, cursor);
  cursor = writeUint32LE(end, centralStart, cursor);
  end.writeUInt16LE(0, cursor); // archive comment length

  return Buffer.concat([...localChunks, ...centralChunks, end]);
}

/**
 * Writes a dictionary archive to `outputPath`.
 *
 * The archive contains `index.json`, `tag_bank_1.json`, every image (entry name
 * taken from `image.path`, bytes base64-decoded from `image.dataBase64`), and the
 * term entries split into `term_bank_<n>.json` files of at most 10,000 entries.
 * The parent directory of `outputPath` is passed to `ensureDir` before writing.
 *
 * @returns the output path and the number of term entries written
 */
export function buildDictionaryZip(
  outputPath: string,
  dictionaryTitle: string,
  description: string,
  revision: string,
  termEntries: CharacterDictionaryTermEntry[],
  images: CharacterDictionarySnapshotImage[],
): { zipPath: string; entryCount: number } {
  const zipFiles: Array<{ name: string; data: Buffer }> = [
    {
      name: 'index.json',
      data: Buffer.from(
        JSON.stringify(createIndex(dictionaryTitle, description, revision), null, 2),
        'utf8',
      ),
    },
    {
      name: 'tag_bank_1.json',
      data: Buffer.from(JSON.stringify(createTagBank()), 'utf8'),
    },
    ...images.map((image) => ({
      name: image.path,
      data: Buffer.from(image.dataBase64, 'base64'),
    })),
  ];

  const entriesPerBank = 10_000;
  for (let start = 0; start < termEntries.length; start += entriesPerBank) {
    const bankNumber = Math.floor(start / entriesPerBank) + 1;
    const bankEntries = termEntries.slice(start, start + entriesPerBank);
    zipFiles.push({
      name: `term_bank_${bankNumber}.json`,
      data: Buffer.from(JSON.stringify(bankEntries), 'utf8'),
    });
  }

  ensureDir(path.dirname(outputPath));
  fs.writeFileSync(outputPath, createStoredZip(zipFiles));
  return { zipPath: outputPath, entryCount: termEntries.length };
}