feat(dictionary): structured glossary with voice actors and collapsible sections

- Add voice actor data (Japanese VA) from AniList GraphQL query
- Redesign glossary entries as structured-content with role badges,
  collapsible Description/Character Information/Voiced-by sections
- Parse __Key:__ fields from AniList descriptions into structured info
- Download and embed voice actor images alongside character images
- Bump format version to 12
This commit is contained in:
2026-03-06 16:41:58 -08:00
parent 82bec02a36
commit 5ff4cc21bd
2 changed files with 326 additions and 59 deletions

View File

@@ -111,7 +111,7 @@ test('generateForCurrentMedia emits structured-content glossary so image stays w
node: { node: {
id: 123, id: 123,
description: description:
'__Race:__ Human Alexia Midgar is the second princess of the Kingdom of Midgar.', '__Race:__ Human\nAlexia Midgar is the second princess of the Kingdom of Midgar.',
image: { image: {
large: 'https://example.com/alexia.png', large: 'https://example.com/alexia.png',
medium: null, medium: null,
@@ -171,22 +171,55 @@ test('generateForCurrentMedia emits structured-content glossary so image stays w
const entry = glossary[0] as { const entry = glossary[0] as {
type: string; type: string;
content: unknown[]; content: { tag: string; content: Array<Record<string, unknown>> };
}; };
assert.equal(entry.type, 'structured-content'); assert.equal(entry.type, 'structured-content');
assert.equal(Array.isArray(entry.content), true);
const image = entry.content[0] as Record<string, unknown>; const wrapper = entry.content;
assert.equal(wrapper.tag, 'div');
const children = wrapper.content;
const nameDiv = children[0] as { tag: string; content: string };
assert.equal(nameDiv.tag, 'div');
assert.equal(nameDiv.content, 'アレクシア・ミドガル');
const secondaryNameDiv = children[1] as { tag: string; content: string };
assert.equal(secondaryNameDiv.tag, 'div');
assert.equal(secondaryNameDiv.content, 'Alexia Midgar');
const imageWrap = children[2] as { tag: string; content: Record<string, unknown> };
assert.equal(imageWrap.tag, 'div');
const image = imageWrap.content as Record<string, unknown>;
assert.equal(image.tag, 'img'); assert.equal(image.tag, 'img');
assert.equal(image.path, 'img/m130298-c123.png'); assert.equal(image.path, 'img/m130298-c123.png');
assert.equal(image.sizeUnits, 'em'); assert.equal(image.sizeUnits, 'em');
const descriptionLine = entry.content[5]; const sourceDiv = children[3] as { tag: string; content: string };
assert.equal( assert.equal(sourceDiv.tag, 'div');
descriptionLine, assert.ok(sourceDiv.content.includes('The Eminence in Shadow'));
'Race: Human Alexia Midgar is the second princess of the Kingdom of Midgar.',
const roleBadgeDiv = children[4] as { tag: string; content: Record<string, unknown> };
assert.equal(roleBadgeDiv.tag, 'div');
const badge = roleBadgeDiv.content as { tag: string; content: string };
assert.equal(badge.tag, 'span');
assert.equal(badge.content, 'Side Character');
const descSection = children.find(
(c) => (c as { tag?: string }).tag === 'details' && Array.isArray((c as { content?: unknown[] }).content) &&
((c as { content: Array<{ content?: string }> }).content[0]?.content === 'Description'),
) as { tag: string; content: Array<Record<string, unknown>> } | undefined;
assert.ok(descSection, 'expected Description collapsible section');
const descBody = descSection.content[1] as { content: string };
assert.ok(
descBody.content.includes('Alexia Midgar is the second princess of the Kingdom of Midgar.'),
); );
const infoSection = children.find(
(c) => (c as { tag?: string }).tag === 'details' && Array.isArray((c as { content?: unknown[] }).content) &&
((c as { content: Array<{ content?: string }> }).content[0]?.content === 'Character Information'),
) as { tag: string; content: Array<Record<string, unknown>> } | undefined;
assert.ok(infoSection, 'expected Character Information collapsible section with parsed __Race:__ field');
const topLevelImageGlossaryEntry = glossary.find( const topLevelImageGlossaryEntry = glossary.find(
(item) => typeof item === 'object' && item !== null && (item as { type?: string }).type === 'image', (item) => typeof item === 'object' && item !== null && (item as { type?: string }).type === 'image',
); );
@@ -693,7 +726,7 @@ test('generateForCurrentMedia logs progress while resolving and rebuilding snaps
'[dictionary] AniList match: The Eminence in Shadow -> AniList 130298', '[dictionary] AniList match: The Eminence in Shadow -> AniList 130298',
'[dictionary] snapshot miss for AniList 130298, fetching characters', '[dictionary] snapshot miss for AniList 130298, fetching characters',
'[dictionary] downloaded AniList character page 1 for AniList 130298', '[dictionary] downloaded AniList character page 1 for AniList 130298',
'[dictionary] downloading 1 character images for AniList 130298', '[dictionary] downloading 1 images for AniList 130298',
'[dictionary] stored snapshot for AniList 130298: 32 terms', '[dictionary] stored snapshot for AniList 130298: 32 terms',
'[dictionary] building ZIP for AniList 130298', '[dictionary] building ZIP for AniList 130298',
'[dictionary] generated AniList 130298: 32 terms -> ' + '[dictionary] generated AniList 130298: 32 terms -> ' +

View File

@@ -54,7 +54,7 @@ export type CharacterDictionarySnapshot = {
images: CharacterDictionarySnapshotImage[]; images: CharacterDictionarySnapshotImage[];
}; };
const CHARACTER_DICTIONARY_FORMAT_VERSION = 10; const CHARACTER_DICTIONARY_FORMAT_VERSION = 12;
const CHARACTER_DICTIONARY_MERGED_TITLE = 'SubMiner Character Dictionary'; const CHARACTER_DICTIONARY_MERGED_TITLE = 'SubMiner Character Dictionary';
type AniListSearchResponse = { type AniListSearchResponse = {
@@ -84,6 +84,17 @@ type AniListCharacterPageResponse = {
}; };
edges?: Array<{ edges?: Array<{
role?: string | null; role?: string | null;
voiceActors?: Array<{
id: number;
name?: {
full?: string | null;
native?: string | null;
} | null;
image?: {
large?: string | null;
medium?: string | null;
} | null;
}> | null;
node?: { node?: {
id: number; id: number;
description?: string | null; description?: string | null;
@@ -101,6 +112,13 @@ type AniListCharacterPageResponse = {
} | null; } | null;
}; };
/**
 * Normalised voice-actor data attached to a CharacterRecord.
 *
 * In the visible code these records are built in fetchCharactersForMedia from
 * the `voiceActors` list on each AniList character edge; entries whose full and
 * native names are both empty are dropped there, so records produced by that
 * path always carry at least one non-empty name.
 */
type VoiceActorRecord = {
/** AniList id of the voice actor; also used as the key for voice-actor image lookups and in the image file name. */
id: number;
/** Trimmed `name.full` from AniList, or an empty string when it is missing. */
fullName: string;
/** Trimmed `name.native` from AniList, or an empty string when it is missing. */
nativeName: string;
/** Source URL of the voice actor's portrait (taken from `image.medium`), or null when none was returned. */
imageUrl: string | null;
};
type CharacterRecord = { type CharacterRecord = {
id: number; id: number;
role: CharacterDictionaryRole; role: CharacterDictionaryRole;
@@ -108,6 +126,7 @@ type CharacterRecord = {
nativeName: string; nativeName: string;
description: string; description: string;
imageUrl: string | null; imageUrl: string | null;
voiceActors: VoiceActorRecord[];
}; };
type ZipEntry = { type ZipEntry = {
@@ -531,14 +550,34 @@ function buildNameTerms(character: CharacterRecord): string[] {
return [...withHonorifics].filter((entry) => entry.trim().length > 0); return [...withHonorifics].filter((entry) => entry.trim().length > 0);
} }
function stripDescription(value: string): string { function parseCharacterDescription(raw: string): {
return value.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim(); fields: Array<{ key: string; value: string }>;
} text: string;
} {
const cleaned = raw.replace(/<br\s*\/?>/gi, '\n').replace(/<[^>]+>/g, ' ');
const lines = cleaned.split(/\n/);
const fields: Array<{ key: string; value: string }> = [];
const textLines: string[] = [];
function normalizeDescription(value: string): string { for (const line of lines) {
const stripped = stripDescription(value); const trimmed = line.trim();
if (!stripped) return ''; if (!trimmed) continue;
return stripped const match = trimmed.match(/^__([^_]+):__\s*(.+)$/);
if (match) {
const value = match[2]!
.replace(/__([^_]+)__/g, '$1')
.replace(/\*\*([^*]+)\*\*/g, '$1')
.replace(/_([^_]+)_/g, '$1')
.replace(/\*([^*]+)\*/g, '$1')
.trim();
fields.push({ key: match[1]!.trim(), value });
} else {
textLines.push(trimmed);
}
}
const text = textLines
.join(' ')
.replace(/\[([^\]]+)\]\((https?:\/\/[^)\s]+)\)/g, '$1') .replace(/\[([^\]]+)\]\((https?:\/\/[^)\s]+)\)/g, '$1')
.replace(/https?:\/\/\S+/g, '') .replace(/https?:\/\/\S+/g, '')
.replace(/__([^_]+)__/g, '$1') .replace(/__([^_]+)__/g, '$1')
@@ -547,6 +586,8 @@ function normalizeDescription(value: string): string {
.replace(/!~/g, '') .replace(/!~/g, '')
.replace(/\s+/g, ' ') .replace(/\s+/g, ' ')
.trim(); .trim();
return { fields, text };
} }
function roleInfo(role: CharacterDictionaryRole): { tag: string; score: number } { function roleInfo(role: CharacterDictionaryRole): { tag: string; score: number } {
@@ -708,50 +749,191 @@ function writeSnapshot(snapshotPath: string, snapshot: CharacterDictionarySnapsh
fs.writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2), 'utf8'); fs.writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2), 'utf8');
} }
/**
 * Builds the inline style object for the role badge shown in a glossary entry.
 *
 * Every badge shares the same shape (rounded, padded, small bold white text);
 * only the background colour varies with the role. Roles other than
 * 'main', 'primary' and 'side' fall back to a neutral grey.
 *
 * @param role - Role of the character within the media.
 * @returns A flat map of style property names to values.
 */
function roleBadgeStyle(role: CharacterDictionaryRole): Record<string, string> {
  let backgroundColor: string;
  switch (role) {
    case 'main':
      backgroundColor = '#4a8c3f';
      break;
    case 'primary':
      backgroundColor = '#5c82b0';
      break;
    case 'side':
      backgroundColor = '#7889a0';
      break;
    default:
      // Any remaining role gets the neutral grey badge.
      backgroundColor = '#777';
  }

  return {
    borderRadius: '4px',
    padding: '0.15em 0.5em',
    fontSize: '0.8em',
    fontWeight: 'bold',
    color: '#fff',
    backgroundColor,
  };
}
/**
 * Wraps content in a collapsible structured-content section.
 *
 * The result is a `details` node that is open by default and holds exactly two
 * children: a bold `summary` node carrying the title, followed by a slightly
 * indented `div` node carrying the body.
 *
 * @param title - Text shown in the summary line of the section.
 * @param body - Section content; stored as-is (not copied) in the body node.
 * @returns The `details` node describing the section.
 */
function buildCollapsibleSection(
  title: string,
  body: Array<string | Record<string, unknown>> | string | Record<string, unknown>,
): Record<string, unknown> {
  const heading: Record<string, unknown> = {
    tag: 'summary',
    style: { fontWeight: 'bold', fontSize: '0.95em', cursor: 'pointer' },
    content: title,
  };

  const panel: Record<string, unknown> = {
    tag: 'div',
    style: { padding: '0.25em 0 0 0.4em', fontSize: '0.9em' },
    content: body,
  };

  return {
    tag: 'details',
    open: true,
    style: { marginTop: '0.4em' },
    content: [heading, panel],
  };
}
/**
 * Builds the body of the "Voiced by" section for a character.
 *
 * Layout depends on how many voice actors there are:
 * - exactly one, with a known image path: a single-row table holding a small
 *   portrait next to the actor's label;
 * - exactly one, without an image path: a plain `div` holding the label;
 * - any other count (including zero): a `ul` with one `li` label per actor and
 *   no portraits.
 *
 * A label is "native (full)" when both names are present, otherwise whichever
 * name is non-empty.
 *
 * @param voiceActors - Voice actors of the character, in display order.
 * @param vaImagePaths - Image paths keyed by voice actor id.
 * @returns The structured-content node to place inside the section.
 */
function buildVoicedByContent(
  voiceActors: VoiceActorRecord[],
  vaImagePaths: Map<number, string>,
): Record<string, unknown> {
  const labelFor = (actor: VoiceActorRecord): string => {
    if (!actor.nativeName) return actor.fullName;
    return actor.fullName ? `${actor.nativeName} (${actor.fullName})` : actor.nativeName;
  };

  // Anything other than a single actor is rendered as a plain bullet list.
  if (voiceActors.length !== 1) {
    const bullets: Array<Record<string, unknown>> = voiceActors.map((actor) => ({
      tag: 'li',
      content: labelFor(actor),
    }));
    return { tag: 'ul', style: { marginTop: '0.15em' }, content: bullets };
  }

  const soleActor = voiceActors[0]!;
  const label = labelFor(soleActor);
  const portraitPath = vaImagePaths.get(soleActor.id);

  // No portrait recorded for this actor: fall back to a text-only node.
  if (!portraitPath) {
    return { tag: 'div', content: label };
  }

  const portraitCell: Record<string, unknown> = {
    tag: 'td',
    style: { verticalAlign: 'top', padding: '0', paddingRight: '0.4em', borderWidth: '0' },
    content: {
      tag: 'img',
      path: portraitPath,
      width: 3,
      height: 3,
      sizeUnits: 'em',
      title: label,
      alt: label,
      collapsed: false,
      collapsible: false,
      background: true,
    },
  };

  const labelCell: Record<string, unknown> = {
    tag: 'td',
    style: { verticalAlign: 'middle', padding: '0', borderWidth: '0' },
    content: label,
  };

  return {
    tag: 'table',
    content: {
      tag: 'tr',
      content: [portraitCell, labelCell],
    },
  };
}
function createDefinitionGlossary( function createDefinitionGlossary(
character: CharacterRecord, character: CharacterRecord,
mediaTitle: string, mediaTitle: string,
imagePath: string | null, imagePath: string | null,
vaImagePaths: Map<number, string>,
): CharacterDictionaryGlossaryEntry[] { ): CharacterDictionaryGlossaryEntry[] {
const displayName = character.nativeName || character.fullName || `Character ${character.id}`; const displayName = character.nativeName || character.fullName || `Character ${character.id}`;
const lines: string[] = [`${displayName} [${roleLabel(character.role)}]`, `${mediaTitle} · AniList`]; const secondaryName =
character.nativeName &&
const description = normalizeDescription(character.description); character.fullName &&
if (description) { character.fullName !== character.nativeName
lines.push(description); ? character.fullName
} : null;
const { fields, text: descriptionText } = parseCharacterDescription(character.description);
if (!imagePath) {
return [lines.join('\n')];
}
const content: Array<string | Record<string, unknown>> = [ const content: Array<string | Record<string, unknown>> = [
{ {
tag: 'img', tag: 'div',
path: imagePath, style: { fontWeight: 'bold', fontSize: '1.1em', marginBottom: '0.1em' },
width: 8, content: displayName,
height: 11,
sizeUnits: 'em',
title: displayName,
alt: displayName,
description: `${displayName} · ${mediaTitle}`,
collapsed: false,
collapsible: false,
background: true,
}, },
]; ];
for (let i = 0; i < lines.length; i += 1) { if (secondaryName) {
if (i > 0) { content.push({
content.push({ tag: 'br' }); tag: 'div',
} style: { fontSize: '0.85em', fontStyle: 'italic', color: '#b0b0b0', marginBottom: '0.2em' },
content.push(lines[i]!); content: secondaryName,
});
}
if (imagePath) {
content.push({
tag: 'div',
style: { marginTop: '0.3em', marginBottom: '0.3em' },
content: {
tag: 'img',
path: imagePath,
width: 8,
height: 11,
sizeUnits: 'em',
title: displayName,
alt: displayName,
description: `${displayName} · ${mediaTitle}`,
collapsed: false,
collapsible: false,
background: true,
},
});
}
content.push({
tag: 'div',
style: { fontSize: '0.8em', color: '#999', marginBottom: '0.2em' },
content: `From: ${mediaTitle}`,
});
content.push({
tag: 'div',
style: { marginBottom: '0.15em' },
content: {
tag: 'span',
style: roleBadgeStyle(character.role),
content: `${roleLabel(character.role)} Character`,
},
});
if (descriptionText) {
content.push(buildCollapsibleSection('Description', descriptionText));
}
if (fields.length > 0) {
const fieldItems: Array<Record<string, unknown>> = fields.map((f) => ({
tag: 'li',
content: `${f.key}: ${f.value}`,
}));
content.push(
buildCollapsibleSection('Character Information', {
tag: 'ul',
style: { marginTop: '0.15em' },
content: fieldItems,
}),
);
}
if (character.voiceActors.length > 0) {
content.push(
buildCollapsibleSection(
'Voiced by',
buildVoicedByContent(character.voiceActors, vaImagePaths),
),
);
} }
return [ return [
{ {
type: 'structured-content', type: 'structured-content',
content, content: { tag: 'div', content },
}, },
]; ];
} }
@@ -760,6 +942,10 @@ function buildSnapshotImagePath(mediaId: number, charId: number, ext: string): s
return `img/m${mediaId}-c${charId}.${ext}`; return `img/m${mediaId}-c${charId}.${ext}`;
} }
/**
 * Builds the relative path under `img/` used for a voice actor's image.
 *
 * The name combines the media id and the voice actor id with a `-va` marker
 * (`img/m<mediaId>-va<vaId>.<ext>`).
 *
 * @param mediaId - Id of the media the image was stored for.
 * @param vaId - Id of the voice actor.
 * @param ext - File extension without the leading dot.
 * @returns The relative image path.
 */
function buildVaImagePath(mediaId: number, vaId: number, ext: string): string {
  const fileName = 'm' + String(mediaId) + '-va' + String(vaId) + '.' + ext;
  return 'img/' + fileName;
}
function buildTermEntry( function buildTermEntry(
term: string, term: string,
reading: string, reading: string,
@@ -998,6 +1184,16 @@ async function fetchCharactersForMedia(
} }
edges { edges {
role role
voiceActors(language: JAPANESE) {
id
name {
full
native
}
image {
medium
}
}
node { node {
id id
description(asHtml: false) description(asHtml: false)
@@ -1042,6 +1238,19 @@ async function fetchCharactersForMedia(
const fullName = node.name?.full?.trim() || ''; const fullName = node.name?.full?.trim() || '';
const nativeName = node.name?.native?.trim() || ''; const nativeName = node.name?.native?.trim() || '';
if (!fullName && !nativeName) continue; if (!fullName && !nativeName) continue;
const voiceActors: VoiceActorRecord[] = [];
for (const va of edge?.voiceActors ?? []) {
if (!va || typeof va.id !== 'number') continue;
const vaFull = va.name?.full?.trim() || '';
const vaNative = va.name?.native?.trim() || '';
if (!vaFull && !vaNative) continue;
voiceActors.push({
id: va.id,
fullName: vaFull,
nativeName: vaNative,
imageUrl: va.image?.medium || null,
});
}
characters.push({ characters.push({
id: node.id, id: node.id,
role: mapRole(edge?.role), role: mapRole(edge?.role),
@@ -1049,6 +1258,7 @@ async function fetchCharactersForMedia(
nativeName, nativeName,
description: node.description || '', description: node.description || '',
imageUrl: node.image?.large || node.image?.medium || null, imageUrl: node.image?.large || node.image?.medium || null,
voiceActors,
}); });
} }
@@ -1119,6 +1329,7 @@ function buildSnapshotFromCharacters(
mediaTitle: string, mediaTitle: string,
characters: CharacterRecord[], characters: CharacterRecord[],
imagesByCharacterId: Map<number, CharacterDictionarySnapshotImage>, imagesByCharacterId: Map<number, CharacterDictionarySnapshotImage>,
imagesByVaId: Map<number, CharacterDictionarySnapshotImage>,
updatedAt: number, updatedAt: number,
): CharacterDictionarySnapshot { ): CharacterDictionarySnapshot {
const termEntries: CharacterDictionaryTermEntry[] = []; const termEntries: CharacterDictionaryTermEntry[] = [];
@@ -1126,7 +1337,12 @@ function buildSnapshotFromCharacters(
for (const character of characters) { for (const character of characters) {
const imagePath = imagesByCharacterId.get(character.id)?.path ?? null; const imagePath = imagesByCharacterId.get(character.id)?.path ?? null;
const glossary = createDefinitionGlossary(character, mediaTitle, imagePath); const vaImagePaths = new Map<number, string>();
for (const va of character.voiceActors) {
const vaImg = imagesByVaId.get(va.id);
if (vaImg) vaImagePaths.set(va.id, vaImg.path);
}
const glossary = createDefinitionGlossary(character, mediaTitle, imagePath, vaImagePaths);
const candidateTerms = buildNameTerms(character); const candidateTerms = buildNameTerms(character);
for (const term of candidateTerms) { for (const term of candidateTerms) {
const reading = buildReading(term); const reading = buildReading(term);
@@ -1148,7 +1364,7 @@ function buildSnapshotFromCharacters(
entryCount: termEntries.length, entryCount: termEntries.length,
updatedAt, updatedAt,
termEntries, termEntries,
images: [...imagesByCharacterId.values()], images: [...imagesByCharacterId.values(), ...imagesByVaId.values()],
}; };
} }
@@ -1278,25 +1494,42 @@ export function createCharacterDictionaryRuntimeService(deps: CharacterDictionar
} }
const imagesByCharacterId = new Map<number, CharacterDictionarySnapshotImage>(); const imagesByCharacterId = new Map<number, CharacterDictionarySnapshotImage>();
const charactersWithImages = characters.filter((character) => Boolean(character.imageUrl)).length; const imagesByVaId = new Map<number, CharacterDictionarySnapshotImage>();
if (charactersWithImages > 0) { const allImageUrls: Array<{ id: number; url: string; kind: 'character' | 'va' }> = [];
for (const character of characters) {
if (character.imageUrl) {
allImageUrls.push({ id: character.id, url: character.imageUrl, kind: 'character' });
}
for (const va of character.voiceActors) {
if (va.imageUrl && !allImageUrls.some((u) => u.kind === 'va' && u.id === va.id)) {
allImageUrls.push({ id: va.id, url: va.imageUrl, kind: 'va' });
}
}
}
if (allImageUrls.length > 0) {
deps.logInfo?.( deps.logInfo?.(
`[dictionary] downloading ${charactersWithImages} character images for AniList ${mediaId}`, `[dictionary] downloading ${allImageUrls.length} images for AniList ${mediaId}`,
); );
} }
let hasAttemptedCharacterImageDownload = false; let hasAttemptedImageDownload = false;
for (const character of characters) { for (const entry of allImageUrls) {
if (!character.imageUrl) continue; if (hasAttemptedImageDownload) {
if (hasAttemptedCharacterImageDownload) {
await sleepMs(CHARACTER_IMAGE_DOWNLOAD_DELAY_MS); await sleepMs(CHARACTER_IMAGE_DOWNLOAD_DELAY_MS);
} }
hasAttemptedCharacterImageDownload = true; hasAttemptedImageDownload = true;
const image = await downloadCharacterImage(character.imageUrl, character.id); const image = await downloadCharacterImage(entry.url, entry.id);
if (!image) continue; if (!image) continue;
imagesByCharacterId.set(character.id, { if (entry.kind === 'character') {
path: buildSnapshotImagePath(mediaId, character.id, image.ext), imagesByCharacterId.set(entry.id, {
dataBase64: image.bytes.toString('base64'), path: buildSnapshotImagePath(mediaId, entry.id, image.ext),
}); dataBase64: image.bytes.toString('base64'),
});
} else {
imagesByVaId.set(entry.id, {
path: buildVaImagePath(mediaId, entry.id, image.ext),
dataBase64: image.bytes.toString('base64'),
});
}
} }
const snapshot = buildSnapshotFromCharacters( const snapshot = buildSnapshotFromCharacters(
@@ -1304,6 +1537,7 @@ export function createCharacterDictionaryRuntimeService(deps: CharacterDictionar
fetchedMediaTitle || mediaTitleHint || `AniList ${mediaId}`, fetchedMediaTitle || mediaTitleHint || `AniList ${mediaId}`,
characters, characters,
imagesByCharacterId, imagesByCharacterId,
imagesByVaId,
deps.now(), deps.now(),
); );
writeSnapshot(snapshotPath, snapshot); writeSnapshot(snapshotPath, snapshot);