Mirror of https://github.com/ksyasuda/SubMiner.git
synced 2026-06-13 15:13:32 -07:00
Add inline character portraits and dictionary search workflow (#83)
This commit is contained in:
@@ -3,6 +3,7 @@ import { mergeTokens } from '../../token-merger';
|
||||
import { createLogger } from '../../logger';
|
||||
import {
|
||||
FrequencyDictionaryMatchMode,
|
||||
CharacterNameImage,
|
||||
MergedToken,
|
||||
NPlusOneMatchMode,
|
||||
SubtitleData,
|
||||
@@ -48,6 +49,8 @@ export interface TokenizerServiceDeps {
|
||||
getNPlusOneEnabled?: () => boolean;
|
||||
getJlptEnabled?: () => boolean;
|
||||
getNameMatchEnabled?: () => boolean;
|
||||
getNameMatchImagesEnabled?: () => boolean;
|
||||
getCharacterNameImage?: (term: string) => CharacterNameImage | null;
|
||||
getFrequencyDictionaryEnabled?: () => boolean;
|
||||
getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode;
|
||||
getFrequencyRank?: FrequencyDictionaryLookup;
|
||||
@@ -80,6 +83,8 @@ export interface TokenizerDepsRuntimeOptions {
|
||||
getNPlusOneEnabled?: () => boolean;
|
||||
getJlptEnabled?: () => boolean;
|
||||
getNameMatchEnabled?: () => boolean;
|
||||
getNameMatchImagesEnabled?: () => boolean;
|
||||
getCharacterNameImage?: (term: string) => CharacterNameImage | null;
|
||||
getFrequencyDictionaryEnabled?: () => boolean;
|
||||
getFrequencyDictionaryMatchMode?: () => FrequencyDictionaryMatchMode;
|
||||
getFrequencyRank?: FrequencyDictionaryLookup;
|
||||
@@ -94,6 +99,7 @@ interface TokenizerAnnotationOptions {
|
||||
nPlusOneEnabled: boolean;
|
||||
jlptEnabled: boolean;
|
||||
nameMatchEnabled: boolean;
|
||||
nameMatchImagesEnabled: boolean;
|
||||
frequencyEnabled: boolean;
|
||||
frequencyMatchMode: FrequencyDictionaryMatchMode;
|
||||
minSentenceWordsForNPlusOne: number | undefined;
|
||||
@@ -229,6 +235,8 @@ export function createTokenizerDepsRuntime(
|
||||
getNPlusOneEnabled: options.getNPlusOneEnabled,
|
||||
getJlptEnabled: options.getJlptEnabled,
|
||||
getNameMatchEnabled: options.getNameMatchEnabled,
|
||||
getNameMatchImagesEnabled: options.getNameMatchImagesEnabled,
|
||||
getCharacterNameImage: options.getCharacterNameImage,
|
||||
getFrequencyDictionaryEnabled: options.getFrequencyDictionaryEnabled,
|
||||
getFrequencyDictionaryMatchMode: options.getFrequencyDictionaryMatchMode ?? (() => 'headword'),
|
||||
getFrequencyRank: options.getFrequencyRank,
|
||||
@@ -684,6 +692,7 @@ function getAnnotationOptions(deps: TokenizerServiceDeps): TokenizerAnnotationOp
|
||||
nPlusOneEnabled,
|
||||
jlptEnabled: deps.getJlptEnabled?.() !== false,
|
||||
nameMatchEnabled: deps.getNameMatchEnabled?.() !== false,
|
||||
nameMatchImagesEnabled: deps.getNameMatchImagesEnabled?.() === true,
|
||||
frequencyEnabled: deps.getFrequencyDictionaryEnabled?.() !== false,
|
||||
frequencyMatchMode: deps.getFrequencyDictionaryMatchMode?.() ?? 'headword',
|
||||
minSentenceWordsForNPlusOne: deps.getMinSentenceWordsForNPlusOne?.(),
|
||||
@@ -780,6 +789,53 @@ async function parseWithYomitanInternalParser(
|
||||
return enrichedTokens;
|
||||
}
|
||||
|
||||
/**
 * Looks up a character portrait for a token, trying the headword first and
 * then the surface form.
 *
 * Both candidates are trimmed up front; blank candidates are skipped and a
 * surface form equal to the headword (after trimming) is not looked up twice.
 *
 * @param token - Token whose `headword` and `surface` supply the lookup terms.
 * @param getCharacterNameImage - Lookup callback; a falsy result means "no image".
 * @returns The first truthy image returned by the callback, or `null` if none.
 */
function resolveCharacterNameImageForToken(
  token: MergedToken,
  getCharacterNameImage: (term: string) => CharacterNameImage | null,
): CharacterNameImage | null {
  // A Set keeps insertion order, so the headword is still tried before the surface.
  const candidates = new Set<string>([token.headword.trim(), token.surface.trim()]);

  for (const candidate of candidates) {
    if (candidate.length === 0) {
      continue;
    }
    const match = getCharacterNameImage(candidate);
    if (match) {
      return match;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Produces a copy of `tokens` in which each token's `characterImage` is either
 * a resolved character portrait or `undefined`.
 *
 * The input array and its tokens are not mutated; every returned token is a
 * shallow copy. `characterImage` is reset to `undefined` on every token when
 * name matching or name-match images are disabled, or when no
 * `getCharacterNameImage` dependency is provided. Otherwise only tokens with
 * `isNameMatch === true` are looked up; all other tokens get `undefined`.
 *
 * The lookup is best-effort: if it throws, a warning is logged and the token
 * is returned without an image instead of failing the whole subtitle.
 *
 * @param tokens - Annotated tokens to decorate.
 * @param deps - Tokenizer dependencies; only `getCharacterNameImage` is read.
 * @param options - Annotation flags; `nameMatchEnabled` and
 *   `nameMatchImagesEnabled` must both be truthy for lookups to happen.
 * @returns A new array of token copies with `characterImage` set or cleared.
 */
function applyCharacterNameImages(
  tokens: MergedToken[],
  deps: TokenizerServiceDeps,
  options: TokenizerAnnotationOptions,
): MergedToken[] {
  if (
    !options.nameMatchEnabled ||
    !options.nameMatchImagesEnabled ||
    typeof deps.getCharacterNameImage !== 'function'
  ) {
    return tokens.map((token) => ({ ...token, characterImage: undefined }));
  }

  const getCharacterNameImage = deps.getCharacterNameImage;
  return tokens.map((token) => {
    if (token.isNameMatch !== true) {
      return { ...token, characterImage: undefined };
    }

    let characterImage: CharacterNameImage | undefined;
    try {
      characterImage = resolveCharacterNameImageForToken(token, getCharacterNameImage) ?? undefined;
    } catch (err: unknown) {
      // Anything can be thrown (strings, null, ...). Narrow before reading
      // `.message` so the handler itself cannot throw or log `undefined`.
      const reason = err instanceof Error ? err.message : String(err);
      logger.warn('Failed to resolve character name image:', reason);
    }

    return {
      ...token,
      characterImage,
    };
  });
}
|
||||
|
||||
export async function tokenizeSubtitle(
|
||||
text: string,
|
||||
deps: TokenizerServiceDeps,
|
||||
@@ -805,9 +861,10 @@ export async function tokenizeSubtitle(
|
||||
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
|
||||
if (yomitanTokens && yomitanTokens.length > 0) {
|
||||
const annotatedTokens = await applyAnnotationStage(yomitanTokens, deps, annotationOptions);
|
||||
const renderedTokens = applyCharacterNameImages(annotatedTokens, deps, annotationOptions);
|
||||
return {
|
||||
text: displayText,
|
||||
tokens: annotatedTokens.length > 0 ? annotatedTokens : null,
|
||||
tokens: renderedTokens.length > 0 ? renderedTokens : null,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user