mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-04-12 04:19:25 -07:00
feat(immersion): add anime metadata, occurrence tracking, and schema upgrades
- Add imm_anime table with AniList integration - Add imm_subtitle_lines, imm_word_line_occurrences, imm_kanji_line_occurrences - Add POS fields (part_of_speech, pos1, pos2, pos3) to imm_words - Add anime metadata parsing with guessit fallback - Add video duration tracking and watched status - Add episode, streak, trend, and word/kanji detail queries - Deduplicate subtitle line recording within sessions - Pass Anki note IDs through card mining callback chain
This commit is contained in:
@@ -133,6 +133,17 @@ function isFrequencyExcludedByPos(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Reports whether a token should be excluded from vocabulary persistence.
 *
 * The decision is delegated to `isFrequencyExcludedByPos`, using the pos1 and
 * pos2 exclusion sets resolved from `options`.
 *
 * @param token - The merged token to evaluate.
 * @param options - Optional pos1/pos2 exclusion overrides; defaults to `{}`.
 * @returns The result of `isFrequencyExcludedByPos` for this token.
 */
export function shouldExcludeTokenFromVocabularyPersistence(
  token: MergedToken,
  options: Pick<AnnotationStageOptions, 'pos1Exclusions' | 'pos2Exclusions'> = {},
): boolean {
  const pos1Exclusions = resolvePos1Exclusions(options);
  const pos2Exclusions = resolvePos2Exclusions(options);
  return isFrequencyExcludedByPos(token, pos1Exclusions, pos2Exclusions);
}
|
||||
|
||||
function applyFrequencyMarking(
|
||||
tokens: MergedToken[],
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
|
||||
56
src/core/services/tokenizer/part-of-speech.ts
Normal file
56
src/core/services/tokenizer/part-of-speech.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { PartOfSpeech } from '../../../types';
|
||||
|
||||
/**
 * Normalizes a raw part-of-speech tag.
 *
 * @param value - The tag text, possibly `null` or `undefined`.
 * @returns The tag with surrounding whitespace removed, or an empty string
 *   when `value` is not a string.
 */
function normalizePosTag(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
|
||||
|
||||
/**
 * Type guard that checks whether `value` is one of the `PartOfSpeech` enum values.
 *
 * @param value - Any value to test.
 * @returns `true` only when `value` is a string that appears in
 *   `Object.values(PartOfSpeech)`.
 */
export function isPartOfSpeechValue(value: unknown): value is PartOfSpeech {
  if (typeof value !== 'string') {
    return false;
  }
  return Object.values(PartOfSpeech).some((candidate) => candidate === value);
}
|
||||
|
||||
/**
 * Maps a single pos1 tag to a `PartOfSpeech` value.
 *
 * The tag is normalized with `normalizePosTag` before matching, so surrounding
 * whitespace is ignored and `null`/`undefined` are treated as an empty tag.
 *
 * @param pos1 - The pos1 tag text, possibly `null` or `undefined`.
 * @returns The matching `PartOfSpeech`, or `PartOfSpeech.other` when the tag
 *   is not one of the recognized values.
 */
export function mapMecabPos1ToPartOfSpeech(pos1: string | null | undefined): PartOfSpeech {
  const tag = normalizePosTag(pos1);

  if (tag === '名詞') {
    return PartOfSpeech.noun;
  }
  if (tag === '動詞') {
    return PartOfSpeech.verb;
  }
  if (tag === '形容詞') {
    return PartOfSpeech.i_adjective;
  }
  // Two different tag spellings both map to the na-adjective category.
  if (tag === '形状詞' || tag === '形容動詞') {
    return PartOfSpeech.na_adjective;
  }
  if (tag === '助詞') {
    return PartOfSpeech.particle;
  }
  if (tag === '助動詞') {
    return PartOfSpeech.bound_auxiliary;
  }
  // Two different tag spellings both map to the symbol category.
  if (tag === '記号' || tag === '補助記号') {
    return PartOfSpeech.symbol;
  }

  return PartOfSpeech.other;
}
|
||||
|
||||
/**
 * Derives the `PartOfSpeech` value to store for a word.
 *
 * `pos1` takes precedence: it is split on `|`, each non-empty trimmed part is
 * mapped with `mapMecabPos1ToPartOfSpeech`, and the shared result is returned
 * when every part maps to the same value. Parts that disagree yield
 * `PartOfSpeech.other`. Only when `pos1` contributes no parts is
 * `partOfSpeech` consulted, and it is used only if it is a valid
 * `PartOfSpeech` value.
 *
 * @param input - Object carrying the optional `partOfSpeech` and `pos1` strings.
 * @returns The derived `PartOfSpeech`, falling back to `PartOfSpeech.other`.
 */
export function deriveStoredPartOfSpeech(input: {
  partOfSpeech?: string | null;
  pos1?: string | null;
}): PartOfSpeech {
  const tags: string[] = [];
  for (const rawTag of normalizePosTag(input.pos1).split('|')) {
    const tag = rawTag.trim();
    if (tag.length > 0) {
      tags.push(tag);
    }
  }

  // No usable pos1 information: fall back to the explicit partOfSpeech field.
  if (tags.length === 0) {
    return isPartOfSpeechValue(input.partOfSpeech) ? input.partOfSpeech : PartOfSpeech.other;
  }

  // All parts must agree on one category; any disagreement collapses to `other`.
  const firstMapped = mapMecabPos1ToPartOfSpeech(tags[0]);
  const allAgree = tags.every((tag) => mapMecabPos1ToPartOfSpeech(tag) === firstMapped);
  return allAgree ? firstMapped : PartOfSpeech.other;
}
|
||||
Reference in New Issue
Block a user