feat(immersion): add anime metadata, occurrence tracking, and schema upgrades

- Add imm_anime table with AniList integration
- Add imm_subtitle_lines, imm_word_line_occurrences, imm_kanji_line_occurrences
- Add POS fields (part_of_speech, pos1, pos2, pos3) to imm_words
- Add anime metadata parsing with guessit fallback
- Add video duration tracking and watched status
- Add episode, streak, trend, and word/kanji detail queries
- Deduplicate subtitle line recording within sessions
- Pass Anki note IDs through card mining callback chain
This commit is contained in:
2026-03-14 22:13:42 -07:00
parent cc5d270b8e
commit fe8bb167c4
19 changed files with 5231 additions and 122 deletions

View File

@@ -133,6 +133,17 @@ function isFrequencyExcludedByPos(
);
}
export function shouldExcludeTokenFromVocabularyPersistence(
token: MergedToken,
options: Pick<AnnotationStageOptions, 'pos1Exclusions' | 'pos2Exclusions'> = {},
): boolean {
return isFrequencyExcludedByPos(
token,
resolvePos1Exclusions(options),
resolvePos2Exclusions(options),
);
}
function applyFrequencyMarking(
tokens: MergedToken[],
pos1Exclusions: ReadonlySet<string>,

View File

@@ -0,0 +1,56 @@
import { PartOfSpeech } from '../../../types';
function normalizePosTag(value: string | null | undefined): string {
return typeof value === 'string' ? value.trim() : '';
}
export function isPartOfSpeechValue(value: unknown): value is PartOfSpeech {
return typeof value === 'string' && Object.values(PartOfSpeech).includes(value as PartOfSpeech);
}
export function mapMecabPos1ToPartOfSpeech(pos1: string | null | undefined): PartOfSpeech {
switch (normalizePosTag(pos1)) {
case '名詞':
return PartOfSpeech.noun;
case '動詞':
return PartOfSpeech.verb;
case '形容詞':
return PartOfSpeech.i_adjective;
case '形状詞':
case '形容動詞':
return PartOfSpeech.na_adjective;
case '助詞':
return PartOfSpeech.particle;
case '助動詞':
return PartOfSpeech.bound_auxiliary;
case '記号':
case '補助記号':
return PartOfSpeech.symbol;
default:
return PartOfSpeech.other;
}
}
export function deriveStoredPartOfSpeech(input: {
partOfSpeech?: string | null;
pos1?: string | null;
}): PartOfSpeech {
const pos1Parts = normalizePosTag(input.pos1)
.split('|')
.map((part) => part.trim())
.filter((part) => part.length > 0);
if (pos1Parts.length > 0) {
const derivedParts = [...new Set(pos1Parts.map((part) => mapMecabPos1ToPartOfSpeech(part)))];
if (derivedParts.length === 1) {
return derivedParts[0]!;
}
return PartOfSpeech.other;
}
if (isPartOfSpeechValue(input.partOfSpeech)) {
return input.partOfSpeech;
}
return PartOfSpeech.other;
}