Files
SubMiner/src/core/services/immersion-tracker/metadata.ts
sudacode f005f542a3 feat(immersion): add anime metadata, occurrence tracking, and schema upgrades
- Add imm_anime table with AniList integration
- Add imm_subtitle_lines, imm_word_line_occurrences, imm_kanji_line_occurrences
- Add POS fields (part_of_speech, pos1, pos2, pos3) to imm_words
- Add anime metadata parsing with guessit fallback
- Add video duration tracking and watched status
- Add episode, streak, trend, and word/kanji detail queries
- Deduplicate subtitle line recording within sessions
- Pass Anki note IDs through card mining callback chain
2026-03-14 23:11:27 -07:00

226 lines
6.3 KiB
TypeScript

import crypto from 'node:crypto';
import { spawn as nodeSpawn } from 'node:child_process';
import * as fs from 'node:fs';
import path from 'node:path';
import { parseMediaInfo } from '../../../jimaku/utils';
import {
guessAnilistMediaInfo,
runGuessit,
type GuessAnilistMediaInfoDeps,
} from '../anilist/anilist-updater';
import {
deriveCanonicalTitle,
emptyMetadata,
hashToCode,
parseFps,
toNullableInt,
} from './reducer';
import {
SOURCE_TYPE_LOCAL,
type ParsedAnimeVideoGuess,
type ProbeMetadata,
type VideoMetadata,
} from './types';
/** Signature of `child_process.spawn`, aliased so a replacement can be injected through `MetadataDeps`. */
type SpawnFn = typeof nodeSpawn;
/** The subset of the `node:fs` API this module relies on. */
interface FsDeps {
// Used by computeSha256 to stream the file contents into the hash.
createReadStream: typeof fs.createReadStream;
promises: {
// Used by getLocalVideoMetadata to read the file size.
stat: typeof fs.promises.stat;
};
}
/**
 * Optional overrides for the process-spawning and file-system calls made in this module.
 * Any member left undefined falls back to the Node built-in (`nodeSpawn` / `fs`).
 */
interface MetadataDeps {
spawn?: SpawnFn;
fs?: FsDeps;
}
/** Optional overrides for guessAnimeVideoMetadata. */
interface GuessAnimeVideoMetadataDeps {
// When omitted, the `runGuessit` imported from the anilist updater is used.
runGuessit?: GuessAnilistMediaInfoDeps['runGuessit'];
}
/**
 * Converts a parser confidence label into a numeric score.
 *
 * @param confidence - Confidence label reported by the filename parser.
 * @returns `1` for `'high'`, `0.6` for `'medium'`, and `0.2` for any other value.
 */
function mapParserConfidenceToScore(confidence: 'high' | 'medium' | 'low'): number {
  if (confidence === 'high') {
    return 1;
  }
  if (confidence === 'medium') {
    return 0.6;
  }
  // Everything else (including 'low') gets the lowest score.
  return 0.2;
}
/**
 * Streams a file through SHA-256 and returns the digest as a hex string.
 *
 * The file is read with `createReadStream`, so the whole file is never held
 * in memory at once.
 *
 * @param mediaPath - Path of the file to hash.
 * @param deps - Optional `fs` override; defaults to `node:fs`.
 * @returns The hex digest once the stream ends, or `null` if the stream emits an error.
 */
export async function computeSha256(
  mediaPath: string,
  deps: MetadataDeps = {},
): Promise<string | null> {
  const fsImpl = deps.fs ?? fs;
  return new Promise<string | null>((resolve) => {
    const hasher = crypto.createHash('sha256');
    const stream = fsImpl.createReadStream(mediaPath);
    stream.on('data', (chunk) => {
      hasher.update(chunk);
    });
    stream.on('end', () => {
      resolve(hasher.digest('hex'));
    });
    // Read failures (missing file, permissions, ...) are reported as "no hash".
    stream.on('error', () => {
      resolve(null);
    });
  });
}
/**
 * Runs `ffprobe` on a media file and extracts basic format/stream metadata.
 *
 * The returned promise never rejects: if the process cannot be spawned, if it
 * produced only stderr output, or if its stdout is not valid JSON, the promise
 * resolves with `emptyMetadata()`.
 *
 * Stream selection is independent of the order in which ffprobe lists streams:
 * - video fields come from the FIRST video stream (later video streams, e.g.
 *   attached cover art, are ignored);
 * - the audio codec comes from the first audio stream whose codec tag hashes
 *   to a positive code; if none does, the value from the last audio stream
 *   examined is kept.
 *
 * @param mediaPath - Path handed to ffprobe as its input argument.
 * @param deps - Optional `spawn` override; defaults to `child_process.spawn`.
 * @returns The probed metadata, or `emptyMetadata()` on any failure.
 */
export function runFfprobe(mediaPath: string, deps: MetadataDeps = {}): Promise<ProbeMetadata> {
  const spawn = deps.spawn ?? nodeSpawn;
  return new Promise((resolve) => {
    const child = spawn('ffprobe', [
      '-v',
      'error',
      '-print_format',
      'json',
      '-show_entries',
      'stream=codec_type,codec_tag_string,width,height,avg_frame_rate,bit_rate',
      '-show_entries',
      'format=duration,bit_rate',
      mediaPath,
    ]);
    let output = '';
    let errorOutput = '';
    child.stdout.on('data', (chunk) => {
      output += chunk.toString('utf-8');
    });
    child.stderr.on('data', (chunk) => {
      errorOutput += chunk.toString('utf-8');
    });
    // 'error' may be followed by 'close'; the second resolve() is a no-op.
    child.on('error', () => resolve(emptyMetadata()));
    child.on('close', () => {
      if (errorOutput && output.length === 0) {
        resolve(emptyMetadata());
        return;
      }
      try {
        const parsed = JSON.parse(output) as {
          format?: { duration?: string; bit_rate?: string };
          streams?: Array<{
            codec_type?: string;
            codec_tag_string?: string;
            width?: number;
            height?: number;
            avg_frame_rate?: string;
            bit_rate?: string;
          }>;
        };
        // Missing, zero, or non-numeric values are all reported as null.
        const durationSeconds = Number(parsed.format?.duration);
        const bitrateBps = Number(parsed.format?.bit_rate);
        const durationMs = durationSeconds ? Math.round(durationSeconds * 1000) : null;
        const bitrateKbps = bitrateBps ? Math.round(bitrateBps / 1000) : null;

        let codecId: number | null = null;
        let containerId: number | null = null;
        let widthPx: number | null = null;
        let heightPx: number | null = null;
        let fpsX100: number | null = null;
        let audioCodecId: number | null = null;
        let videoFound = false;
        let audioFound = false;

        for (const stream of parsed.streams ?? []) {
          if (stream.codec_type === 'video' && !videoFound) {
            widthPx = toNullableInt(stream.width);
            heightPx = toNullableInt(stream.height);
            fpsX100 = parseFps(stream.avg_frame_rate);
            codecId = hashToCode(stream.codec_tag_string);
            containerId = 0;
            videoFound = true;
          } else if (stream.codec_type === 'audio' && !audioFound) {
            audioCodecId = hashToCode(stream.codec_tag_string);
            audioFound = (audioCodecId ?? 0) > 0;
          }
          // Only stop once BOTH kinds are resolved, so an audio stream listed
          // before the video stream no longer hides the video metadata.
          if (videoFound && audioFound) {
            break;
          }
        }

        resolve({
          durationMs,
          codecId,
          containerId,
          widthPx,
          heightPx,
          fpsX100,
          bitrateKbps,
          audioCodecId,
        });
      } catch {
        resolve(emptyMetadata());
      }
    });
  });
}
/**
 * Collects the metadata stored for a local video file: content hash, ffprobe
 * stream information, and file size.
 *
 * The three lookups do not depend on one another, so they are started together
 * and awaited with `Promise.all` instead of one after another.
 *
 * @param mediaPath - Path of the local video file.
 * @param deps - Optional `spawn` / `fs` overrides, forwarded to the helpers.
 * @returns The assembled metadata. A missing probe duration is stored as `0`;
 *   other missing probe fields and a missing hash are stored as `null`.
 * @throws Rejects if `fs.promises.stat` rejects for `mediaPath`.
 */
export async function getLocalVideoMetadata(
  mediaPath: string,
  deps: MetadataDeps = {},
): Promise<VideoMetadata> {
  const fileSystem = deps.fs ?? fs;
  const [hash, info, stat] = await Promise.all([
    computeSha256(mediaPath, deps),
    runFfprobe(mediaPath, deps),
    fileSystem.promises.stat(mediaPath),
  ]);
  return {
    sourceType: SOURCE_TYPE_LOCAL,
    canonicalTitle: deriveCanonicalTitle(mediaPath),
    durationMs: info.durationMs || 0,
    fileSizeBytes: Number.isFinite(stat.size) ? stat.size : null,
    codecId: info.codecId ?? null,
    containerId: info.containerId ?? null,
    widthPx: info.widthPx ?? null,
    heightPx: info.heightPx ?? null,
    fpsX100: info.fpsX100 ?? null,
    bitrateKbps: info.bitrateKbps ?? null,
    audioCodecId: info.audioCodecId ?? null,
    hashSha256: hash,
    screenshotPath: null,
    metadataJson: null,
  };
}
/**
 * Derives anime title/season/episode information for a video.
 *
 * Delegates the parsing to `guessAnilistMediaInfo`. When that result did not
 * come from guessit, `parseMediaInfo` is run on the path (or on the title when
 * the path is null) to obtain the confidence label, filename and raw title
 * recorded alongside the guess.
 *
 * @param mediaPath - Path of the media file, if known.
 * @param mediaTitle - Media title, if known.
 * @param deps - Optional `runGuessit` override; defaults to the imported `runGuessit`.
 * @returns The parsed guess, or `null` when `guessAnilistMediaInfo` yields nothing.
 *   Guessit results carry a confidence of `1`; fallback results carry the score
 *   mapped from the fallback parser's confidence label.
 */
export async function guessAnimeVideoMetadata(
  mediaPath: string | null,
  mediaTitle: string | null,
  deps: GuessAnimeVideoMetadataDeps = {},
): Promise<ParsedAnimeVideoGuess | null> {
  const guessitRunner = deps.runGuessit ?? runGuessit;
  const guess = await guessAnilistMediaInfo(mediaPath, mediaTitle, {
    runGuessit: guessitRunner,
  });
  if (!guess) {
    return null;
  }

  const basename = mediaPath ? path.basename(mediaPath) : null;

  if (guess.source !== 'guessit') {
    const { confidence, filename, rawTitle } = parseMediaInfo(mediaPath ?? mediaTitle);
    return {
      parsedBasename: basename ?? filename ?? null,
      parsedTitle: guess.title,
      parsedSeason: guess.season,
      parsedEpisode: guess.episode,
      parserSource: 'fallback',
      parserConfidence: mapParserConfidenceToScore(confidence),
      parseMetadataJson: JSON.stringify({
        confidence,
        filename,
        rawTitle,
        source: 'fallback',
      }),
    };
  }

  return {
    parsedBasename: basename,
    parsedTitle: guess.title,
    parsedSeason: guess.season,
    parsedEpisode: guess.episode,
    parserSource: 'guessit',
    parserConfidence: 1,
    parseMetadataJson: JSON.stringify({
      filename: basename,
      source: 'guessit',
    }),
  };
}