refactor(core): decompose remaining oversized hotspots with seam coverage

# Conflicts:
#	src/config/service.ts
This commit is contained in:
2026-02-21 21:18:37 -08:00
parent 35580ea3e9
commit b271a3b1a9
18 changed files with 3024 additions and 2450 deletions

View File

@@ -4,6 +4,8 @@ import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { AnkiIntegration } from './anki-integration';
import { FieldGroupingMergeCollaborator } from './anki-integration/field-grouping-merge';
import { AnkiConnectConfig } from './types';
interface IntegrationTestContext {
integration: AnkiIntegration;
@@ -92,6 +94,60 @@ function cleanupIntegrationTestContext(ctx: IntegrationTestContext): void {
fs.rmSync(ctx.stateDir, { recursive: true, force: true });
}
/**
 * Picks the entry of `availableFieldNames` that corresponds to `preferredName`.
 *
 * An exact match wins; otherwise the first entry that matches when both sides
 * are lower-cased is returned.
 *
 * @returns the matching available name, or `null` when nothing matches.
 */
function resolveFieldName(availableFieldNames: string[], preferredName: string): string | null {
  if (availableFieldNames.includes(preferredName)) {
    return preferredName;
  }
  const wanted = preferredName.toLowerCase();
  for (const candidate of availableFieldNames) {
    if (candidate.toLowerCase() === wanted) {
      return candidate;
    }
  }
  return null;
}
/**
 * Builds a FieldGroupingMergeCollaborator wired to in-memory stubs for tests.
 *
 * @param options.config - overrides merged into the stub configuration; entries
 *   of `config.fields` are merged on top of the default field names
 *   (`Sentence`, `ExpressionAudio`, `Picture`) instead of replacing them.
 * @param options.currentSubtitleText - value returned by the subtitle-text stub.
 * @param options.generatedMedia - value resolved by the media-generation stub
 *   (an empty object when omitted).
 */
function createFieldGroupingMergeCollaborator(options?: {
  config?: Partial<AnkiConnectConfig>;
  currentSubtitleText?: string;
  generatedMedia?: {
    audioField?: string;
    audioValue?: string;
    imageField?: string;
    imageValue?: string;
    miscInfoValue?: string;
  };
}): FieldGroupingMergeCollaborator {
  // Spread the caller's overrides FIRST and build `fields` afterwards. With the
  // opposite order the spread would overwrite the merged `fields` object and
  // silently drop the default field names whenever `config.fields` is supplied.
  const config = {
    ...(options?.config ?? {}),
    fields: {
      sentence: 'Sentence',
      audio: 'ExpressionAudio',
      image: 'Picture',
      ...(options?.config?.fields ?? {}),
    },
  } as AnkiConnectConfig;

  return new FieldGroupingMergeCollaborator({
    getConfig: () => config,
    getEffectiveSentenceCardConfig: () => ({
      sentenceField: 'Sentence',
      audioField: 'SentenceAudio',
    }),
    getCurrentSubtitleText: () => options?.currentSubtitleText,
    resolveFieldName,
    resolveNoteFieldName: (noteInfo, preferredName) => {
      if (!preferredName) return null;
      return resolveFieldName(Object.keys(noteInfo.fields), preferredName);
    },
    // Flattens `{ Name: { value } }` into `{ name: value }` with lower-cased keys.
    extractFields: (fields) => {
      const result: Record<string, string> = {};
      for (const [key, value] of Object.entries(fields)) {
        result[key.toLowerCase()] = value.value || '';
      }
      return result;
    },
    // Marks the text so tests can tell that it went through sentence processing.
    processSentence: (mpvSentence) => `${mpvSentence}::processed`,
    generateMediaForMerge: async () => options?.generatedMedia ?? {},
    warnFieldParseOnce: () => undefined,
  });
}
test('AnkiIntegration.refreshKnownWordCache bypasses stale checks', async () => {
const ctx = createIntegrationTestContext();
@@ -152,3 +208,61 @@ test('AnkiIntegration.refreshKnownWordCache deduplicates concurrent refreshes',
cleanupIntegrationTestContext(ctx);
}
});
// The kept note carries a stale ExpressionAudio value; after merging, that field
// is expected to equal the grouped SentenceAudio value.
test('FieldGroupingMergeCollaborator synchronizes ExpressionAudio from merged SentenceAudio', async () => {
  const keptNote = {
    noteId: 101,
    fields: {
      SentenceAudio: { value: '[sound:keep.mp3]' },
      ExpressionAudio: { value: '[sound:stale.mp3]' },
    },
  };
  const removedNote = {
    noteId: 202,
    fields: {
      SentenceAudio: { value: '[sound:new.mp3]' },
    },
  };

  const mergedFields = await createFieldGroupingMergeCollaborator().computeFieldGroupingMergedFields(
    keptNote.noteId,
    removedNote.noteId,
    keptNote,
    removedNote,
    false,
  );

  const { SentenceAudio: sentenceAudio, ExpressionAudio: expressionAudio } = mergedFields;
  assert.equal(
    sentenceAudio,
    '<span data-group-id="101">[sound:keep.mp3]</span><span data-group-id="202">[sound:new.mp3]</span>',
  );
  assert.equal(expressionAudio, sentenceAudio);
});
// Neither note has sentence audio, so the value supplied by the media-generation
// stub is expected to be grouped under the id of the note being merged in.
test('FieldGroupingMergeCollaborator uses generated media fallback when source lacks audio', async () => {
  const keptNote = {
    noteId: 11,
    fields: {
      SentenceAudio: { value: '' },
    },
  };
  const removedNote = {
    noteId: 22,
    fields: {
      SentenceAudio: { value: '' },
    },
  };
  const collaboratorWithMedia = createFieldGroupingMergeCollaborator({
    generatedMedia: {
      audioField: 'SentenceAudio',
      audioValue: '[sound:generated.mp3]',
    },
  });

  const mergedFields = await collaboratorWithMedia.computeFieldGroupingMergedFields(
    keptNote.noteId,
    removedNote.noteId,
    keptNote,
    removedNote,
    true,
  );

  assert.equal(mergedFields.SentenceAudio, '<span data-group-id="22">[sound:generated.mp3]</span>');
});

View File

@@ -45,6 +45,7 @@ import { PollingRunner } from './anki-integration/polling';
import { findDuplicateNote as findDuplicateNoteForAnkiIntegration } from './anki-integration/duplicate';
import { CardCreationService } from './anki-integration/card-creation';
import { FieldGroupingService } from './anki-integration/field-grouping';
import { FieldGroupingMergeCollaborator } from './anki-integration/field-grouping-merge';
const log = createLogger('anki').child('integration');
@@ -69,13 +70,6 @@ export class AnkiIntegration {
private updateInProgress = false;
private uiFeedbackState: UiFeedbackState = createUiFeedbackState();
private parseWarningKeys = new Set<string>();
// Lower-cased field names that always use strict span grouping.
// getStrictSpanGroupingFields() copies this set and adds the configured names.
private readonly strictGroupingFieldDefaults = new Set<string>([
'picture',
'sentence',
'sentenceaudio',
'sentencefurigana',
'miscinfo',
]);
private fieldGroupingCallback:
| ((data: {
original: KikuDuplicateCardInfo;
@@ -84,6 +78,7 @@ export class AnkiIntegration {
| null = null;
private knownWordCache: KnownWordCacheManager;
private cardCreationService: CardCreationService;
private fieldGroupingMergeCollaborator: FieldGroupingMergeCollaborator;
private fieldGroupingService: FieldGroupingService;
constructor(
@@ -109,9 +104,27 @@ export class AnkiIntegration {
this.knownWordCache = this.createKnownWordCache(knownWordCacheStatePath);
this.pollingRunner = this.createPollingRunner();
this.cardCreationService = this.createCardCreationService();
this.fieldGroupingMergeCollaborator = this.createFieldGroupingMergeCollaborator();
this.fieldGroupingService = this.createFieldGroupingService();
}
/**
 * Creates the collaborator that computes field-grouping merges.
 *
 * Every dependency is passed as a callback that forwards to this instance, so
 * the collaborator reads `this.config` and `this.mpvClient.currentSubText` at
 * call time rather than capturing their values at construction.
 */
private createFieldGroupingMergeCollaborator(): FieldGroupingMergeCollaborator {
return new FieldGroupingMergeCollaborator({
getConfig: () => this.config,
getEffectiveSentenceCardConfig: () => this.getEffectiveSentenceCardConfig(),
getCurrentSubtitleText: () => this.mpvClient.currentSubText,
resolveFieldName: (availableFieldNames, preferredName) =>
this.resolveFieldName(availableFieldNames, preferredName),
resolveNoteFieldName: (noteInfo, preferredName) =>
this.resolveNoteFieldName(noteInfo, preferredName),
extractFields: (fields) => this.extractFields(fields),
processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields),
generateMediaForMerge: () => this.generateMediaForMerge(),
warnFieldParseOnce: (fieldName, reason, detail) =>
this.warnFieldParseOnce(fieldName, reason, detail),
});
}
private normalizeConfig(config: AnkiConnectConfig): AnkiConnectConfig {
return {
...DEFAULT_ANKI_CONNECT_CONFIG,
@@ -281,14 +294,14 @@ export class AnkiIntegration {
deleteNoteInfo,
includeGeneratedMedia,
) =>
this.computeFieldGroupingMergedFields(
this.fieldGroupingMergeCollaborator.computeFieldGroupingMergedFields(
keepNoteId,
deleteNoteId,
keepNoteInfo,
deleteNoteInfo,
includeGeneratedMedia,
),
getNoteFieldMap: (noteInfo) => this.getNoteFieldMap(noteInfo),
getNoteFieldMap: (noteInfo) => this.fieldGroupingMergeCollaborator.getNoteFieldMap(noteInfo),
handleFieldGroupingAuto: (originalNoteId, newNoteId, newNoteInfo, expression) =>
this.handleFieldGroupingAuto(originalNoteId, newNoteId, newNoteInfo, expression),
handleFieldGroupingManual: (originalNoteId, newNoteId, newNoteInfo, expression) =>
@@ -982,27 +995,6 @@ export class AnkiIntegration {
});
}
/**
 * Lists the preferred field names that take part in a field-grouping merge.
 *
 * The list starts with the built-in names (`Sentence`, `SentenceAudio`,
 * `Picture`), followed by the configured image, sentence and audio fields, the
 * effective sentence-card audio field, the configured misc-info field and
 * finally `SentenceFurigana`. A configured audio field named `ExpressionAudio`
 * (in any letter case) is left out.
 *
 * Empty names are skipped and each name appears once, at the position of its
 * first occurrence, so a configured name that equals a built-in one does not
 * cause the same field to be merged twice.
 */
private getGroupableFieldNames(): string[] {
  const names = new Set<string>();
  const add = (name: string | null | undefined): void => {
    if (name) names.add(name);
  };

  add('Sentence');
  add('SentenceAudio');
  add('Picture');
  add(this.config.fields?.image);
  add(this.config.fields?.sentence);

  const configuredAudio = this.config.fields?.audio;
  if (configuredAudio && configuredAudio.toLowerCase() !== 'expressionaudio') {
    add(configuredAudio);
  }

  add(this.getEffectiveSentenceCardConfig().audioField);
  add(this.config.fields?.miscInfo);
  add('SentenceFurigana');

  return [...names];
}
private getPreferredSentenceAudioFieldName(): string {
const sentenceCardConfig = this.getEffectiveSentenceCardConfig();
return sentenceCardConfig.audioField || 'SentenceAudio';
@@ -1015,250 +1007,6 @@ export class AnkiIntegration {
);
}
/**
 * Returns `value` with every `<span data-group-id="…">…</span>` block removed
 * and the remainder trimmed. When nothing remains, the trimmed input is
 * returned instead.
 */
private extractUngroupedValue(value: string): string {
  const withoutGroupedSpans = value
    .replace(/<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/gi, '')
    .trim();
  return withoutGroupedSpans || value.trim();
}
/** Returns the last `[sound:…]` tag found in `value`, or `''` when there is none. */
private extractLastSoundTag(value: string): string {
  const soundTags = value.match(/\[sound:[^\]]+\]/g) ?? [];
  return soundTags[soundTags.length - 1] ?? '';
}
/** Returns the last `<img …>` tag found in `value`, or `''` when there is none. */
private extractLastImageTag(value: string): string {
  const imageTags = value.match(/<img\b[^>]*>/gi);
  if (imageTags === null) {
    return '';
  }
  return imageTags[imageTags.length - 1] ?? '';
}
/** Returns every `<img …>` tag in `value`, in order of appearance (empty when none). */
private extractImageTags(value: string): string[] {
  return value.match(/<img\b[^>]*>/gi) ?? [];
}
/**
 * Returns `imageTag` carrying `data-group-id="<groupId>"`.
 *
 * An existing quoted `data-group-id` attribute is rewritten; otherwise the
 * attribute is inserted directly after `<img`. An empty tag yields `''`.
 */
private ensureImageGroupId(imageTag: string, groupId: number): string {
  if (!imageTag) {
    return '';
  }
  const alreadyTagged = /data-group-id=/i.test(imageTag);
  return alreadyTagged
    ? imageTag.replace(/data-group-id="[^"]*"/i, `data-group-id="${groupId}"`)
    : imageTag.replace(/<img\b/i, `<img data-group-id="${groupId}"`);
}
/**
 * Parses the `<span data-group-id="N">…</span>` blocks of `value`.
 *
 * @param value - raw field content.
 * @param fieldName - field being parsed; used for normalization and warnings.
 * @returns one entry per span that has a positive numeric id and non-empty
 *   normalized content, in document order.
 */
private extractSpanEntries(
  value: string,
  fieldName: string,
): { groupId: number; content: string }[] {
  // Pass 1: report any span whose data-group-id is not a positive finite number.
  for (const opening of value.matchAll(/<span\s+[^>]*data-group-id="([^"]*)"[^>]*>/gi)) {
    const rawId = opening[1];
    const numericId = Number(rawId);
    const usableId = Number.isFinite(numericId) && numericId > 0;
    if (!usableId) {
      this.warnFieldParseOnce(fieldName, 'invalid-group-id', rawId);
    }
  }

  // Pass 2: collect spans that start with a digits-only data-group-id attribute.
  const collected: { groupId: number; content: string }[] = [];
  for (const span of value.matchAll(/<span\s+data-group-id="(\d+)"[^>]*>([\s\S]*?)<\/span>/gi)) {
    const groupId = Number(span[1]);
    if (Number.isFinite(groupId) && groupId > 0) {
      const content = this.normalizeStrictGroupedValue(span[2] || '', fieldName);
      if (content) {
        collected.push({ groupId, content });
      } else {
        this.warnFieldParseOnce(fieldName, 'empty-group-content');
        log.debug('Skipping span with empty normalized content', {
          fieldName,
          rawContent: (span[2] || '').slice(0, 120),
        });
      }
    }
  }

  const containsSpan = /<span\b/i.test(value);
  if (collected.length === 0 && containsSpan) {
    this.warnFieldParseOnce(fieldName, 'no-usable-span-entries');
  }
  return collected;
}
/**
 * Turns a field value into a de-duplicated list of `{ groupId, content }` entries.
 *
 * When the value has no usable grouped spans, its normalized ungrouped content
 * (if any) becomes a single entry under `fallbackGroupId`. Entries with the
 * same id and content are kept only once, at their first position.
 */
private parseStrictEntries(
  value: string,
  fallbackGroupId: number,
  fieldName: string,
): { groupId: number; content: string }[] {
  let candidates = this.extractSpanEntries(value, fieldName);
  if (candidates.length === 0) {
    const plain = this.normalizeStrictGroupedValue(this.extractUngroupedValue(value), fieldName);
    candidates = plain ? [{ groupId: fallbackGroupId, content: plain }] : [];
  }

  const seenKeys = new Set<string>();
  return candidates.filter(({ groupId, content }) => {
    const key = `${groupId}::${content}`;
    if (seenKeys.has(key)) {
      return false;
    }
    seenKeys.add(key);
    return true;
  });
}
/**
 * Extracts every `<img>` tag from `value` together with its group id.
 *
 * A tag's own positive numeric `data-group-id` is used when present; otherwise
 * `fallbackGroupId` applies. Each returned tag carries the chosen id.
 */
private parsePictureEntries(
  value: string,
  fallbackGroupId: number,
): { groupId: number; tag: string }[] {
  const entries: { groupId: number; tag: string }[] = [];

  for (const rawTag of this.extractImageTags(value)) {
    const declaredId = /data-group-id="(\d+)"/i.exec(rawTag)?.[1];
    let groupId = fallbackGroupId;
    if (declaredId !== undefined) {
      const parsedId = Number(declaredId);
      if (Number.isFinite(parsedId) && parsedId > 0) {
        groupId = parsedId;
      } else {
        this.warnFieldParseOnce('Picture', 'invalid-group-id', declaredId);
      }
    }

    const tag = this.ensureImageGroupId(rawTag, groupId);
    if (tag) {
      entries.push({ groupId, tag });
    } else {
      this.warnFieldParseOnce('Picture', 'empty-image-tag');
    }
  }

  return entries;
}
/**
 * Reduces a value to the content stored for one group of `fieldName`.
 *
 * For `sentenceaudio` / `expressionaudio` the last `[sound:…]` tag is kept, and
 * for `picture` the last `<img>` tag. When that tag is missing, a warning is
 * issued and the ungrouped text is returned as is. Other fields return the
 * ungrouped text. Empty input yields `''`.
 */
private normalizeStrictGroupedValue(value: string, fieldName: string): string {
  const plain = this.extractUngroupedValue(value);
  if (!plain) {
    return '';
  }

  switch (fieldName.toLowerCase()) {
    case 'sentenceaudio':
    case 'expressionaudio': {
      const soundTag = this.extractLastSoundTag(plain);
      if (soundTag) return soundTag;
      this.warnFieldParseOnce(fieldName, 'missing-sound-tag');
      return plain;
    }
    case 'picture': {
      const imageTag = this.extractLastImageTag(plain);
      if (imageTag) return imageTag;
      this.warnFieldParseOnce(fieldName, 'missing-image-tag');
      return plain;
    }
    default:
      return plain;
  }
}
/**
 * Returns the lower-cased names of all fields that use strict span grouping:
 * the built-in defaults, the effective sentence-card sentence and audio
 * fields, and the configured image and misc-info fields when set.
 */
private getStrictSpanGroupingFields(): Set<string> {
  const { sentenceField, audioField } = this.getEffectiveSentenceCardConfig();
  const configuredFields = this.config.fields;

  const strictNames = new Set<string>(this.strictGroupingFieldDefaults);
  const extras = [
    sentenceField || 'sentence',
    audioField || 'sentenceaudio',
    configuredFields?.image,
    configuredFields?.miscInfo,
  ];
  for (const extra of extras) {
    if (extra) strictNames.add(extra.toLowerCase());
  }
  return strictNames;
}
/** Tells whether `fieldName` (compared case-insensitively) uses strict span grouping. */
private shouldUseStrictSpanGrouping(fieldName: string): boolean {
  const lookupKey = fieldName.toLowerCase();
  const strictNames = this.getStrictSpanGroupingFields();
  return strictNames.has(lookupKey);
}
/**
 * Merges the kept note's value of one field with the value coming from the
 * note being merged in, tagging content with group ids.
 *
 * @param existingValue - current value of the field on the kept note.
 * @param newValue - value contributed by the source note.
 * @param keepGroupId - group id applied to untagged content of `existingValue`.
 * @param sourceGroupId - group id applied to untagged content of `newValue`
 *   (only used on the strict paths).
 * @param fieldName - field being merged; selects the strict or loose strategy.
 * @returns the merged field value.
 */
private applyFieldGrouping(
existingValue: string,
newValue: string,
keepGroupId: number,
sourceGroupId: number,
fieldName: string,
): string {
if (this.shouldUseStrictSpanGrouping(fieldName)) {
// Strict path for pictures: the result is a plain concatenation of <img> tags,
// each carrying its data-group-id; exact duplicate tags are dropped.
if (fieldName.toLowerCase() === 'picture') {
const keepEntries = this.parsePictureEntries(existingValue, keepGroupId);
const sourceEntries = this.parsePictureEntries(newValue, sourceGroupId);
if (keepEntries.length === 0 && sourceEntries.length === 0) {
// No <img> tag on either side: fall back to whichever raw value is non-empty.
return existingValue || newValue;
}
const mergedTags = keepEntries.map((entry) =>
this.ensureImageGroupId(entry.tag, entry.groupId),
);
const seen = new Set(mergedTags);
for (const entry of sourceEntries) {
const normalized = this.ensureImageGroupId(entry.tag, entry.groupId);
if (seen.has(normalized)) continue;
seen.add(normalized);
mergedTags.push(normalized);
}
return mergedTags.join('');
}
// Strict path for all other strict fields: every entry is re-emitted as
// <span data-group-id="N">content</span>, kept entries first.
const keepEntries = this.parseStrictEntries(existingValue, keepGroupId, fieldName);
const sourceEntries = this.parseStrictEntries(newValue, sourceGroupId, fieldName);
if (keepEntries.length === 0 && sourceEntries.length === 0) {
return existingValue || newValue;
}
if (sourceEntries.length === 0) {
return keepEntries
.map((entry) => `<span data-group-id="${entry.groupId}">${entry.content}</span>`)
.join('');
}
const merged = [...keepEntries];
// Entries are identical when both group id and content match.
const seen = new Set(keepEntries.map((entry) => `${entry.groupId}::${entry.content}`));
for (const entry of sourceEntries) {
const key = `${entry.groupId}::${entry.content}`;
if (seen.has(key)) continue;
seen.add(key);
merged.push(entry);
}
if (merged.length === 0) return existingValue;
return merged
.map((entry) => `<span data-group-id="${entry.groupId}">${entry.content}</span>`)
.join('');
}
// Loose path: if either side is blank, the other side is returned unchanged.
if (!existingValue.trim()) return newValue;
if (!newValue.trim()) return existingValue;
const hasGroups = /data-group-id/.test(existingValue);
if (!hasGroups) {
// First grouping of this field: wrap the whole kept value, then append the
// new value as is (it is not wrapped in a span on this path).
return `<span data-group-id="${keepGroupId}">${existingValue}</span>\n` + newValue;
}
// The kept value already has grouped spans: keep them verbatim and wrap any
// text found between or after them under keepGroupId.
const groupedSpanRegex = /<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/g;
let lastEnd = 0;
let result = '';
let match;
while ((match = groupedSpanRegex.exec(existingValue)) !== null) {
const before = existingValue.slice(lastEnd, match.index);
if (before.trim()) {
result += `<span data-group-id="${keepGroupId}">${before.trim()}</span>\n`;
}
result += match[0] + '\n';
lastEnd = match.index + match[0].length;
}
const after = existingValue.slice(lastEnd);
if (after.trim()) {
result += `\n<span data-group-id="${keepGroupId}">${after.trim()}</span>`;
}
return result + '\n' + newValue;
}
private async generateMediaForMerge(): Promise<{
audioField?: string;
audioValue?: string;
@@ -1317,161 +1065,6 @@ export class AnkiIntegration {
return result;
}
/**
 * Reads the value of the note field that `preferredFieldName` resolves to.
 * Returns `''` when no name is given, the name cannot be resolved, or the
 * resolved field has no value.
 */
private getResolvedFieldValue(noteInfo: NoteInfo, preferredFieldName?: string): string {
  const actualName = preferredFieldName
    ? this.resolveNoteFieldName(noteInfo, preferredFieldName)
    : null;
  if (!actualName) {
    return '';
  }
  return noteInfo.fields[actualName]?.value || '';
}
/**
 * Computes the field values to write to the kept note when `deleteNoteInfo`
 * is merged into `keepNoteInfo`.
 *
 * @param keepNoteId - group id for content that belongs to the kept note.
 * @param deleteNoteId - group id for content taken from the note being merged in.
 * @param keepNoteInfo - the note that survives the merge.
 * @param deleteNoteInfo - the note whose content is merged in.
 * @param includeGeneratedMedia - when true, generated media fills source values
 *   that are still empty.
 * @returns merged values keyed by the kept note's actual field names; fields
 *   with nothing to merge are omitted.
 */
private async computeFieldGroupingMergedFields(
  keepNoteId: number,
  deleteNoteId: number,
  keepNoteInfo: NoteInfo,
  deleteNoteInfo: NoteInfo,
  includeGeneratedMedia: boolean,
): Promise<Record<string, string>> {
  const preferredNames = this.getGroupableFieldNames();
  const keepFieldNames = Object.keys(keepNoteInfo.fields);
  const sourceFields: Record<string, string> = {};
  const keepFieldByPreferred = new Map<string, string>();

  // Collect the source value and the kept note's actual field name per preferred name.
  for (const preferredName of preferredNames) {
    sourceFields[preferredName] = this.getResolvedFieldValue(deleteNoteInfo, preferredName);
    const keepFieldName = this.resolveFieldName(keepFieldNames, preferredName);
    if (keepFieldName) {
      keepFieldByPreferred.set(preferredName, keepFieldName);
    }
  }

  // Paired fields fill each other in when only one of the two has a value.
  const fillMissingFromCounterpart = (first: string, second: string): void => {
    const secondValue = sourceFields[second];
    if (!sourceFields[first] && secondValue) {
      sourceFields[first] = secondValue;
    }
    const firstValue = sourceFields[first];
    if (!sourceFields[second] && firstValue) {
      sourceFields[second] = firstValue;
    }
  };
  fillMissingFromCounterpart('SentenceFurigana', 'Sentence');
  fillMissingFromCounterpart('Expression', 'Word');
  fillMissingFromCounterpart('SentenceAudio', 'ExpressionAudio');

  // No sentence on the source note: derive one from the current subtitle text.
  const sentenceFieldName = this.config.fields?.sentence;
  if (sentenceFieldName && !sourceFields[sentenceFieldName] && this.mpvClient.currentSubText) {
    const deleteFields = this.extractFields(deleteNoteInfo.fields);
    sourceFields[sentenceFieldName] = this.processSentence(
      this.mpvClient.currentSubText,
      deleteFields,
    );
  }

  if (includeGeneratedMedia) {
    const media = await this.generateMediaForMerge();
    if (media.audioField && media.audioValue && !sourceFields[media.audioField]) {
      sourceFields[media.audioField] = media.audioValue;
    }
    if (media.imageField && media.imageValue && !sourceFields[media.imageField]) {
      sourceFields[media.imageField] = media.imageValue;
    }
    const miscInfoFieldName = this.config.fields?.miscInfo;
    if (miscInfoFieldName && media.miscInfoValue && !sourceFields[miscInfoFieldName]) {
      sourceFields[miscInfoFieldName] = media.miscInfoValue;
    }
  }

  // Kept-note fields with these (lower-cased) names are never grouped in the loop below.
  const skippedKeepFields = new Set([
    'expression',
    'expressionfurigana',
    'expressionreading',
    'expressionaudio',
  ]);

  const mergedFields: Record<string, string> = {};
  for (const preferredName of preferredNames) {
    const keepFieldName = keepFieldByPreferred.get(preferredName);
    if (!keepFieldName || skippedKeepFields.has(keepFieldName.toLowerCase())) continue;

    const existingValue = keepNoteInfo.fields[keepFieldName]?.value || '';
    const newValue = sourceFields[preferredName] || '';
    const isStrictField = this.shouldUseStrictSpanGrouping(keepFieldName);
    const hasExisting = existingValue.trim() !== '';
    const hasNew = newValue.trim() !== '';
    if (!hasExisting && !hasNew) continue;

    if (isStrictField || (hasExisting && hasNew)) {
      mergedFields[keepFieldName] = this.applyFieldGrouping(
        existingValue,
        newValue,
        keepNoteId,
        deleteNoteId,
        keepFieldName,
      );
    } else if (hasNew) {
      // Loose field that is empty on the kept note: take the source value as is.
      mergedFields[keepFieldName] = newValue;
    }
  }

  // Align the expression audio field with the sentence audio field after
  // grouping; otherwise the kept note could retain a stale ExpressionAudio
  // while SentenceAudio is merged.
  const sentenceAudioField = this.resolveFieldName(
    keepFieldNames,
    this.getEffectiveSentenceCardConfig().audioField || 'SentenceAudio',
  );
  const expressionAudioField = this.resolveFieldName(
    keepFieldNames,
    this.config.fields?.audio || 'ExpressionAudio',
  );
  if (sentenceAudioField && expressionAudioField && expressionAudioField !== sentenceAudioField) {
    const sentenceAudioValue =
      mergedFields[sentenceAudioField] || keepNoteInfo.fields[sentenceAudioField]?.value || '';
    if (sentenceAudioValue.trim()) {
      mergedFields[expressionAudioField] = sentenceAudioValue;
    }
  }

  return mergedFields;
}
/** Flattens a note's fields into a `name -> value` map; missing values become `''`. */
private getNoteFieldMap(noteInfo: NoteInfo): Record<string, string> {
  return Object.entries(noteInfo.fields).reduce<Record<string, string>>(
    (fieldMap, [fieldName, field]) => {
      fieldMap[fieldName] = field?.value || '';
      return fieldMap;
    },
    {},
  );
}
async buildFieldGroupingPreview(
keepNoteId: number,
deleteNoteId: number,
@@ -1498,7 +1091,7 @@ export class AnkiIntegration {
return;
}
const keepNoteInfo = keepNotesInfo[0]!;
const mergedFields = await this.computeFieldGroupingMergedFields(
const mergedFields = await this.fieldGroupingMergeCollaborator.computeFieldGroupingMergedFields(
keepNoteId,
deleteNoteId,
keepNoteInfo,

View File

@@ -0,0 +1,461 @@
import { AnkiConnectConfig } from '../types';
/**
 * Result of `generateMediaForMerge`. Every member is optional; the merge only
 * uses a value when the matching source field is still empty.
 */
interface FieldGroupingMergeMedia {
// Name of the field the generated audio belongs to.
audioField?: string;
// Generated audio content for `audioField`.
audioValue?: string;
// Name of the field the generated image belongs to.
imageField?: string;
// Generated image content for `imageField`.
imageValue?: string;
// Generated content for the configured misc-info field.
miscInfoValue?: string;
}
/** Minimal note shape the merge logic works on. */
export interface FieldGroupingMergeNoteInfo {
noteId: number;
// Field name -> wrapper object holding the field's raw string value.
fields: Record<string, { value: string }>;
}
/**
 * Callbacks the merge collaborator obtains from its host. The collaborator
 * calls them on demand and does not cache their results between merges.
 */
interface FieldGroupingMergeDeps {
// Current configuration; the collaborator reads `fields.image`, `fields.sentence`,
// `fields.audio` and `fields.miscInfo` from it.
getConfig: () => AnkiConnectConfig;
// Field names used for the sentence text and sentence audio.
getEffectiveSentenceCardConfig: () => {
sentenceField: string;
audioField: string;
};
// Subtitle text used to build a sentence when the source note has none.
getCurrentSubtitleText: () => string | undefined;
// Maps a preferred field name to one of `availableFieldNames`, or null.
resolveFieldName: (availableFieldNames: string[], preferredName: string) => string | null;
// Maps a preferred field name to an actual field name of `noteInfo`, or null.
resolveNoteFieldName: (
noteInfo: FieldGroupingMergeNoteInfo,
preferredName?: string,
) => string | null;
// Converts a note's raw fields to a plain string map (passed on to `processSentence`).
extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
// Produces the sentence field value from subtitle text and the note's fields.
processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
// Generates media used to fill source values that are still empty.
generateMediaForMerge: () => Promise<FieldGroupingMergeMedia>;
// Reports a parse problem for a field; `reason` is a short code such as
// 'invalid-group-id'. NOTE(review): presumably de-duplicated by the host — confirm.
warnFieldParseOnce: (fieldName: string, reason: string, detail?: string) => void;
}
export class FieldGroupingMergeCollaborator {
// Lower-cased field names that always use strict span grouping.
// getStrictSpanGroupingFields() copies this set and adds the configured names.
private readonly strictGroupingFieldDefaults = new Set<string>([
'picture',
'sentence',
'sentenceaudio',
'sentencefurigana',
'miscinfo',
]);
constructor(private readonly deps: FieldGroupingMergeDeps) {}
/**
 * Lists the preferred field names that take part in a field-grouping merge.
 *
 * The list starts with the built-in names (`Sentence`, `SentenceAudio`,
 * `Picture`), followed by the configured image, sentence and audio fields, the
 * effective sentence-card audio field, the configured misc-info field and
 * finally `SentenceFurigana`. A configured audio field named `ExpressionAudio`
 * (in any letter case) is left out.
 *
 * Empty names are skipped and each name appears once, at the position of its
 * first occurrence, so a configured name that equals a built-in one does not
 * cause the same field to be merged twice.
 */
getGroupableFieldNames(): string[] {
  const config = this.deps.getConfig();
  const names = new Set<string>();
  const add = (name: string | null | undefined): void => {
    if (name) names.add(name);
  };

  add('Sentence');
  add('SentenceAudio');
  add('Picture');
  add(config.fields?.image);
  add(config.fields?.sentence);

  const configuredAudio = config.fields?.audio;
  if (configuredAudio && configuredAudio.toLowerCase() !== 'expressionaudio') {
    add(configuredAudio);
  }

  add(this.deps.getEffectiveSentenceCardConfig().audioField);
  add(config.fields?.miscInfo);
  add('SentenceFurigana');

  return [...names];
}
/** Flattens a note's fields into a `name -> value` map; missing values become `''`. */
getNoteFieldMap(noteInfo: FieldGroupingMergeNoteInfo): Record<string, string> {
  return Object.entries(noteInfo.fields).reduce<Record<string, string>>(
    (fieldMap, [fieldName, field]) => {
      fieldMap[fieldName] = field?.value || '';
      return fieldMap;
    },
    {},
  );
}
/**
 * Computes the field values to write to the kept note when `deleteNoteInfo`
 * is merged into `keepNoteInfo`.
 *
 * @param keepNoteId - group id for content that belongs to the kept note.
 * @param deleteNoteId - group id for content taken from the note being merged in.
 * @param keepNoteInfo - the note that survives the merge.
 * @param deleteNoteInfo - the note whose content is merged in.
 * @param includeGeneratedMedia - when true, `deps.generateMediaForMerge()` is
 *   awaited and its values fill source fields that are still empty.
 * @returns merged values keyed by the kept note's actual field names; fields
 *   with nothing to merge are omitted.
 */
async computeFieldGroupingMergedFields(
  keepNoteId: number,
  deleteNoteId: number,
  keepNoteInfo: FieldGroupingMergeNoteInfo,
  deleteNoteInfo: FieldGroupingMergeNoteInfo,
  includeGeneratedMedia: boolean,
): Promise<Record<string, string>> {
  const config = this.deps.getConfig();
  const groupableFields = this.getGroupableFieldNames();
  const keepFieldNames = Object.keys(keepNoteInfo.fields);
  const sourceFields: Record<string, string> = {};
  const resolvedKeepFieldByPreferred = new Map<string, string>();

  // Collect the source value and the kept note's actual field name per preferred name.
  for (const preferredFieldName of groupableFields) {
    sourceFields[preferredFieldName] = this.getResolvedFieldValue(
      deleteNoteInfo,
      preferredFieldName,
    );
    const keepResolved = this.deps.resolveFieldName(keepFieldNames, preferredFieldName);
    if (keepResolved) {
      resolvedKeepFieldByPreferred.set(preferredFieldName, keepResolved);
    }
  }

  // Paired fields fill each other in when only one of the two has a value.
  const fillMissingFromCounterpart = (first: string, second: string): void => {
    const secondValue = sourceFields[second];
    if (!sourceFields[first] && secondValue) {
      sourceFields[first] = secondValue;
    }
    const firstValue = sourceFields[first];
    if (!sourceFields[second] && firstValue) {
      sourceFields[second] = firstValue;
    }
  };
  fillMissingFromCounterpart('SentenceFurigana', 'Sentence');
  fillMissingFromCounterpart('Expression', 'Word');
  fillMissingFromCounterpart('SentenceAudio', 'ExpressionAudio');

  // No sentence on the source note: derive one from the current subtitle text.
  // The text is read once so the checked value is the one that is processed.
  const sentenceFieldName = config.fields?.sentence;
  if (sentenceFieldName && !sourceFields[sentenceFieldName]) {
    const subtitleText = this.deps.getCurrentSubtitleText();
    if (subtitleText) {
      const deleteFields = this.deps.extractFields(deleteNoteInfo.fields);
      sourceFields[sentenceFieldName] = this.deps.processSentence(subtitleText, deleteFields);
    }
  }

  if (includeGeneratedMedia) {
    const media = await this.deps.generateMediaForMerge();
    if (media.audioField && media.audioValue && !sourceFields[media.audioField]) {
      sourceFields[media.audioField] = media.audioValue;
    }
    if (media.imageField && media.imageValue && !sourceFields[media.imageField]) {
      sourceFields[media.imageField] = media.imageValue;
    }
    const miscInfoFieldName = config.fields?.miscInfo;
    if (miscInfoFieldName && media.miscInfoValue && !sourceFields[miscInfoFieldName]) {
      sourceFields[miscInfoFieldName] = media.miscInfoValue;
    }
  }

  // Kept-note fields with these (lower-cased) names are never grouped in the loop below.
  const skippedKeepFields = new Set([
    'expression',
    'expressionfurigana',
    'expressionreading',
    'expressionaudio',
  ]);

  const mergedFields: Record<string, string> = {};
  for (const preferredFieldName of groupableFields) {
    const keepFieldName = resolvedKeepFieldByPreferred.get(preferredFieldName);
    if (!keepFieldName || skippedKeepFields.has(keepFieldName.toLowerCase())) continue;

    const existingValue = keepNoteInfo.fields[keepFieldName]?.value || '';
    const newValue = sourceFields[preferredFieldName] || '';
    const isStrictField = this.shouldUseStrictSpanGrouping(keepFieldName);
    const hasExisting = existingValue.trim() !== '';
    const hasNew = newValue.trim() !== '';
    if (!hasExisting && !hasNew) continue;

    if (isStrictField || (hasExisting && hasNew)) {
      mergedFields[keepFieldName] = this.applyFieldGrouping(
        existingValue,
        newValue,
        keepNoteId,
        deleteNoteId,
        keepFieldName,
      );
    } else if (hasNew) {
      // Loose field that is empty on the kept note: take the source value as is.
      mergedFields[keepFieldName] = newValue;
    }
  }

  // Align the expression audio field with the sentence audio field after
  // grouping; otherwise the kept note could retain a stale ExpressionAudio
  // while SentenceAudio is merged.
  const sentenceCardConfig = this.deps.getEffectiveSentenceCardConfig();
  const resolvedSentenceAudioField = this.deps.resolveFieldName(
    keepFieldNames,
    sentenceCardConfig.audioField || 'SentenceAudio',
  );
  const resolvedExpressionAudioField = this.deps.resolveFieldName(
    keepFieldNames,
    config.fields?.audio || 'ExpressionAudio',
  );
  if (
    resolvedSentenceAudioField &&
    resolvedExpressionAudioField &&
    resolvedExpressionAudioField !== resolvedSentenceAudioField
  ) {
    const mergedSentenceAudioValue =
      mergedFields[resolvedSentenceAudioField] ||
      keepNoteInfo.fields[resolvedSentenceAudioField]?.value ||
      '';
    if (mergedSentenceAudioValue.trim()) {
      mergedFields[resolvedExpressionAudioField] = mergedSentenceAudioValue;
    }
  }

  return mergedFields;
}
/**
 * Reads the value of the note field that `preferredFieldName` resolves to.
 * Returns `''` when no name is given, the name cannot be resolved, or the
 * resolved field has no value.
 */
private getResolvedFieldValue(
  noteInfo: FieldGroupingMergeNoteInfo,
  preferredFieldName?: string,
): string {
  const actualName = preferredFieldName
    ? this.deps.resolveNoteFieldName(noteInfo, preferredFieldName)
    : null;
  if (!actualName) {
    return '';
  }
  return noteInfo.fields[actualName]?.value || '';
}
/**
 * Returns `value` with every `<span data-group-id="…">…</span>` block removed
 * and the remainder trimmed. When nothing remains, the trimmed input is
 * returned instead.
 */
private extractUngroupedValue(value: string): string {
  const withoutGroupedSpans = value
    .replace(/<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/gi, '')
    .trim();
  return withoutGroupedSpans || value.trim();
}
/** Returns the last `[sound:…]` tag found in `value`, or `''` when there is none. */
private extractLastSoundTag(value: string): string {
  const soundTags = value.match(/\[sound:[^\]]+\]/g) ?? [];
  return soundTags[soundTags.length - 1] ?? '';
}
/** Returns the last `<img …>` tag found in `value`, or `''` when there is none. */
private extractLastImageTag(value: string): string {
  const imageTags = value.match(/<img\b[^>]*>/gi);
  if (imageTags === null) {
    return '';
  }
  return imageTags[imageTags.length - 1] ?? '';
}
/** Returns every `<img …>` tag in `value`, in order of appearance (empty when none). */
private extractImageTags(value: string): string[] {
  return value.match(/<img\b[^>]*>/gi) ?? [];
}
/**
 * Returns `imageTag` carrying `data-group-id="<groupId>"`.
 *
 * An existing quoted `data-group-id` attribute is rewritten; otherwise the
 * attribute is inserted directly after `<img`. An empty tag yields `''`.
 */
private ensureImageGroupId(imageTag: string, groupId: number): string {
  if (!imageTag) {
    return '';
  }
  const alreadyTagged = /data-group-id=/i.test(imageTag);
  return alreadyTagged
    ? imageTag.replace(/data-group-id="[^"]*"/i, `data-group-id="${groupId}"`)
    : imageTag.replace(/<img\b/i, `<img data-group-id="${groupId}"`);
}
/**
 * Parses the `<span data-group-id="N">…</span>` blocks of `value`.
 *
 * @param value - raw field content.
 * @param fieldName - field being parsed; used for normalization and warnings.
 * @returns one entry per span that has a positive numeric id and non-empty
 *   normalized content, in document order.
 */
private extractSpanEntries(
  value: string,
  fieldName: string,
): { groupId: number; content: string }[] {
  // Pass 1: report any span whose data-group-id is not a positive finite number.
  for (const opening of value.matchAll(/<span\s+[^>]*data-group-id="([^"]*)"[^>]*>/gi)) {
    const rawId = opening[1];
    const numericId = Number(rawId);
    const usableId = Number.isFinite(numericId) && numericId > 0;
    if (!usableId) {
      this.deps.warnFieldParseOnce(fieldName, 'invalid-group-id', rawId);
    }
  }

  // Pass 2: collect spans that start with a digits-only data-group-id attribute.
  const collected: { groupId: number; content: string }[] = [];
  for (const span of value.matchAll(/<span\s+data-group-id="(\d+)"[^>]*>([\s\S]*?)<\/span>/gi)) {
    const groupId = Number(span[1]);
    if (Number.isFinite(groupId) && groupId > 0) {
      const content = this.normalizeStrictGroupedValue(span[2] || '', fieldName);
      if (content) {
        collected.push({ groupId, content });
      } else {
        this.deps.warnFieldParseOnce(fieldName, 'empty-group-content');
      }
    }
  }

  const containsSpan = /<span\b/i.test(value);
  if (collected.length === 0 && containsSpan) {
    this.deps.warnFieldParseOnce(fieldName, 'no-usable-span-entries');
  }
  return collected;
}
/**
 * Turns a field value into a de-duplicated list of `{ groupId, content }` entries.
 *
 * When the value has no usable grouped spans, its normalized ungrouped content
 * (if any) becomes a single entry under `fallbackGroupId`. Entries with the
 * same id and content are kept only once, at their first position.
 */
private parseStrictEntries(
  value: string,
  fallbackGroupId: number,
  fieldName: string,
): { groupId: number; content: string }[] {
  let candidates = this.extractSpanEntries(value, fieldName);
  if (candidates.length === 0) {
    const plain = this.normalizeStrictGroupedValue(this.extractUngroupedValue(value), fieldName);
    candidates = plain ? [{ groupId: fallbackGroupId, content: plain }] : [];
  }

  const seenKeys = new Set<string>();
  return candidates.filter(({ groupId, content }) => {
    const key = `${groupId}::${content}`;
    if (seenKeys.has(key)) {
      return false;
    }
    seenKeys.add(key);
    return true;
  });
}
/**
 * Extracts every `<img>` tag from `value` together with its group id.
 *
 * A tag's own positive numeric `data-group-id` is used when present; otherwise
 * `fallbackGroupId` applies. Each returned tag carries the chosen id.
 */
private parsePictureEntries(
  value: string,
  fallbackGroupId: number,
): { groupId: number; tag: string }[] {
  const entries: { groupId: number; tag: string }[] = [];

  for (const rawTag of this.extractImageTags(value)) {
    const declaredId = /data-group-id="(\d+)"/i.exec(rawTag)?.[1];
    let groupId = fallbackGroupId;
    if (declaredId !== undefined) {
      const parsedId = Number(declaredId);
      if (Number.isFinite(parsedId) && parsedId > 0) {
        groupId = parsedId;
      } else {
        this.deps.warnFieldParseOnce('Picture', 'invalid-group-id', declaredId);
      }
    }

    const tag = this.ensureImageGroupId(rawTag, groupId);
    if (tag) {
      entries.push({ groupId, tag });
    } else {
      this.deps.warnFieldParseOnce('Picture', 'empty-image-tag');
    }
  }

  return entries;
}
/**
 * Reduces a value to the content stored for one group of `fieldName`.
 *
 * For `sentenceaudio` / `expressionaudio` the last `[sound:…]` tag is kept, and
 * for `picture` the last `<img>` tag. When that tag is missing, a warning is
 * issued and the ungrouped text is returned as is. Other fields return the
 * ungrouped text. Empty input yields `''`.
 */
private normalizeStrictGroupedValue(value: string, fieldName: string): string {
  const plain = this.extractUngroupedValue(value);
  if (!plain) {
    return '';
  }

  switch (fieldName.toLowerCase()) {
    case 'sentenceaudio':
    case 'expressionaudio': {
      const soundTag = this.extractLastSoundTag(plain);
      if (soundTag) return soundTag;
      this.deps.warnFieldParseOnce(fieldName, 'missing-sound-tag');
      return plain;
    }
    case 'picture': {
      const imageTag = this.extractLastImageTag(plain);
      if (imageTag) return imageTag;
      this.deps.warnFieldParseOnce(fieldName, 'missing-image-tag');
      return plain;
    }
    default:
      return plain;
  }
}
/**
 * Builds the set of lower-cased field names that use strict span grouping.
 *
 * Starts from `strictGroupingFieldDefaults`, then adds the effective sentence
 * and sentence-audio fields (defaulting to `sentence` / `sentenceaudio`) and,
 * when configured, the image and misc-info fields.
 */
private getStrictSpanGroupingFields(): Set<string> {
  const strictNames = new Set(this.strictGroupingFieldDefaults);

  const { sentenceField, audioField } = this.deps.getEffectiveSentenceCardConfig();
  strictNames.add((sentenceField || 'sentence').toLowerCase());
  strictNames.add((audioField || 'sentenceaudio').toLowerCase());

  const fieldConfig = this.deps.getConfig().fields;
  for (const configuredName of [fieldConfig?.image, fieldConfig?.miscInfo]) {
    if (configuredName) {
      strictNames.add(configuredName.toLowerCase());
    }
  }

  return strictNames;
}
/**
 * Reports whether `fieldName` (compared case-insensitively) is one of the
 * fields that use strict span grouping.
 */
private shouldUseStrictSpanGrouping(fieldName: string): boolean {
  const key = fieldName.toLowerCase();
  const strictNames = this.getStrictSpanGroupingFields();
  return strictNames.has(key);
}
/**
 * Merges the value of one field from a source note into the kept note,
 * tagging content with `data-group-id` so each piece stays attributable.
 *
 * Strict fields (see `shouldUseStrictSpanGrouping`):
 * - `picture`: image tags from both values are concatenated; source tags that
 *   duplicate an already merged tag are dropped.
 * - other strict fields: both values are parsed into entries, source entries
 *   that duplicate a kept entry are dropped, and every entry is re-emitted as
 *   `<span data-group-id="N">…</span>` with no separator.
 * - if neither value yields entries, `existingValue || newValue` is returned.
 *
 * Non-strict fields: an empty side yields the other side unchanged. Otherwise
 * any text in `existingValue` that is not already inside a grouped span is
 * wrapped with `keepGroupId`, and `newValue` is appended on a new line.
 *
 * @param existingValue Field value on the note being kept.
 * @param newValue Field value on the note being merged in.
 * @param keepGroupId Group id for untagged content of the kept note.
 * @param sourceGroupId Group id for untagged content of the source note.
 * @param fieldName Name of the field being merged.
 * @returns The merged field value.
 */
private applyFieldGrouping(
  existingValue: string,
  newValue: string,
  keepGroupId: number,
  sourceGroupId: number,
  fieldName: string,
): string {
  const wrap = (groupId: number, content: string): string =>
    `<span data-group-id="${groupId}">${content}</span>`;

  if (this.shouldUseStrictSpanGrouping(fieldName)) {
    if (fieldName.toLowerCase() === 'picture') {
      const keptImages = this.parsePictureEntries(existingValue, keepGroupId);
      const incomingImages = this.parsePictureEntries(newValue, sourceGroupId);
      if (keptImages.length === 0 && incomingImages.length === 0) {
        return existingValue || newValue;
      }

      // Kept tags are emitted as-is (duplicates included); only incoming tags are de-duplicated.
      const tags = keptImages.map(({ tag, groupId }) => this.ensureImageGroupId(tag, groupId));
      const knownTags = new Set(tags);
      for (const { tag, groupId } of incomingImages) {
        const candidate = this.ensureImageGroupId(tag, groupId);
        if (!knownTags.has(candidate)) {
          knownTags.add(candidate);
          tags.push(candidate);
        }
      }
      return tags.join('');
    }

    const keptEntries = this.parseStrictEntries(existingValue, keepGroupId, fieldName);
    const incomingEntries = this.parseStrictEntries(newValue, sourceGroupId, fieldName);
    if (keptEntries.length === 0 && incomingEntries.length === 0) {
      return existingValue || newValue;
    }

    const keyOf = (entry: { groupId: number; content: string }): string =>
      `${entry.groupId}::${entry.content}`;
    const knownKeys = new Set(keptEntries.map(keyOf));
    const combined = [...keptEntries];
    for (const entry of incomingEntries) {
      const key = keyOf(entry);
      if (!knownKeys.has(key)) {
        knownKeys.add(key);
        combined.push(entry);
      }
    }
    // At least one of the two lists is non-empty here, so `combined` is never empty.
    return combined.map((entry) => wrap(entry.groupId, entry.content)).join('');
  }

  if (!existingValue.trim()) return newValue;
  if (!newValue.trim()) return existingValue;

  if (!/data-group-id/.test(existingValue)) {
    return `${wrap(keepGroupId, existingValue)}\n` + newValue;
  }

  // Walk the already grouped spans, wrapping any loose text between them.
  const groupedSpan = /<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/g;
  const pieces: string[] = [];
  let cursor = 0;
  for (
    let hit = groupedSpan.exec(existingValue);
    hit !== null;
    hit = groupedSpan.exec(existingValue)
  ) {
    const gap = existingValue.slice(cursor, hit.index).trim();
    if (gap) {
      pieces.push(`${wrap(keepGroupId, gap)}\n`);
    }
    pieces.push(hit[0] + '\n');
    cursor = hit.index + hit[0].length;
  }

  const tail = existingValue.slice(cursor).trim();
  if (tail) {
    pieces.push(`\n${wrap(keepGroupId, tail)}`);
  }

  return pieces.join('') + '\n' + newValue;
}
}

View File

@@ -370,6 +370,88 @@ test('reloadConfigStrict rejects invalid json and preserves previous config', ()
assert.equal(service.getConfig().logging.level, 'error');
});
test('prefers config.jsonc over config.json when both exist', () => {
  // The two files disagree on logging.level so the file that was loaded is observable.
  const configDir = makeTempDir();
  const legacyJsonFile = path.join(configDir, 'config.json');
  const preferredJsoncFile = path.join(configDir, 'config.jsonc');
  const legacyBody = JSON.stringify({ logging: { level: 'error' } }, null, 2);
  const preferredBody = `{
"logging": {
"level": "warn"
}
}`;

  fs.writeFileSync(legacyJsonFile, legacyBody);
  fs.writeFileSync(preferredJsoncFile, preferredBody, 'utf-8');

  const service = new ConfigService(configDir);

  assert.equal(service.getConfig().logging.level, 'warn');
  assert.equal(service.getConfigPath(), preferredJsoncFile);
});
test('reloadConfigStrict parse failure does not mutate raw config or warnings', () => {
  const configDir = makeTempDir();
  const configFile = path.join(configDir, 'config.jsonc');
  // Parses successfully, but the bad websocket.port is expected to leave a warning behind.
  const initialBody = `{
"logging": {
"level": "warn"
},
"websocket": {
"port": "bad"
}
}`;
  fs.writeFileSync(configFile, initialBody);

  const service = new ConfigService(configDir);
  const snapshot = {
    path: service.getConfigPath(),
    config: service.getConfig(),
    raw: service.getRawConfig(),
    warnings: service.getWarnings(),
  };

  // Truncated document: the strict reload must fail without touching any state.
  fs.writeFileSync(configFile, '{"logging":');
  const outcome = service.reloadConfigStrict();

  assert.equal(outcome.ok, false);
  assert.equal(service.getConfigPath(), snapshot.path);
  assert.deepEqual(service.getConfig(), snapshot.config);
  assert.deepEqual(service.getRawConfig(), snapshot.raw);
  assert.deepEqual(service.getWarnings(), snapshot.warnings);
});
test('warning emission order is deterministic across reloads', () => {
  const configDir = makeTempDir();
  const configFile = path.join(configDir, 'config.jsonc');
  // Four invalid entries, listed in the order their warnings are expected.
  const invalidBody = `{
"unknownFeature": true,
"websocket": {
"enabled": "sometimes",
"port": -1
},
"logging": {
"level": "trace"
}
}`;
  fs.writeFileSync(configFile, invalidBody, 'utf-8');

  const service = new ConfigService(configDir);
  const warningsAfterLoad = service.getWarnings();
  service.reloadConfig();
  const warningsAfterReload = service.getWarnings();

  assert.deepEqual(warningsAfterReload, warningsAfterLoad);

  const warnedPaths = warningsAfterLoad.map((warning) => warning.path);
  assert.deepEqual(warnedPaths, [
    'unknownFeature',
    'websocket.enabled',
    'websocket.port',
    'logging.level',
  ]);
});
test('accepts valid logging.level', () => {
const dir = makeTempDir();
fs.writeFileSync(

65
src/config/load.ts Normal file
View File

@@ -0,0 +1,65 @@
import * as fs from 'fs';
import { RawConfig } from '../types';
import { parseConfigContent } from './parse';
/** Candidate locations for the user configuration file. */
export interface ConfigPaths {
/** Configuration directory. Not read by the loader functions in this module. */
configDir: string;
/** JSONC config file path; probed first, and also returned when neither file exists. */
configFileJsonc: string;
/** Plain JSON config file path; used only when the JSONC file does not exist. */
configFileJson: string;
}
/** Outcome of a config load. */
export interface LoadResult {
/** Parsed top-level config object; `{}` when nothing usable was loaded. */
config: RawConfig;
/** Config file path that was selected for loading (the file may not exist). */
path: string;
}
/**
 * Outcome of a strict config load, discriminated on `ok`:
 * `ok: true` carries the loaded config, `ok: false` carries the failure
 * message. Both variants report the path that was selected.
 */
export type StrictLoadResult =
| (LoadResult & { ok: true })
| {
ok: false;
error: string;
path: string;
};
/** Type guard: true for any non-null, non-array value whose `typeof` is `'object'`. */
function isObject(value: unknown): value is Record<string, unknown> {
  if (typeof value !== 'object') return false;
  if (value === null) return false;
  return !Array.isArray(value);
}
/**
 * Picks the config file to load: the JSONC file when it exists, otherwise the
 * JSON file when it exists, otherwise the JSONC path (which then does not exist).
 *
 * @param paths Candidate config file locations.
 * @returns The selected config file path.
 */
export function resolveExistingConfigPath(paths: ConfigPaths): string {
  const candidatesByPriority = [paths.configFileJsonc, paths.configFileJson];
  const firstExisting = candidatesByPriority.find((candidate) => fs.existsSync(candidate));
  return firstExisting ?? paths.configFileJsonc;
}
/**
 * Loads the raw config and reports read/parse failures instead of hiding them.
 *
 * - No config file on disk: succeeds with an empty config.
 * - File read and parsed: succeeds with the parsed value when it is a plain
 *   object; any other top-level value (array, string, number, …) yields `{}`.
 * - Reading or parsing throws: fails with the error message.
 *
 * @param paths Candidate config file locations.
 * @returns A result discriminated on `ok`; `path` is always the selected file.
 */
export function loadRawConfigStrict(paths: ConfigPaths): StrictLoadResult {
  const selectedPath = resolveExistingConfigPath(paths);
  if (!fs.existsSync(selectedPath)) {
    return { ok: true, config: {}, path: selectedPath };
  }

  let parsed: unknown;
  try {
    const contents = fs.readFileSync(selectedPath, 'utf-8');
    parsed = parseConfigContent(selectedPath, contents);
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown parse error';
    return { ok: false, error: reason, path: selectedPath };
  }

  const config: RawConfig = isObject(parsed) ? (parsed as RawConfig) : {};
  return { ok: true, config, path: selectedPath };
}
/**
 * Lenient variant of `loadRawConfigStrict`: a failed load is reported as an
 * empty config for the selected path rather than as an error.
 *
 * @param paths Candidate config file locations.
 * @returns The strict result when it succeeded, otherwise `{ config: {}, path }`.
 */
export function loadRawConfig(paths: ConfigPaths): LoadResult {
  const outcome = loadRawConfigStrict(paths);
  return outcome.ok ? outcome : { config: {}, path: outcome.path };
}

17
src/config/parse.ts Normal file
View File

@@ -0,0 +1,17 @@
import { parse as parseJsonc, printParseErrorCode, type ParseError } from 'jsonc-parser';
/**
 * Parses config file text according to the file's extension.
 *
 * Paths ending in `.jsonc` are parsed as JSONC (comments and trailing commas
 * allowed); every other path is parsed with `JSON.parse`.
 *
 * @param configPath Path of the file the text came from; only its suffix is inspected.
 * @param data File contents.
 * @returns The parsed value, not validated in any way.
 * @throws SyntaxError from `JSON.parse` for invalid JSON.
 * @throws Error `Invalid JSONC (<code name> at offset <n>)` for invalid JSONC,
 *   describing the first reported problem.
 */
export function parseConfigContent(configPath: string, data: string): unknown {
  if (!configPath.endsWith('.jsonc')) {
    return JSON.parse(data);
  }

  const errors: ParseError[] = [];
  const result = parseJsonc(data, errors, {
    allowTrailingComma: true,
    disallowComments: false,
  });

  const firstError = errors[0];
  if (firstError) {
    // ParseError.error is a numeric code; print its name and position so the message is actionable.
    throw new Error(
      `Invalid JSONC (${printParseErrorCode(firstError.error)} at offset ${firstError.offset})`,
    );
  }
  return result;
}

1414
src/config/resolve.ts Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

19
src/config/warnings.ts Normal file
View File

@@ -0,0 +1,19 @@
import { ConfigValidationWarning } from '../types';
/** Accumulates config validation warnings in the order they are reported. */
export interface WarningCollector {
/** Warnings recorded so far, oldest first. */
warnings: ConfigValidationWarning[];
/**
 * Records one warning.
 * @param path Config path the warning refers to (e.g. `websocket.port`).
 * @param value The offending value.
 * @param fallback The fallback associated with the warning.
 * @param message Human-readable explanation.
 */
warn(path: string, value: unknown, fallback: unknown, message: string): void;
}
/**
 * Creates an empty warning collector.
 *
 * `warn` appends to the same array that is exposed as `warnings`, so callers
 * holding either member observe every recorded warning in call order.
 */
export function createWarningCollector(): WarningCollector {
  const warnings: ConfigValidationWarning[] = [];

  return {
    warnings,
    warn(path: string, value: unknown, fallback: unknown, message: string): void {
      warnings.push({ path, value, fallback, message });
    },
  };
}

View File

@@ -4,6 +4,14 @@ import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import type { DatabaseSync as NodeDatabaseSync } from 'node:sqlite';
import { toMonthKey } from './immersion-tracker/maintenance';
import { enqueueWrite } from './immersion-tracker/queue';
import {
deriveCanonicalTitle,
normalizeText,
resolveBoundedInt,
} from './immersion-tracker/reducer';
import type { QueuedWrite } from './immersion-tracker/types';
type ImmersionTrackerService = import('./immersion-tracker-service').ImmersionTrackerService;
type ImmersionTrackerServiceCtor =
@@ -40,6 +48,41 @@ function cleanupDbPath(dbPath: string): void {
}
}
test('seam: resolveBoundedInt keeps fallback for invalid values', () => {
  // [input, expected] with fallback 25 and accepted range 1..100.
  const cases: Array<[number | undefined, number]> = [
    [undefined, 25], // missing
    [0, 25], // below range
    [101, 25], // above range
    [44.8, 44], // in range, floored
  ];

  for (const [input, expected] of cases) {
    assert.equal(resolveBoundedInt(input, 25, 1, 100), expected);
  }
});
test('seam: reducer title normalization covers local and remote paths', () => {
  const localPath = '/tmp/Episode 01.mkv';
  const remoteUrl = 'https://cdn.example.com/show/%E7%AC%AC1%E8%A9%B1.mp4';

  assert.equal(normalizeText(' hello\n world '), 'hello world');
  assert.equal(deriveCanonicalTitle(localPath), 'Episode 01');
  // The percent-encoded file name is expected to come back decoded.
  assert.equal(deriveCanonicalTitle(remoteUrl), '\u7b2c1\u8a71');
});
test('seam: enqueueWrite drops oldest entries once capacity is exceeded', () => {
  const makeEvent = (eventType: number, sampleMs: number): QueuedWrite => ({
    kind: 'event',
    sessionId: 1,
    eventType,
    sampleMs,
  });
  // The queue is already at its capacity of 2 before the third write arrives.
  const pending: QueuedWrite[] = [makeEvent(1, 1000), makeEvent(2, 1001)];

  const { dropped } = enqueueWrite(pending, makeEvent(3, 1002), 2);

  assert.equal(dropped, 1);
  assert.equal(pending.length, 2);
  assert.equal(pending[0]!.eventType, 2);
  assert.equal(pending[1]!.eventType, 3);
});
test('seam: toMonthKey uses UTC calendar month', () => {
  // Two instants one millisecond apart, on either side of a UTC month boundary.
  const lastMsOfJanuary = Date.UTC(2026, 0, 31, 23, 59, 59, 999);
  const firstMsOfFebruary = Date.UTC(2026, 1, 1, 0, 0, 0, 0);

  assert.equal(toMonthKey(lastMsOfJanuary), 202601);
  assert.equal(toMonthKey(firstMsOfFebruary), 202602);
});
testIfSqlite('startSession generates UUID-like session identifiers', async () => {
const dbPath = makeDbPath();
let tracker: ImmersionTrackerService | null = null;

View File

@@ -4,163 +4,71 @@ import { spawn } from 'node:child_process';
import { DatabaseSync } from 'node:sqlite';
import * as fs from 'node:fs';
import { createLogger } from '../../logger';
import { pruneRetention, runRollupMaintenance } from './immersion-tracker/maintenance';
import {
getDailyRollups,
getMonthlyRollups,
getQueryHints,
getSessionSummaries,
getSessionTimeline,
} from './immersion-tracker/query';
import {
buildVideoKey,
calculateTextMetrics,
createInitialSessionState,
deriveCanonicalTitle,
emptyMetadata,
hashToCode,
isRemoteSource,
normalizeMediaPath,
normalizeText,
parseFps,
resolveBoundedInt,
sanitizePayload,
secToMs,
toNullableInt,
} from './immersion-tracker/reducer';
import { enqueueWrite } from './immersion-tracker/queue';
import {
DEFAULT_BATCH_SIZE,
DEFAULT_DAILY_ROLLUP_RETENTION_MS,
DEFAULT_EVENTS_RETENTION_MS,
DEFAULT_FLUSH_INTERVAL_MS,
DEFAULT_MAINTENANCE_INTERVAL_MS,
DEFAULT_MAX_PAYLOAD_BYTES,
DEFAULT_MONTHLY_ROLLUP_RETENTION_MS,
DEFAULT_QUEUE_CAP,
DEFAULT_TELEMETRY_RETENTION_MS,
DEFAULT_VACUUM_INTERVAL_MS,
EVENT_CARD_MINED,
EVENT_LOOKUP,
EVENT_MEDIA_BUFFER,
EVENT_PAUSE_END,
EVENT_PAUSE_START,
EVENT_SEEK_BACKWARD,
EVENT_SEEK_FORWARD,
EVENT_SUBTITLE_LINE,
SCHEMA_VERSION,
SESSION_STATUS_ACTIVE,
SESSION_STATUS_ENDED,
SOURCE_TYPE_LOCAL,
SOURCE_TYPE_REMOTE,
type ImmersionSessionRollupRow,
type ImmersionTrackerOptions,
type QueuedWrite,
type SessionState,
type SessionSummaryQueryRow,
type SessionTimelineRow,
type VideoMetadata,
} from './immersion-tracker/types';
const SCHEMA_VERSION = 1;
// Write-queue defaults; each can be overridden through ImmersionTrackerPolicy.
const DEFAULT_QUEUE_CAP = 1_000;
const DEFAULT_BATCH_SIZE = 25;
const DEFAULT_FLUSH_INTERVAL_MS = 500;
// Maintenance runs once every 24 hours by default.
const DEFAULT_MAINTENANCE_INTERVAL_MS = 24 * 60 * 60 * 1000;
const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;
// Retention defaults in milliseconds: events 7 days, telemetry 30 days,
// daily rollups 365 days, monthly rollups 5 * 365 days; VACUUM at most weekly.
const DEFAULT_EVENTS_RETENTION_MS = ONE_WEEK_MS;
const DEFAULT_VACUUM_INTERVAL_MS = ONE_WEEK_MS;
const DEFAULT_TELEMETRY_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
const DEFAULT_DAILY_ROLLUP_RETENTION_MS = 365 * 24 * 60 * 60 * 1000;
const DEFAULT_MONTHLY_ROLLUP_RETENTION_MS = 5 * 365 * 24 * 60 * 60 * 1000;
// Default byte cap passed to payload sanitization for event payload JSON.
const DEFAULT_MAX_PAYLOAD_BYTES = 256;
// Numeric codes for where a video comes from (local path vs. remote source).
const SOURCE_TYPE_LOCAL = 1;
const SOURCE_TYPE_REMOTE = 2;
// Numeric session status codes.
const SESSION_STATUS_ACTIVE = 1;
const SESSION_STATUS_ENDED = 2;
// Numeric event type codes carried in QueuedWrite.eventType.
const EVENT_SUBTITLE_LINE = 1;
const EVENT_MEDIA_BUFFER = 2;
const EVENT_LOOKUP = 3;
const EVENT_CARD_MINED = 4;
const EVENT_SEEK_FORWARD = 5;
const EVENT_SEEK_BACKWARD = 6;
const EVENT_PAUSE_START = 7;
const EVENT_PAUSE_END = 8;
/** Construction options for the immersion tracker service. */
export interface ImmersionTrackerOptions {
/** Path of the tracker's database file. */
dbPath: string;
/** Optional tuning overrides; the constructor treats a missing policy as `{}`. */
policy?: ImmersionTrackerPolicy;
}
/**
 * Optional tuning knobs for the tracker.
 *
 * Every value is floored to an integer. A value that is missing, non-finite,
 * or outside its accepted range is replaced by the default (it is not clamped).
 */
export interface ImmersionTrackerPolicy {
/** Maximum queued writes. Accepted range 100..100000, default 1000. */
queueCap?: number;
/** Batch size threshold for flushing. Accepted range 1..10000, default 25. */
batchSize?: number;
/** Flush interval in milliseconds. Accepted range 50..60000, default 500. */
flushIntervalMs?: number;
/** Maintenance interval in milliseconds. Accepted range 60000..7 days, default 24 hours. */
maintenanceIntervalMs?: number;
/** Byte cap for event payload JSON. Minimum 64, default 256. */
payloadCapBytes?: number;
/** Retention periods, expressed in whole days. */
retention?: {
/** Session event retention. Accepted range 1..3650, default 7. */
eventsDays?: number;
/** Telemetry sample retention. Accepted range 1..3650, default 30. */
telemetryDays?: number;
/** Daily rollup retention. Accepted range 1..36500, default 365. */
dailyRollupsDays?: number;
/** Monthly rollup retention. Accepted range 1..36500, default 1825. */
monthlyRollupsDays?: number;
/** Minimum days between VACUUM runs. Minimum 1, default 7. */
vacuumIntervalDays?: number;
};
}
/** Running counters accumulated over a session; every field starts at 0 when a session begins. */
interface TelemetryAccumulator {
totalWatchedMs: number;
activeWatchedMs: number;
// Incremented by one for each recorded subtitle line.
linesSeen: number;
// Increased by the word count of each recorded subtitle line.
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
seekBackwardCount: number;
mediaBufferEvents: number;
}
/** In-memory state of the session currently being tracked, including its counters. */
interface SessionState extends TelemetryAccumulator {
// Row id of the session record returned by the insert.
sessionId: number;
videoId: number;
startedAtMs: number;
// Starts at 0 and is incremented before each subtitle-line event is queued.
currentLineIndex: number;
// Starts at 0.
lastWallClockMs: number;
// Last known media position; null until one is known. Used as the start of seek segments.
lastMediaMs: number | null;
// Start of the pause in progress, or null.
lastPauseStartMs: number | null;
isPaused: boolean;
// Starts as true for a new session.
pendingTelemetry: boolean;
}
/**
 * A pending database write held in the tracker queue.
 *
 * One shape serves both kinds, which is why nearly every field is optional:
 * the counter fields mirror the session's telemetry counters, while
 * `eventType` through `payloadJson` describe a single event.
 */
interface QueuedWrite {
kind: 'telemetry' | 'event';
sessionId: number;
sampleMs?: number;
totalWatchedMs?: number;
activeWatchedMs?: number;
linesSeen?: number;
wordsSeen?: number;
tokensSeen?: number;
cardsMined?: number;
lookupCount?: number;
lookupHits?: number;
pauseCount?: number;
pauseMs?: number;
seekForwardCount?: number;
seekBackwardCount?: number;
mediaBufferEvents?: number;
// One of the EVENT_* codes.
eventType?: number;
lineIndex?: number | null;
// For seek events, the media positions seeked from and to.
segmentStartMs?: number | null;
segmentEndMs?: number | null;
wordsDelta?: number;
cardsDelta?: number;
// Sanitized JSON payload for the event.
payloadJson?: string | null;
}
/** Descriptive metadata stored for a video; values that could not be determined are null. */
interface VideoMetadata {
// One of the SOURCE_TYPE_* codes.
sourceType: number;
canonicalTitle: string;
durationMs: number;
fileSizeBytes: number | null;
// Numeric code derived from the stream's codec tag string.
codecId: number | null;
containerId: number | null;
widthPx: number | null;
heightPx: number | null;
// Frames per second multiplied by 100 and rounded.
fpsX100: number | null;
bitrateKbps: number | null;
// Numeric code derived from the audio stream's codec tag string.
audioCodecId: number | null;
hashSha256: string | null;
screenshotPath: string | null;
metadataJson: string | null;
}
/**
 * One row per session as returned by the session summary query.
 * Counter fields are sums over the session's telemetry rows and are 0 when
 * the session has none.
 */
export interface SessionSummaryQueryRow {
videoId: number | null;
startedAtMs: number;
/** Null while the session has not ended. */
endedAtMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
}
/** One telemetry sample of a session, as returned by the timeline query (newest sample first). */
export interface SessionTimelineRow {
/** Timestamp of the sample. */
sampleMs: number;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
}
/** One aggregated row per (day or month, video), shared by the daily and monthly rollup queries. */
export interface ImmersionSessionRollupRow {
/**
 * Daily rows: session start in whole days since the Unix epoch.
 * Monthly rows: a `YYYYMM` integer.
 */
rollupDayOrMonth: number;
videoId: number | null;
totalSessions: number;
/** Active watch time in minutes. */
totalActiveMin: number;
totalLinesSeen: number;
totalWordsSeen: number;
totalTokensSeen: number;
totalCards: number;
/** Daily rows: null when there was no active watch time. Monthly rows: always 0. */
cardsPerHour: number | null;
/** Daily rows: null when there was no active watch time. Monthly rows: always 0. */
wordsPerMin: number | null;
/** Daily rows: null when there were no lookups. Monthly rows: always 0. */
lookupHitRate: number | null;
}
export type {
ImmersionSessionRollupRow,
ImmersionTrackerOptions,
ImmersionTrackerPolicy,
SessionSummaryQueryRow,
SessionTimelineRow,
} from './immersion-tracker/types';
export class ImmersionTrackerService {
private readonly logger = createLogger('main:immersion-tracker');
@@ -200,21 +108,21 @@ export class ImmersionTrackerService {
}
const policy = options.policy ?? {};
this.queueCap = this.resolveBoundedInt(policy.queueCap, DEFAULT_QUEUE_CAP, 100, 100_000);
this.batchSize = this.resolveBoundedInt(policy.batchSize, DEFAULT_BATCH_SIZE, 1, 10_000);
this.flushIntervalMs = this.resolveBoundedInt(
this.queueCap = resolveBoundedInt(policy.queueCap, DEFAULT_QUEUE_CAP, 100, 100_000);
this.batchSize = resolveBoundedInt(policy.batchSize, DEFAULT_BATCH_SIZE, 1, 10_000);
this.flushIntervalMs = resolveBoundedInt(
policy.flushIntervalMs,
DEFAULT_FLUSH_INTERVAL_MS,
50,
60_000,
);
this.maintenanceIntervalMs = this.resolveBoundedInt(
this.maintenanceIntervalMs = resolveBoundedInt(
policy.maintenanceIntervalMs,
DEFAULT_MAINTENANCE_INTERVAL_MS,
60_000,
7 * 24 * 60 * 60 * 1000,
);
this.maxPayloadBytes = this.resolveBoundedInt(
this.maxPayloadBytes = resolveBoundedInt(
policy.payloadCapBytes,
DEFAULT_MAX_PAYLOAD_BYTES,
64,
@@ -223,35 +131,35 @@ export class ImmersionTrackerService {
const retention = policy.retention ?? {};
this.eventsRetentionMs =
this.resolveBoundedInt(
resolveBoundedInt(
retention.eventsDays,
Math.floor(DEFAULT_EVENTS_RETENTION_MS / 86_400_000),
1,
3650,
) * 86_400_000;
this.telemetryRetentionMs =
this.resolveBoundedInt(
resolveBoundedInt(
retention.telemetryDays,
Math.floor(DEFAULT_TELEMETRY_RETENTION_MS / 86_400_000),
1,
3650,
) * 86_400_000;
this.dailyRollupRetentionMs =
this.resolveBoundedInt(
resolveBoundedInt(
retention.dailyRollupsDays,
Math.floor(DEFAULT_DAILY_ROLLUP_RETENTION_MS / 86_400_000),
1,
36500,
) * 86_400_000;
this.monthlyRollupRetentionMs =
this.resolveBoundedInt(
resolveBoundedInt(
retention.monthlyRollupsDays,
Math.floor(DEFAULT_MONTHLY_ROLLUP_RETENTION_MS / 86_400_000),
1,
36500,
) * 86_400_000;
this.vacuumIntervalMs =
this.resolveBoundedInt(
resolveBoundedInt(
retention.vacuumIntervalDays,
Math.floor(DEFAULT_VACUUM_INTERVAL_MS / 86_400_000),
1,
@@ -300,104 +208,31 @@ export class ImmersionTrackerService {
}
async getSessionSummaries(limit = 50): Promise<SessionSummaryQueryRow[]> {
const prepared = this.db.prepare(`
SELECT
s.video_id AS videoId,
s.started_at_ms AS startedAtMs,
s.ended_at_ms AS endedAtMs,
COALESCE(SUM(t.total_watched_ms), 0) AS totalWatchedMs,
COALESCE(SUM(t.active_watched_ms), 0) AS activeWatchedMs,
COALESCE(SUM(t.lines_seen), 0) AS linesSeen,
COALESCE(SUM(t.words_seen), 0) AS wordsSeen,
COALESCE(SUM(t.tokens_seen), 0) AS tokensSeen,
COALESCE(SUM(t.cards_mined), 0) AS cardsMined,
COALESCE(SUM(t.lookup_count), 0) AS lookupCount,
COALESCE(SUM(t.lookup_hits), 0) AS lookupHits
FROM imm_sessions s
LEFT JOIN imm_session_telemetry t ON t.session_id = s.session_id
GROUP BY s.session_id
ORDER BY s.started_at_ms DESC
LIMIT ?
`);
return prepared.all(limit) as unknown as SessionSummaryQueryRow[];
return getSessionSummaries(this.db, limit);
}
async getSessionTimeline(sessionId: number, limit = 200): Promise<SessionTimelineRow[]> {
const prepared = this.db.prepare(`
SELECT
sample_ms AS sampleMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
words_seen AS wordsSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined
FROM imm_session_telemetry
WHERE session_id = ?
ORDER BY sample_ms DESC
LIMIT ?
`);
return prepared.all(sessionId, limit) as unknown as SessionTimelineRow[];
return getSessionTimeline(this.db, sessionId, limit);
}
async getQueryHints(): Promise<{
totalSessions: number;
activeSessions: number;
}> {
const sessions = this.db.prepare('SELECT COUNT(*) AS total FROM imm_sessions');
const active = this.db.prepare(
'SELECT COUNT(*) AS total FROM imm_sessions WHERE ended_at_ms IS NULL',
);
const totalSessions = Number(sessions.get()?.total ?? 0);
const activeSessions = Number(active.get()?.total ?? 0);
return { totalSessions, activeSessions };
return getQueryHints(this.db);
}
async getDailyRollups(limit = 60): Promise<ImmersionSessionRollupRow[]> {
const prepared = this.db.prepare(`
SELECT
rollup_day AS rollupDayOrMonth,
video_id AS videoId,
total_sessions AS totalSessions,
total_active_min AS totalActiveMin,
total_lines_seen AS totalLinesSeen,
total_words_seen AS totalWordsSeen,
total_tokens_seen AS totalTokensSeen,
total_cards AS totalCards,
cards_per_hour AS cardsPerHour,
words_per_min AS wordsPerMin,
lookup_hit_rate AS lookupHitRate
FROM imm_daily_rollups
ORDER BY rollup_day DESC, video_id DESC
LIMIT ?
`);
return prepared.all(limit) as unknown as ImmersionSessionRollupRow[];
return getDailyRollups(this.db, limit);
}
async getMonthlyRollups(limit = 24): Promise<ImmersionSessionRollupRow[]> {
const prepared = this.db.prepare(`
SELECT
rollup_month AS rollupDayOrMonth,
video_id AS videoId,
total_sessions AS totalSessions,
total_active_min AS totalActiveMin,
total_lines_seen AS totalLinesSeen,
total_words_seen AS totalWordsSeen,
total_tokens_seen AS totalTokensSeen,
total_cards AS totalCards,
0 AS cardsPerHour,
0 AS wordsPerMin,
0 AS lookupHitRate
FROM imm_monthly_rollups
ORDER BY rollup_month DESC, video_id DESC
LIMIT ?
`);
return prepared.all(limit) as unknown as ImmersionSessionRollupRow[];
return getMonthlyRollups(this.db, limit);
}
handleMediaChange(mediaPath: string | null, mediaTitle: string | null): void {
const normalizedPath = this.normalizeMediaPath(mediaPath);
const normalizedTitle = this.normalizeText(mediaTitle);
const normalizedPath = normalizeMediaPath(mediaPath);
const normalizedTitle = normalizeText(mediaTitle);
this.logger.info(
`handleMediaChange called with path=${normalizedPath || '<empty>'} title=${normalizedTitle || '<empty>'}`,
);
@@ -419,9 +254,9 @@ export class ImmersionTrackerService {
return;
}
const sourceType = this.isRemoteSource(normalizedPath) ? SOURCE_TYPE_REMOTE : SOURCE_TYPE_LOCAL;
const videoKey = this.buildVideoKey(normalizedPath, sourceType);
const canonicalTitle = normalizedTitle || this.deriveCanonicalTitle(normalizedPath);
const sourceType = isRemoteSource(normalizedPath) ? SOURCE_TYPE_REMOTE : SOURCE_TYPE_LOCAL;
const videoKey = buildVideoKey(normalizedPath, sourceType);
const canonicalTitle = normalizedTitle || deriveCanonicalTitle(normalizedPath);
const sourcePath = sourceType === SOURCE_TYPE_LOCAL ? normalizedPath : null;
const sourceUrl = sourceType === SOURCE_TYPE_REMOTE ? normalizedPath : null;
@@ -444,7 +279,7 @@ export class ImmersionTrackerService {
handleMediaTitleUpdate(mediaTitle: string | null): void {
if (!this.sessionState) return;
const normalizedTitle = this.normalizeText(mediaTitle);
const normalizedTitle = normalizeText(mediaTitle);
if (!normalizedTitle) return;
this.currentVideoKey = normalizedTitle;
this.updateVideoTitleForActiveSession(normalizedTitle);
@@ -452,10 +287,10 @@ export class ImmersionTrackerService {
recordSubtitleLine(text: string, startSec: number, endSec: number): void {
if (!this.sessionState || !text.trim()) return;
const cleaned = this.normalizeText(text);
const cleaned = normalizeText(text);
if (!cleaned) return;
const metrics = this.calculateTextMetrics(cleaned);
const metrics = calculateTextMetrics(cleaned);
this.sessionState.currentLineIndex += 1;
this.sessionState.linesSeen += 1;
this.sessionState.wordsSeen += metrics.words;
@@ -467,16 +302,19 @@ export class ImmersionTrackerService {
sessionId: this.sessionState.sessionId,
sampleMs: Date.now(),
lineIndex: this.sessionState.currentLineIndex,
segmentStartMs: this.secToMs(startSec),
segmentEndMs: this.secToMs(endSec),
segmentStartMs: secToMs(startSec),
segmentEndMs: secToMs(endSec),
wordsDelta: metrics.words,
cardsDelta: 0,
eventType: EVENT_SUBTITLE_LINE,
payloadJson: this.sanitizePayload({
event: 'subtitle-line',
text: cleaned,
words: metrics.words,
}),
payloadJson: sanitizePayload(
{
event: 'subtitle-line',
text: cleaned,
words: metrics.words,
},
this.maxPayloadBytes,
),
});
}
@@ -515,10 +353,13 @@ export class ImmersionTrackerService {
cardsDelta: 0,
segmentStartMs: this.sessionState.lastMediaMs,
segmentEndMs: mediaMs,
payloadJson: this.sanitizePayload({
fromMs: this.sessionState.lastMediaMs,
toMs: mediaMs,
}),
payloadJson: sanitizePayload(
{
fromMs: this.sessionState.lastMediaMs,
toMs: mediaMs,
},
this.maxPayloadBytes,
),
});
} else if (mediaDeltaMs < 0) {
this.sessionState.seekBackwardCount += 1;
@@ -532,10 +373,13 @@ export class ImmersionTrackerService {
cardsDelta: 0,
segmentStartMs: this.sessionState.lastMediaMs,
segmentEndMs: mediaMs,
payloadJson: this.sanitizePayload({
fromMs: this.sessionState.lastMediaMs,
toMs: mediaMs,
}),
payloadJson: sanitizePayload(
{
fromMs: this.sessionState.lastMediaMs,
toMs: mediaMs,
},
this.maxPayloadBytes,
),
});
}
}
@@ -562,7 +406,7 @@ export class ImmersionTrackerService {
eventType: EVENT_PAUSE_START,
cardsDelta: 0,
wordsDelta: 0,
payloadJson: this.sanitizePayload({ paused: true }),
payloadJson: sanitizePayload({ paused: true }, this.maxPayloadBytes),
});
} else {
if (this.sessionState.lastPauseStartMs) {
@@ -577,7 +421,7 @@ export class ImmersionTrackerService {
eventType: EVENT_PAUSE_END,
cardsDelta: 0,
wordsDelta: 0,
payloadJson: this.sanitizePayload({ paused: false }),
payloadJson: sanitizePayload({ paused: false }, this.maxPayloadBytes),
});
}
@@ -598,9 +442,12 @@ export class ImmersionTrackerService {
eventType: EVENT_LOOKUP,
cardsDelta: 0,
wordsDelta: 0,
payloadJson: this.sanitizePayload({
hit,
}),
payloadJson: sanitizePayload(
{
hit,
},
this.maxPayloadBytes,
),
});
}
@@ -615,7 +462,7 @@ export class ImmersionTrackerService {
eventType: EVENT_CARD_MINED,
wordsDelta: 0,
cardsDelta: count,
payloadJson: this.sanitizePayload({ cardsMined: count }),
payloadJson: sanitizePayload({ cardsMined: count }, this.maxPayloadBytes),
});
}
@@ -630,21 +477,22 @@ export class ImmersionTrackerService {
eventType: EVENT_MEDIA_BUFFER,
cardsDelta: 0,
wordsDelta: 0,
payloadJson: this.sanitizePayload({
buffer: true,
}),
payloadJson: sanitizePayload(
{
buffer: true,
},
this.maxPayloadBytes,
),
});
}
private recordWrite(write: QueuedWrite): void {
if (this.isDestroyed) return;
if (this.queue.length >= this.queueCap) {
const overflow = this.queue.length - this.queueCap + 1;
this.queue.splice(0, overflow);
this.droppedWriteCount += overflow;
this.logger.warn(`Immersion tracker queue overflow; dropped ${overflow} oldest writes`);
const { dropped } = enqueueWrite(this.queue, write, this.queueCap);
if (dropped > 0) {
this.droppedWriteCount += dropped;
this.logger.warn(`Immersion tracker queue overflow; dropped ${dropped} oldest writes`);
}
this.queue.push(write);
this.lastQueueWriteAtMs = Date.now();
if (write.kind === 'event' || this.queue.length >= this.batchSize) {
this.scheduleFlush(0);
@@ -909,18 +757,6 @@ export class ImmersionTrackerService {
`);
}
/**
 * Resolves an optional numeric setting to an integer within `[min, max]`.
 *
 * The value is floored first. Missing, non-finite, and out-of-range values
 * all yield `fallback`; nothing is clamped.
 *
 * @param value Candidate value, possibly undefined.
 * @param fallback Result when the candidate is unusable.
 * @param min Smallest accepted value (inclusive).
 * @param max Largest accepted value (inclusive).
 */
private resolveBoundedInt(
  value: number | undefined,
  fallback: number,
  min: number,
  max: number,
): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return fallback;
  }
  const floored = Math.floor(value);
  const outOfRange = floored < min || floored > max;
  return outOfRange ? fallback : floored;
}
private scheduleMaintenance(): void {
this.maintenanceTimer = setInterval(() => {
this.runMaintenance();
@@ -934,21 +770,13 @@ export class ImmersionTrackerService {
this.flushTelemetry(true);
this.flushNow();
const nowMs = Date.now();
const eventCutoff = nowMs - this.eventsRetentionMs;
const telemetryCutoff = nowMs - this.telemetryRetentionMs;
const dailyCutoff = nowMs - this.dailyRollupRetentionMs;
const monthlyCutoff = nowMs - this.monthlyRollupRetentionMs;
const dayCutoff = Math.floor(dailyCutoff / 86_400_000);
const monthCutoff = this.toMonthKey(monthlyCutoff);
this.db.prepare(`DELETE FROM imm_session_events WHERE ts_ms < ?`).run(eventCutoff);
this.db.prepare(`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`).run(telemetryCutoff);
this.db.prepare(`DELETE FROM imm_daily_rollups WHERE rollup_day < ?`).run(dayCutoff);
this.db.prepare(`DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`).run(monthCutoff);
this.db
.prepare(`DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`)
.run(telemetryCutoff);
this.runRollupMaintenance();
pruneRetention(this.db, nowMs, {
eventsRetentionMs: this.eventsRetentionMs,
telemetryRetentionMs: this.telemetryRetentionMs,
dailyRollupRetentionMs: this.dailyRollupRetentionMs,
monthlyRollupRetentionMs: this.monthlyRollupRetentionMs,
});
runRollupMaintenance(this.db);
if (nowMs - this.lastVacuumMs >= this.vacuumIntervalMs && !this.writeLock.locked) {
this.db.exec('VACUUM');
@@ -964,96 +792,14 @@ export class ImmersionTrackerService {
}
private runRollupMaintenance(): void {
this.db.exec(`
INSERT OR REPLACE INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
words_per_min, lookup_hit_rate
)
SELECT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
ELSE NULL
END AS cards_per_hour,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
ELSE NULL
END AS words_per_min,
CASE
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
ELSE NULL
END AS lookup_hit_rate
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
GROUP BY rollup_day, s.video_id
`);
this.db.exec(`
INSERT OR REPLACE INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards
)
SELECT
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
GROUP BY rollup_month, s.video_id
`);
}
private toMonthKey(timestampMs: number): number {
const monthDate = new Date(timestampMs);
return monthDate.getUTCFullYear() * 100 + monthDate.getUTCMonth() + 1;
runRollupMaintenance(this.db);
}
private startSession(videoId: number, startedAtMs?: number): void {
const nowMs = startedAtMs ?? Date.now();
const result = this.startSessionStatement(videoId, nowMs);
const sessionId = Number(result.lastInsertRowid);
this.sessionState = {
sessionId,
videoId,
startedAtMs: nowMs,
currentLineIndex: 0,
totalWatchedMs: 0,
activeWatchedMs: 0,
linesSeen: 0,
wordsSeen: 0,
tokensSeen: 0,
cardsMined: 0,
lookupCount: 0,
lookupHits: 0,
pauseCount: 0,
pauseMs: 0,
seekForwardCount: 0,
seekBackwardCount: 0,
mediaBufferEvents: 0,
lastWallClockMs: 0,
lastMediaMs: null,
lastPauseStartMs: null,
isPaused: false,
pendingTelemetry: true,
};
this.sessionState = createInitialSessionState(sessionId, videoId, nowMs);
this.recordWrite({
kind: 'telemetry',
sessionId,
@@ -1232,7 +978,7 @@ export class ImmersionTrackerService {
const stat = await fs.promises.stat(mediaPath);
return {
sourceType: SOURCE_TYPE_LOCAL,
canonicalTitle: this.deriveCanonicalTitle(mediaPath),
canonicalTitle: deriveCanonicalTitle(mediaPath),
durationMs: info.durationMs || 0,
fileSizeBytes: Number.isFinite(stat.size) ? stat.size : null,
codecId: info.codecId ?? null,
@@ -1289,10 +1035,10 @@ export class ImmersionTrackerService {
child.stderr.on('data', (chunk) => {
errorOutput += chunk.toString('utf-8');
});
child.on('error', () => resolve(this.emptyMetadata()));
child.on('error', () => resolve(emptyMetadata()));
child.on('close', () => {
if (errorOutput && output.length === 0) {
resolve(this.emptyMetadata());
resolve(emptyMetadata());
return;
}
@@ -1323,14 +1069,14 @@ export class ImmersionTrackerService {
for (const stream of parsed.streams ?? []) {
if (stream.codec_type === 'video') {
widthPx = this.toNullableInt(stream.width);
heightPx = this.toNullableInt(stream.height);
fpsX100 = this.parseFps(stream.avg_frame_rate);
codecId = this.hashToCode(stream.codec_tag_string);
widthPx = toNullableInt(stream.width);
heightPx = toNullableInt(stream.height);
fpsX100 = parseFps(stream.avg_frame_rate);
codecId = hashToCode(stream.codec_tag_string);
containerId = 0;
}
if (stream.codec_type === 'audio') {
audioCodecId = this.hashToCode(stream.codec_tag_string);
audioCodecId = hashToCode(stream.codec_tag_string);
if (audioCodecId && audioCodecId > 0) {
break;
}
@@ -1348,119 +1094,12 @@ export class ImmersionTrackerService {
audioCodecId,
});
} catch {
resolve(this.emptyMetadata());
resolve(emptyMetadata());
}
});
});
}
/**
 * Builds the placeholder probe result in which every metadata field is
 * present and explicitly `null`. The probe code resolves with this value on
 * process errors, stderr-only output, and parse failures.
 */
private emptyMetadata(): {
  durationMs: number | null;
  codecId: number | null;
  containerId: number | null;
  widthPx: number | null;
  heightPx: number | null;
  fpsX100: number | null;
  bitrateKbps: number | null;
  audioCodecId: number | null;
} {
  const missing = null;
  return {
    durationMs: missing,
    codecId: missing,
    containerId: missing,
    widthPx: missing,
    heightPx: missing,
    fpsX100: missing,
    bitrateKbps: missing,
    audioCodecId: missing,
  };
}
/**
 * Converts a frame-rate string into frames-per-second multiplied by 100 and
 * rounded to the nearest integer.
 *
 * Accepts the rational form (`"30000/1001"`) and, as a generalization, a bare
 * number (`"25"`), which is treated as having a denominator of 1.
 *
 * @param value Frame-rate text; may be missing.
 * @returns The rate ×100 as an integer, or `null` when the input is absent,
 *   blank, non-numeric, or has a zero denominator.
 */
private parseFps(value?: string): number | null {
  if (typeof value !== 'string' || value.trim() === '') return null;
  const [num, den] = value.split('/');
  const n = Number(num);
  // No "/" in the input: `den` is undefined, so fall back to an implicit 1
  // instead of letting Number(undefined) turn the whole result into NaN.
  const d = den === undefined ? 1 : Number(den);
  if (!Number.isFinite(n) || !Number.isFinite(d) || d === 0) return null;
  const fps = n / d;
  return Number.isFinite(fps) ? Math.round(fps * 100) : null;
}
/**
 * Folds a string into a positive 31-bit integer using a multiply-by-31
 * rolling hash over its UTF-16 code units.
 *
 * @returns The hash, or `null` when the input is missing/empty or the hash
 *   happens to be 0.
 */
private hashToCode(input?: string): number | null {
  if (!input) return null;
  let acc = 0;
  let pos = 0;
  while (pos < input.length) {
    // Mask after every step so the accumulator stays within 31 bits.
    acc = (acc * 31 + input.charCodeAt(pos)) & 0x7fffffff;
    pos += 1;
  }
  return acc === 0 ? null : acc;
}
/**
 * Serializes an event payload to JSON, replacing it with a
 * `{"truncated":true}` marker when the serialized form exceeds
 * `this.maxPayloadBytes`.
 *
 * The limit is measured in UTF-8 bytes rather than string length, so
 * payloads containing multi-byte characters (e.g. CJK text) cannot slip past
 * the cap.
 *
 * @param payload Arbitrary JSON-serializable key/value data.
 * @returns The JSON text, or the truncation marker.
 */
private sanitizePayload(payload: Record<string, unknown>): string {
  const json = JSON.stringify(payload);
  const withinCap = Buffer.byteLength(json, 'utf8') <= this.maxPayloadBytes;
  return withinCap ? json : JSON.stringify({ truncated: true });
}
/**
 * Derives rough size metrics for a piece of text.
 *
 * `words` counts non-empty whitespace-separated chunks. `tokens` is the larger
 * of that count and the number of characters in U+3040–U+30FF or
 * U+4E00–U+9FFF, so unspaced kana/ideograph text still yields a meaningful
 * figure.
 */
private calculateTextMetrics(value: string): {
  words: number;
  tokens: number;
} {
  const wordCount = value.split(/\s+/).filter((chunk) => chunk.length > 0).length;
  const cjkMatches = value.match(/[\u3040-\u30ff\u4e00-\u9fff]/g);
  const cjkCount = cjkMatches === null ? 0 : cjkMatches.length;
  return { words: wordCount, tokens: Math.max(wordCount, cjkCount) };
}
/**
 * Converts seconds to whole milliseconds (rounded to nearest).
 * Input is coerced with `Number()`; anything non-finite yields 0.
 */
private secToMs(seconds: number): number {
  const asNumber = Number(seconds);
  return Number.isFinite(asNumber) ? Math.round(asNumber * 1000) : 0;
}
/**
 * Trims surrounding whitespace from a media path.
 * `null`, empty, and whitespace-only inputs all normalize to `''`.
 */
private normalizeMediaPath(mediaPath: string | null): string {
  const trimmed = mediaPath ? mediaPath.trim() : '';
  return trimmed;
}
/**
 * Trims the text and collapses each run of whitespace into a single space.
 * Missing or empty input yields `''`.
 */
private normalizeText(value: string | null | undefined): string {
  return value ? value.trim().replace(/\s+/g, ' ') : '';
}
/**
 * Builds the key string for a video: the media path prefixed with `remote:`
 * when `sourceType` equals `SOURCE_TYPE_REMOTE`, otherwise with `local:`.
 */
private buildVideoKey(mediaPath: string, sourceType: number): string {
  const prefix = sourceType === SOURCE_TYPE_REMOTE ? 'remote' : 'local';
  return `${prefix}:${mediaPath}`;
}
/**
 * Reports whether the path begins with a URL scheme followed by `://`
 * (case-insensitive), e.g. `https://…`.
 */
private isRemoteSource(mediaPath: string): boolean {
  const schemePrefix = /^[a-z][a-z0-9+.-]*:\/\//i;
  return schemePrefix.test(mediaPath);
}
/**
 * Derives a display title from a media path.
 *
 * - Local paths: the basename without its final extension.
 * - Remote URLs: the percent-decoded last path segment without its final
 *   extension; when the URL has no path segments, the hostname (or
 *   `'unknown'` if that normalizes to empty).
 * - If URL parsing or decoding throws, the normalized raw input is returned.
 */
private deriveCanonicalTitle(mediaPath: string): string {
  const dropExtension = (name: string): string => name.replace(/\.[^/.]+$/, '');

  if (!this.isRemoteSource(mediaPath)) {
    return this.normalizeText(dropExtension(path.basename(mediaPath)));
  }

  try {
    const url = new URL(mediaPath);
    const segments = url.pathname.split('/').filter(Boolean);
    const lastSegment = segments[segments.length - 1];
    if (lastSegment === undefined) {
      return this.normalizeText(url.hostname) || 'unknown';
    }
    return this.normalizeText(dropExtension(decodeURIComponent(lastSegment)));
  } catch {
    return this.normalizeText(mediaPath);
  }
}
/**
 * Passes a finite number through unchanged and maps everything else
 * (`null`, `undefined`, `NaN`, ±Infinity) to `null`.
 * Note: despite the name, fractional values are not rounded.
 */
private toNullableInt(value: number | null | undefined): number | null {
  return typeof value === 'number' && Number.isFinite(value) ? value : null;
}
private updateVideoTitleForActiveSession(canonicalTitle: string): void {
if (!this.sessionState) return;
this.db

View File

@@ -0,0 +1,90 @@
import type { DatabaseSync } from 'node:sqlite';
/**
 * Encodes the UTC calendar month of a timestamp as the integer `YYYYMM`
 * (e.g. 202602 for February 2026).
 *
 * @param timestampMs Milliseconds since the Unix epoch.
 */
export function toMonthKey(timestampMs: number): number {
  const when = new Date(timestampMs);
  const year = when.getUTCFullYear();
  const monthOfYear = when.getUTCMonth() + 1; // getUTCMonth() is zero-based
  return year * 100 + monthOfYear;
}
/**
 * Deletes rows older than the configured retention windows.
 *
 * Runs five DELETE statements in a fixed order: session events, session
 * telemetry, daily rollups, monthly rollups, and finally ended sessions
 * (which reuse the telemetry cutoff).
 *
 * @param db Open database handle.
 * @param nowMs Reference "now" in epoch milliseconds.
 * @param policy Retention windows in milliseconds, one per table family.
 */
export function pruneRetention(
  db: DatabaseSync,
  nowMs: number,
  policy: {
    eventsRetentionMs: number;
    telemetryRetentionMs: number;
    dailyRollupRetentionMs: number;
    monthlyRollupRetentionMs: number;
  },
): void {
  const {
    eventsRetentionMs,
    telemetryRetentionMs,
    dailyRollupRetentionMs,
    monthlyRollupRetentionMs,
  } = policy;
  const telemetryCutoff = nowMs - telemetryRetentionMs;

  // Each entry pairs a DELETE statement with the cutoff bound to its `?`.
  // Daily rollups are keyed by whole days since the epoch, monthly rollups by
  // a YYYYMM integer, so those cutoffs are converted to the matching key form.
  const purges: Array<[string, number]> = [
    [`DELETE FROM imm_session_events WHERE ts_ms < ?`, nowMs - eventsRetentionMs],
    [`DELETE FROM imm_session_telemetry WHERE sample_ms < ?`, telemetryCutoff],
    [
      `DELETE FROM imm_daily_rollups WHERE rollup_day < ?`,
      Math.floor((nowMs - dailyRollupRetentionMs) / 86_400_000),
    ],
    [
      `DELETE FROM imm_monthly_rollups WHERE rollup_month < ?`,
      toMonthKey(nowMs - monthlyRollupRetentionMs),
    ],
    [
      `DELETE FROM imm_sessions WHERE ended_at_ms IS NOT NULL AND ended_at_ms < ?`,
      telemetryCutoff,
    ],
  ];

  for (const [sql, cutoff] of purges) {
    db.prepare(sql).run(cutoff);
  }
}
/**
 * Rebuilds the daily and monthly rollup tables from sessions joined with
 * their telemetry rows, using `INSERT OR REPLACE` so existing rollup rows for
 * the same key are overwritten.
 *
 * Daily rows are keyed by whole days since the epoch
 * (`started_at_ms / 86400000`); monthly rows by a `YYYYMM` integer.
 *
 * NOTE(review): both statements SUM over every telemetry row of a session.
 * If telemetry rows are cumulative snapshots rather than deltas this would
 * over-count — confirm against the telemetry writer.
 */
export function runRollupMaintenance(db: DatabaseSync): void {
  const dailyRollupSql = `
INSERT OR REPLACE INTO imm_daily_rollups (
rollup_day, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards, cards_per_hour,
words_per_min, lookup_hit_rate
)
SELECT
CAST(s.started_at_ms / 86400000 AS INTEGER) AS rollup_day,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN (COALESCE(SUM(t.cards_mined), 0) * 60.0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
ELSE NULL
END AS cards_per_hour,
CASE
WHEN COALESCE(SUM(t.active_watched_ms), 0) > 0
THEN COALESCE(SUM(t.words_seen), 0) / (COALESCE(SUM(t.active_watched_ms), 0) / 60000.0)
ELSE NULL
END AS words_per_min,
CASE
WHEN COALESCE(SUM(t.lookup_count), 0) > 0
THEN CAST(COALESCE(SUM(t.lookup_hits), 0) AS REAL) / CAST(SUM(t.lookup_count) AS REAL)
ELSE NULL
END AS lookup_hit_rate
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
GROUP BY rollup_day, s.video_id
`;

  const monthlyRollupSql = `
INSERT OR REPLACE INTO imm_monthly_rollups (
rollup_month, video_id, total_sessions, total_active_min, total_lines_seen,
total_words_seen, total_tokens_seen, total_cards
)
SELECT
CAST(strftime('%Y%m', s.started_at_ms / 1000, 'unixepoch') AS INTEGER) AS rollup_month,
s.video_id AS video_id,
COUNT(DISTINCT s.session_id) AS total_sessions,
COALESCE(SUM(t.active_watched_ms), 0) / 60000.0 AS total_active_min,
COALESCE(SUM(t.lines_seen), 0) AS total_lines_seen,
COALESCE(SUM(t.words_seen), 0) AS total_words_seen,
COALESCE(SUM(t.tokens_seen), 0) AS total_tokens_seen,
COALESCE(SUM(t.cards_mined), 0) AS total_cards
FROM imm_sessions s
JOIN imm_session_telemetry t
ON t.session_id = s.session_id
GROUP BY rollup_month, s.video_id
`;

  // Daily first, then monthly — same order as before.
  for (const statement of [dailyRollupSql, monthlyRollupSql]) {
    db.exec(statement);
  }
}

View File

@@ -0,0 +1,104 @@
import type { DatabaseSync } from 'node:sqlite';
import type {
ImmersionSessionRollupRow,
SessionSummaryQueryRow,
SessionTimelineRow,
} from './types';
/**
 * Returns one summary row per session, newest first, with that session's
 * telemetry columns summed (0 when a session has no telemetry rows, because
 * of the LEFT JOIN + COALESCE).
 *
 * @param db Open database handle.
 * @param limit Maximum number of sessions to return (default 50).
 */
export function getSessionSummaries(db: DatabaseSync, limit = 50): SessionSummaryQueryRow[] {
  const sql = `
SELECT
s.video_id AS videoId,
s.started_at_ms AS startedAtMs,
s.ended_at_ms AS endedAtMs,
COALESCE(SUM(t.total_watched_ms), 0) AS totalWatchedMs,
COALESCE(SUM(t.active_watched_ms), 0) AS activeWatchedMs,
COALESCE(SUM(t.lines_seen), 0) AS linesSeen,
COALESCE(SUM(t.words_seen), 0) AS wordsSeen,
COALESCE(SUM(t.tokens_seen), 0) AS tokensSeen,
COALESCE(SUM(t.cards_mined), 0) AS cardsMined,
COALESCE(SUM(t.lookup_count), 0) AS lookupCount,
COALESCE(SUM(t.lookup_hits), 0) AS lookupHits
FROM imm_sessions s
LEFT JOIN imm_session_telemetry t ON t.session_id = s.session_id
GROUP BY s.session_id
ORDER BY s.started_at_ms DESC
LIMIT ?
`;
  const rows = db.prepare(sql).all(limit);
  return rows as unknown as SessionSummaryQueryRow[];
}
/**
 * Returns the telemetry samples recorded for one session, most recent sample
 * first.
 *
 * @param db Open database handle.
 * @param sessionId Session whose samples are requested.
 * @param limit Maximum number of samples to return (default 200).
 */
export function getSessionTimeline(
  db: DatabaseSync,
  sessionId: number,
  limit = 200,
): SessionTimelineRow[] {
  const sql = `
SELECT
sample_ms AS sampleMs,
total_watched_ms AS totalWatchedMs,
active_watched_ms AS activeWatchedMs,
lines_seen AS linesSeen,
words_seen AS wordsSeen,
tokens_seen AS tokensSeen,
cards_mined AS cardsMined
FROM imm_session_telemetry
WHERE session_id = ?
ORDER BY sample_ms DESC
LIMIT ?
`;
  const rows = db.prepare(sql).all(sessionId, limit);
  return rows as unknown as SessionTimelineRow[];
}
/**
 * Counts all sessions and the sessions that have no end timestamp yet.
 *
 * @returns `totalSessions` (every row in `imm_sessions`) and `activeSessions`
 *   (rows where `ended_at_ms IS NULL`); either falls back to 0 when the
 *   query yields no row.
 */
export function getQueryHints(db: DatabaseSync): {
  totalSessions: number;
  activeSessions: number;
} {
  const allStmt = db.prepare('SELECT COUNT(*) AS total FROM imm_sessions');
  const openStmt = db.prepare('SELECT COUNT(*) AS total FROM imm_sessions WHERE ended_at_ms IS NULL');
  const allRow = allStmt.get();
  const openRow = openStmt.get();
  return {
    totalSessions: Number(allRow?.total ?? 0),
    activeSessions: Number(openRow?.total ?? 0),
  };
}
/**
 * Reads rows from `imm_daily_rollups`, newest day first (ties broken by
 * descending video id). The day key is exposed as `rollupDayOrMonth`.
 *
 * @param db Open database handle.
 * @param limit Maximum number of rows to return (default 60).
 */
export function getDailyRollups(db: DatabaseSync, limit = 60): ImmersionSessionRollupRow[] {
  const sql = `
SELECT
rollup_day AS rollupDayOrMonth,
video_id AS videoId,
total_sessions AS totalSessions,
total_active_min AS totalActiveMin,
total_lines_seen AS totalLinesSeen,
total_words_seen AS totalWordsSeen,
total_tokens_seen AS totalTokensSeen,
total_cards AS totalCards,
cards_per_hour AS cardsPerHour,
words_per_min AS wordsPerMin,
lookup_hit_rate AS lookupHitRate
FROM imm_daily_rollups
ORDER BY rollup_day DESC, video_id DESC
LIMIT ?
`;
  const rows = db.prepare(sql).all(limit);
  return rows as unknown as ImmersionSessionRollupRow[];
}
/**
 * Reads rows from `imm_monthly_rollups`, newest month first (ties broken by
 * descending video id). The month key is exposed as `rollupDayOrMonth`.
 *
 * The query selects the constant 0 for `cardsPerHour`, `wordsPerMin` and
 * `lookupHitRate` rather than reading them from the table.
 *
 * @param db Open database handle.
 * @param limit Maximum number of rows to return (default 24).
 */
export function getMonthlyRollups(db: DatabaseSync, limit = 24): ImmersionSessionRollupRow[] {
  const sql = `
SELECT
rollup_month AS rollupDayOrMonth,
video_id AS videoId,
total_sessions AS totalSessions,
total_active_min AS totalActiveMin,
total_lines_seen AS totalLinesSeen,
total_words_seen AS totalWordsSeen,
total_tokens_seen AS totalTokensSeen,
total_cards AS totalCards,
0 AS cardsPerHour,
0 AS wordsPerMin,
0 AS lookupHitRate
FROM imm_monthly_rollups
ORDER BY rollup_month DESC, video_id DESC
LIMIT ?
`;
  const rows = db.prepare(sql).all(limit);
  return rows as unknown as ImmersionSessionRollupRow[];
}

View File

@@ -0,0 +1,19 @@
import type { QueuedWrite } from './types';
/**
 * Appends a write to the queue, evicting the oldest entries first when the
 * queue is already at (or above) its cap so that the result fits the cap.
 * The queue array is mutated in place.
 *
 * @param queue Pending writes, oldest first.
 * @param write Entry to append.
 * @param queueCap Maximum number of entries the queue should hold.
 * @returns How many entries were evicted and the queue length afterwards.
 */
export function enqueueWrite(
  queue: QueuedWrite[],
  write: QueuedWrite,
  queueCap: number,
): {
  dropped: number;
  queueLength: number;
} {
  const isFull = queue.length >= queueCap;
  // Evict enough from the front to leave exactly one free slot.
  const dropped = isFull ? queue.length - queueCap + 1 : 0;
  if (isFull) {
    queue.splice(0, dropped);
  }
  queue.push(write);
  return { dropped, queueLength: queue.length };
}

View File

@@ -0,0 +1,144 @@
import path from 'node:path';
import type { ProbeMetadata, SessionState } from './types';
import { SOURCE_TYPE_REMOTE } from './types';
/**
 * Creates the state object for a freshly started session: identifiers and
 * start time as given, every counter at zero, no media/pause timestamps, not
 * paused, and `pendingTelemetry` set to `true`.
 *
 * @param sessionId Identifier of the new session.
 * @param videoId Identifier of the video being watched.
 * @param startedAtMs Session start time in epoch milliseconds.
 */
export function createInitialSessionState(
  sessionId: number,
  videoId: number,
  startedAtMs: number,
): SessionState {
  // All accumulating counters begin at zero.
  const zeroedCounters = {
    totalWatchedMs: 0,
    activeWatchedMs: 0,
    linesSeen: 0,
    wordsSeen: 0,
    tokensSeen: 0,
    cardsMined: 0,
    lookupCount: 0,
    lookupHits: 0,
    pauseCount: 0,
    pauseMs: 0,
    seekForwardCount: 0,
    seekBackwardCount: 0,
    mediaBufferEvents: 0,
  };

  return {
    sessionId,
    videoId,
    startedAtMs,
    currentLineIndex: 0,
    ...zeroedCounters,
    lastWallClockMs: 0,
    lastMediaMs: null,
    lastPauseStartMs: null,
    isPaused: false,
    pendingTelemetry: true,
  };
}
/**
 * Resolves an optional numeric setting to an integer within `[min, max]`.
 *
 * The value is floored first; if it is missing, non-finite, or the floored
 * result falls outside the range, `fallback` is returned instead (the value
 * is not clamped).
 */
export function resolveBoundedInt(
  value: number | undefined,
  fallback: number,
  min: number,
  max: number,
): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return fallback;
  }
  const floored = Math.floor(value);
  const outOfRange = floored < min || floored > max;
  return outOfRange ? fallback : floored;
}
/**
 * Serializes an event payload to JSON, replacing it with a
 * `{"truncated":true}` marker when the serialized form exceeds the cap.
 *
 * The limit is measured in UTF-8 bytes rather than string length, so
 * payloads containing multi-byte characters (e.g. CJK text) cannot slip past
 * the cap.
 *
 * @param payload Arbitrary JSON-serializable key/value data.
 * @param maxPayloadBytes Largest allowed size of the JSON text, in bytes.
 * @returns The JSON text, or the truncation marker.
 */
export function sanitizePayload(payload: Record<string, unknown>, maxPayloadBytes: number): string {
  const json = JSON.stringify(payload);
  const withinCap = Buffer.byteLength(json, 'utf8') <= maxPayloadBytes;
  return withinCap ? json : JSON.stringify({ truncated: true });
}
/**
 * Derives rough size metrics for a piece of text.
 *
 * `words` counts non-empty whitespace-separated chunks. `tokens` is the larger
 * of that count and the number of characters in U+3040–U+30FF or
 * U+4E00–U+9FFF, so unspaced kana/ideograph text still yields a meaningful
 * figure.
 */
export function calculateTextMetrics(value: string): {
  words: number;
  tokens: number;
} {
  const wordCount = value.split(/\s+/).filter((chunk) => chunk.length > 0).length;
  const cjkMatches = value.match(/[\u3040-\u30ff\u4e00-\u9fff]/g);
  const cjkCount = cjkMatches === null ? 0 : cjkMatches.length;
  return { words: wordCount, tokens: Math.max(wordCount, cjkCount) };
}
/**
 * Converts seconds to whole milliseconds (rounded to nearest).
 * Input is coerced with `Number()`; anything non-finite yields 0.
 */
export function secToMs(seconds: number): number {
  const asNumber = Number(seconds);
  return Number.isFinite(asNumber) ? Math.round(asNumber * 1000) : 0;
}
/**
 * Trims surrounding whitespace from a media path.
 * `null`, empty, and whitespace-only inputs all normalize to `''`.
 */
export function normalizeMediaPath(mediaPath: string | null): string {
  const trimmed = mediaPath ? mediaPath.trim() : '';
  return trimmed;
}
/**
 * Trims the text and collapses each run of whitespace into a single space.
 * Missing or empty input yields `''`.
 */
export function normalizeText(value: string | null | undefined): string {
  return value ? value.trim().replace(/\s+/g, ' ') : '';
}
/**
 * Builds the key string for a video: the media path prefixed with `remote:`
 * when `sourceType` equals `SOURCE_TYPE_REMOTE`, otherwise with `local:`.
 */
export function buildVideoKey(mediaPath: string, sourceType: number): string {
  const prefix = sourceType === SOURCE_TYPE_REMOTE ? 'remote' : 'local';
  return `${prefix}:${mediaPath}`;
}
/**
 * Reports whether the path begins with a URL scheme followed by `://`
 * (case-insensitive), e.g. `https://…`.
 */
export function isRemoteSource(mediaPath: string): boolean {
  const schemePrefix = /^[a-z][a-z0-9+.-]*:\/\//i;
  return schemePrefix.test(mediaPath);
}
/**
 * Derives a display title from a media path.
 *
 * - Local paths: the basename without its final extension.
 * - Remote URLs: the percent-decoded last path segment without its final
 *   extension; when the URL has no path segments, the hostname (or
 *   `'unknown'` if that normalizes to empty).
 * - If URL parsing or decoding throws, the normalized raw input is returned.
 */
export function deriveCanonicalTitle(mediaPath: string): string {
  const dropExtension = (name: string): string => name.replace(/\.[^/.]+$/, '');

  if (!isRemoteSource(mediaPath)) {
    return normalizeText(dropExtension(path.basename(mediaPath)));
  }

  try {
    const url = new URL(mediaPath);
    const segments = url.pathname.split('/').filter(Boolean);
    const lastSegment = segments[segments.length - 1];
    if (lastSegment === undefined) {
      return normalizeText(url.hostname) || 'unknown';
    }
    return normalizeText(dropExtension(decodeURIComponent(lastSegment)));
  } catch {
    return normalizeText(mediaPath);
  }
}
/**
 * Converts a frame-rate string into frames-per-second multiplied by 100 and
 * rounded to the nearest integer.
 *
 * Accepts the rational form (`"30000/1001"`) and, as a generalization, a bare
 * number (`"25"`), which is treated as having a denominator of 1.
 *
 * @param value Frame-rate text; may be missing.
 * @returns The rate ×100 as an integer, or `null` when the input is absent,
 *   blank, non-numeric, or has a zero denominator.
 */
export function parseFps(value?: string): number | null {
  if (typeof value !== 'string' || value.trim() === '') return null;
  const [num, den] = value.split('/');
  const n = Number(num);
  // No "/" in the input: `den` is undefined, so fall back to an implicit 1
  // instead of letting Number(undefined) turn the whole result into NaN.
  const d = den === undefined ? 1 : Number(den);
  if (!Number.isFinite(n) || !Number.isFinite(d) || d === 0) return null;
  const fps = n / d;
  return Number.isFinite(fps) ? Math.round(fps * 100) : null;
}
/**
 * Folds a string into a positive 31-bit integer using a multiply-by-31
 * rolling hash over its UTF-16 code units.
 *
 * @returns The hash, or `null` when the input is missing/empty or the hash
 *   happens to be 0.
 */
export function hashToCode(input?: string): number | null {
  if (!input) return null;
  let acc = 0;
  let pos = 0;
  while (pos < input.length) {
    // Mask after every step so the accumulator stays within 31 bits.
    acc = (acc * 31 + input.charCodeAt(pos)) & 0x7fffffff;
    pos += 1;
  }
  return acc === 0 ? null : acc;
}
/**
 * Builds a fresh `ProbeMetadata` placeholder in which every field is present
 * and explicitly `null`.
 */
export function emptyMetadata(): ProbeMetadata {
  const missing = null;
  return {
    durationMs: missing,
    codecId: missing,
    containerId: missing,
    widthPx: missing,
    heightPx: missing,
    fpsX100: missing,
    bitrateKbps: missing,
    audioCodecId: missing,
  };
}
/**
 * Passes a finite number through unchanged and maps everything else
 * (`null`, `undefined`, `NaN`, ±Infinity) to `null`.
 * Note: despite the name, fractional values are not rounded.
 */
export function toNullableInt(value: number | null | undefined): number | null {
  return typeof value === 'number' && Number.isFinite(value) ? value : null;
}

View File

@@ -0,0 +1,167 @@
export const SCHEMA_VERSION = 1;

// Queueing and scheduling defaults.
export const DEFAULT_QUEUE_CAP = 1_000;
export const DEFAULT_BATCH_SIZE = 25;
export const DEFAULT_FLUSH_INTERVAL_MS = 500;

// Durations are derived from a shared day constant so the units stay obvious.
const ONE_DAY_MS = 24 * 60 * 60 * 1000;
const ONE_WEEK_MS = 7 * ONE_DAY_MS;
export const DEFAULT_MAINTENANCE_INTERVAL_MS = ONE_DAY_MS;
export const DEFAULT_EVENTS_RETENTION_MS = ONE_WEEK_MS;
export const DEFAULT_VACUUM_INTERVAL_MS = ONE_WEEK_MS;
export const DEFAULT_TELEMETRY_RETENTION_MS = 30 * ONE_DAY_MS;
export const DEFAULT_DAILY_ROLLUP_RETENTION_MS = 365 * ONE_DAY_MS;
export const DEFAULT_MONTHLY_ROLLUP_RETENTION_MS = 5 * 365 * ONE_DAY_MS;

export const DEFAULT_MAX_PAYLOAD_BYTES = 256;

// Numeric codes for where a video comes from.
export const SOURCE_TYPE_LOCAL = 1;
export const SOURCE_TYPE_REMOTE = 2;

// Numeric codes for session status.
export const SESSION_STATUS_ACTIVE = 1;
export const SESSION_STATUS_ENDED = 2;

// Numeric codes for session event types.
export const EVENT_SUBTITLE_LINE = 1;
export const EVENT_MEDIA_BUFFER = 2;
export const EVENT_LOOKUP = 3;
export const EVENT_CARD_MINED = 4;
export const EVENT_SEEK_FORWARD = 5;
export const EVENT_SEEK_BACKWARD = 6;
export const EVENT_PAUSE_START = 7;
export const EVENT_PAUSE_END = 8;
/** Options bag for the immersion tracker: a database path plus an optional policy. */
export interface ImmersionTrackerOptions {
/** Path of the database file. */
dbPath: string;
/** Optional tuning overrides; every field inside is itself optional. */
policy?: ImmersionTrackerPolicy;
}
/**
 * Optional tuning knobs for queueing, flushing, maintenance, and retention.
 * All fields are optional; presumably omitted ones fall back to the
 * `DEFAULT_*` constants in this module — verify against the consumer.
 */
export interface ImmersionTrackerPolicy {
queueCap?: number;
batchSize?: number;
flushIntervalMs?: number;
maintenanceIntervalMs?: number;
payloadCapBytes?: number;
/** Retention windows; named in days, unlike the millisecond `DEFAULT_*_MS` constants. */
retention?: {
eventsDays?: number;
telemetryDays?: number;
dailyRollupsDays?: number;
monthlyRollupsDays?: number;
vacuumIntervalDays?: number;
};
}
/**
 * Numeric counters accumulated for a session. `SessionState` extends this
 * shape, and `QueuedWrite` carries optional fields with the same names.
 */
export interface TelemetryAccumulator {
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
pauseCount: number;
pauseMs: number;
seekForwardCount: number;
seekBackwardCount: number;
mediaBufferEvents: number;
}
/**
 * In-memory state of a tracking session: the telemetry counters plus
 * identifiers and playback bookkeeping. `createInitialSessionState` builds the
 * starting value (counters 0, timestamps below `null`, not paused,
 * `pendingTelemetry` true).
 */
export interface SessionState extends TelemetryAccumulator {
sessionId: number;
videoId: number;
startedAtMs: number;
currentLineIndex: number;
lastWallClockMs: number;
/** `null` in the initial state. */
lastMediaMs: number | null;
/** `null` in the initial state. */
lastPauseStartMs: number | null;
isPaused: boolean;
pendingTelemetry: boolean;
}
/**
 * A pending write held in the write queue. `kind` distinguishes telemetry
 * samples from events; every other field besides `sessionId` is optional, so
 * the type itself does not enforce which fields accompany which kind.
 */
export interface QueuedWrite {
kind: 'telemetry' | 'event';
sessionId: number;
// Fields named like the TelemetryAccumulator counters (presumably used by
// 'telemetry' writes — confirm against the writer).
sampleMs?: number;
totalWatchedMs?: number;
activeWatchedMs?: number;
linesSeen?: number;
wordsSeen?: number;
tokensSeen?: number;
cardsMined?: number;
lookupCount?: number;
lookupHits?: number;
pauseCount?: number;
pauseMs?: number;
seekForwardCount?: number;
seekBackwardCount?: number;
mediaBufferEvents?: number;
// Event-oriented fields (presumably used by 'event' writes — confirm).
eventType?: number;
lineIndex?: number | null;
segmentStartMs?: number | null;
segmentEndMs?: number | null;
wordsDelta?: number;
cardsDelta?: number;
payloadJson?: string | null;
}
/**
 * Descriptive metadata for a video. The probe-derived fields share their
 * names and nullability with `ProbeMetadata`, except `durationMs`, which is
 * non-nullable here.
 */
export interface VideoMetadata {
/** Numeric source code; see the `SOURCE_TYPE_*` constants. */
sourceType: number;
canonicalTitle: string;
durationMs: number;
fileSizeBytes: number | null;
codecId: number | null;
containerId: number | null;
widthPx: number | null;
heightPx: number | null;
fpsX100: number | null;
bitrateKbps: number | null;
audioCodecId: number | null;
hashSha256: string | null;
screenshotPath: string | null;
metadataJson: string | null;
}
/**
 * Row shape produced by `getSessionSummaries`: one row per session with its
 * telemetry columns summed. Field names match the column aliases in that query.
 */
export interface SessionSummaryQueryRow {
videoId: number | null;
startedAtMs: number;
/** `null` while the session has no end timestamp. */
endedAtMs: number | null;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
lookupCount: number;
lookupHits: number;
}
/**
 * Row shape produced by `getSessionTimeline`: a single telemetry sample of a
 * session. Field names match the column aliases in that query.
 */
export interface SessionTimelineRow {
sampleMs: number;
totalWatchedMs: number;
activeWatchedMs: number;
linesSeen: number;
wordsSeen: number;
tokensSeen: number;
cardsMined: number;
}
/**
 * Row shape shared by `getDailyRollups` and `getMonthlyRollups`.
 */
export interface ImmersionSessionRollupRow {
/**
 * Daily rows: whole days since the Unix epoch. Monthly rows: a `YYYYMM`
 * integer.
 */
rollupDayOrMonth: number;
videoId: number | null;
totalSessions: number;
totalActiveMin: number;
totalLinesSeen: number;
totalWordsSeen: number;
totalTokensSeen: number;
totalCards: number;
// The three rates below are stored as NULL in daily rollups when their
// denominator is zero; the monthly query returns the constant 0 for all three.
cardsPerHour: number | null;
wordsPerMin: number | null;
lookupHitRate: number | null;
}
/**
 * Technical metadata obtained by probing a media file. Every field is
 * nullable; `emptyMetadata()` returns the all-`null` instance.
 */
export interface ProbeMetadata {
durationMs: number | null;
codecId: number | null;
containerId: number | null;
widthPx: number | null;
heightPx: number | null;
/** Frames per second multiplied by 100 (see `parseFps`). */
fpsX100: number | null;
bitrateKbps: number | null;
audioCodecId: number | null;
}