import { AnkiConnectConfig } from '../types';
import { getConfiguredWordFieldName } from '../anki-field-config';

/** Media values that may be generated for a merge; every member is optional. */
interface FieldGroupingMergeMedia {
  audioField?: string;
  audioValue?: string;
  imageField?: string;
  imageValue?: string;
  miscInfoValue?: string;
}

/**
 * Note data consumed by the merge logic.
 *
 * NOTE(review): the type arguments of `fields` were missing from the text this
 * was restored from; `{ value: string }` is inferred from the `field?.value`
 * reads in this file. Confirm against the original declaration.
 */
export interface FieldGroupingMergeNoteInfo {
  noteId: number;
  fields: Record<string, { value: string }>;
}

/**
 * Callbacks supplied by the owner of the collaborator.
 *
 * NOTE(review): the generic arguments on `extractFields`, `processSentence`
 * and `generateMediaForMerge` are reconstructed from how their results are
 * used below; confirm against the original declaration.
 */
interface FieldGroupingMergeDeps {
  getConfig: () => AnkiConnectConfig;
  getEffectiveSentenceCardConfig: () => {
    sentenceField: string;
    audioField: string;
  };
  getCurrentSubtitleText: () => string | undefined;
  resolveFieldName: (availableFieldNames: string[], preferredName: string) => string | null;
  resolveNoteFieldName: (
    noteInfo: FieldGroupingMergeNoteInfo,
    preferredName?: string,
  ) => string | null;
  extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
  processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
  generateMediaForMerge: (noteInfo: FieldGroupingMergeNoteInfo) => Promise<FieldGroupingMergeMedia>;
  warnFieldParseOnce: (fieldName: string, reason: string, detail?: string) => void;
}

/** One grouped value parsed out of a field: the group id and its content. */
interface GroupedEntry {
  groupId: number;
  content: string;
}

/**
 * Computes the field values that result from merging one note (the "delete"
 * note) into another (the "keep" note).
 *
 * Values that belong to different source notes are kept apart inside a single
 * field by tagging them with a `data-group-id` attribute: text/audio values are
 * wrapped in `<span data-group-id="…">…</span>`, and `<img>` tags carry the
 * attribute directly.
 */
export class FieldGroupingMergeCollaborator {
  /** Lower-cased field names that always use strict span grouping. */
  private readonly strictGroupingFieldDefaults = new Set<string>([
    'picture',
    'sentence',
    'sentenceaudio',
    'sentencefurigana',
    'miscinfo',
  ]);

  constructor(private readonly deps: FieldGroupingMergeDeps) {}

  /**
   * Lists the preferred field names that take part in a merge.
   *
   * The list starts with the built-in names and is extended with the
   * configured image / sentence / audio / misc-info fields. A configured audio
   * field named `expressionaudio` (case-insensitive) is left out. The list may
   * contain duplicates when a configured name equals a built-in one.
   */
  getGroupableFieldNames(): string[] {
    const config = this.deps.getConfig();
    const fields: string[] = [];
    fields.push('Sentence');
    fields.push('SentenceAudio');
    fields.push('Picture');
    if (config.fields?.image) fields.push(config.fields?.image);
    if (config.fields?.sentence) fields.push(config.fields?.sentence);
    if (config.fields?.audio && config.fields?.audio.toLowerCase() !== 'expressionaudio') {
      fields.push(config.fields?.audio);
    }
    const sentenceCardConfig = this.deps.getEffectiveSentenceCardConfig();
    const sentenceAudioField = sentenceCardConfig.audioField;
    if (!fields.includes(sentenceAudioField)) fields.push(sentenceAudioField);
    if (config.fields?.miscInfo) fields.push(config.fields?.miscInfo);
    fields.push('SentenceFurigana');
    return fields;
  }

  /** Flattens `noteInfo.fields` to `name -> value`, using `''` for missing/empty values. */
  getNoteFieldMap(noteInfo: FieldGroupingMergeNoteInfo): Record<string, string> {
    const fields: Record<string, string> = {};
    for (const [name, field] of Object.entries(noteInfo.fields)) {
      fields[name] = field?.value || '';
    }
    return fields;
  }

  /**
   * Builds the field updates to write to the keep note.
   *
   * @param keepNoteId Group id used for values already on the keep note.
   * @param deleteNoteId Group id used for values coming from the delete note.
   * @param keepNoteInfo Note that survives the merge.
   * @param deleteNoteInfo Note whose values are merged in.
   * @param includeGeneratedMedia When true, `generateMediaForMerge` is awaited
   *   and its values fill source fields that are still empty.
   * @returns Map of keep-note field name to merged value. Only fields that
   *   need updating are present.
   */
  async computeFieldGroupingMergedFields(
    keepNoteId: number,
    deleteNoteId: number,
    keepNoteInfo: FieldGroupingMergeNoteInfo,
    deleteNoteInfo: FieldGroupingMergeNoteInfo,
    includeGeneratedMedia: boolean,
  ): Promise<Record<string, string>> {
    const config = this.deps.getConfig();
    const configuredWordField = getConfiguredWordFieldName(config);
    const groupableFields = this.getGroupableFieldNames();
    const keepFieldNames = Object.keys(keepNoteInfo.fields);
    const sourceFields: Record<string, string> = {};
    const resolvedKeepFieldByPreferred = new Map<string, string>();

    // Read each groupable value from the delete note and remember which
    // keep-note field (if any) the preferred name resolves to.
    for (const preferredFieldName of groupableFields) {
      sourceFields[preferredFieldName] = this.getResolvedFieldValue(
        deleteNoteInfo,
        preferredFieldName,
      );
      const keepResolved = this.deps.resolveFieldName(keepFieldNames, preferredFieldName);
      if (keepResolved) {
        resolvedKeepFieldByPreferred.set(preferredFieldName, keepResolved);
      }
    }

    // Cross-fill aliases so either spelling of a field can supply the other.
    if (!sourceFields['SentenceFurigana'] && sourceFields['Sentence']) {
      sourceFields['SentenceFurigana'] = sourceFields['Sentence'];
    }
    if (!sourceFields['Sentence'] && sourceFields['SentenceFurigana']) {
      sourceFields['Sentence'] = sourceFields['SentenceFurigana'];
    }
    if (!sourceFields[configuredWordField] && sourceFields['Expression']) {
      sourceFields[configuredWordField] = sourceFields['Expression'];
    }
    if (!sourceFields[configuredWordField] && sourceFields['Word']) {
      sourceFields[configuredWordField] = sourceFields['Word'];
    }
    if (!sourceFields['Expression'] && sourceFields[configuredWordField]) {
      sourceFields['Expression'] = sourceFields[configuredWordField];
    }
    if (!sourceFields['Word'] && sourceFields[configuredWordField]) {
      sourceFields['Word'] = sourceFields[configuredWordField];
    }
    if (!sourceFields['SentenceAudio'] && sourceFields['ExpressionAudio']) {
      sourceFields['SentenceAudio'] = sourceFields['ExpressionAudio'];
    }
    if (!sourceFields['ExpressionAudio'] && sourceFields['SentenceAudio']) {
      sourceFields['ExpressionAudio'] = sourceFields['SentenceAudio'];
    }

    // Fall back to the current subtitle text for an empty configured sentence field.
    const configuredSentenceField = config.fields?.sentence;
    if (configuredSentenceField && !sourceFields[configuredSentenceField]) {
      const subtitleText = this.deps.getCurrentSubtitleText();
      if (subtitleText) {
        const deleteFields = this.deps.extractFields(deleteNoteInfo.fields);
        sourceFields[configuredSentenceField] = this.deps.processSentence(
          subtitleText,
          deleteFields,
        );
      }
    }

    // Generated media only fills gaps; it never overrides a value read from the note.
    if (includeGeneratedMedia) {
      const media = await this.deps.generateMediaForMerge(keepNoteInfo);
      if (media.audioField && media.audioValue && !sourceFields[media.audioField]) {
        sourceFields[media.audioField] = media.audioValue;
      }
      if (media.imageField && media.imageValue && !sourceFields[media.imageField]) {
        sourceFields[media.imageField] = media.imageValue;
      }
      if (
        config.fields?.miscInfo &&
        media.miscInfoValue &&
        !sourceFields[config.fields?.miscInfo]
      ) {
        sourceFields[config.fields?.miscInfo] = media.miscInfoValue;
      }
    }

    const mergedFields: Record<string, string> = {};
    const configuredWordFieldNormalized = configuredWordField.toLowerCase();
    for (const preferredFieldName of groupableFields) {
      const keepFieldName = resolvedKeepFieldByPreferred.get(preferredFieldName);
      if (!keepFieldName) continue;

      // Word/expression fields are never merged by this loop.
      const keepFieldNormalized = keepFieldName.toLowerCase();
      if (
        keepFieldNormalized === 'expression' ||
        keepFieldNormalized === configuredWordFieldNormalized ||
        keepFieldNormalized === 'expressionfurigana' ||
        keepFieldNormalized === 'expressionreading' ||
        keepFieldNormalized === 'expressionaudio'
      ) {
        continue;
      }

      const existingValue = keepNoteInfo.fields[keepFieldName]?.value || '';
      const newValue = sourceFields[preferredFieldName] || '';
      const hasExisting = existingValue.trim() !== '';
      const hasNew = newValue.trim() !== '';
      if (!hasExisting && !hasNew) continue;

      // Strict fields are always regrouped; other fields are grouped only when
      // both sides have content, otherwise the new value is taken as-is.
      if (this.shouldUseStrictSpanGrouping(keepFieldName) || (hasExisting && hasNew)) {
        mergedFields[keepFieldName] = this.applyFieldGrouping(
          existingValue,
          newValue,
          keepNoteId,
          deleteNoteId,
          keepFieldName,
        );
      } else if (hasNew) {
        mergedFields[keepFieldName] = newValue;
      }
    }

    // When the keep note has distinct sentence-audio and expression-audio
    // fields, copy the (merged or existing) sentence audio into the latter.
    const sentenceCardConfig = this.deps.getEffectiveSentenceCardConfig();
    const resolvedSentenceAudioField = this.deps.resolveFieldName(
      keepFieldNames,
      sentenceCardConfig.audioField || 'SentenceAudio',
    );
    const resolvedExpressionAudioField = this.deps.resolveFieldName(
      keepFieldNames,
      config.fields?.audio || 'ExpressionAudio',
    );
    if (
      resolvedSentenceAudioField &&
      resolvedExpressionAudioField &&
      resolvedExpressionAudioField !== resolvedSentenceAudioField
    ) {
      const mergedSentenceAudioValue =
        mergedFields[resolvedSentenceAudioField] ||
        keepNoteInfo.fields[resolvedSentenceAudioField]?.value ||
        '';
      if (mergedSentenceAudioValue.trim()) {
        mergedFields[resolvedExpressionAudioField] = mergedSentenceAudioValue;
      }
    }

    return mergedFields;
  }

  /** Returns the value of the field `preferredFieldName` resolves to on the note, or `''`. */
  private getResolvedFieldValue(
    noteInfo: FieldGroupingMergeNoteInfo,
    preferredFieldName?: string,
  ): string {
    if (!preferredFieldName) return '';
    const resolved = this.deps.resolveNoteFieldName(noteInfo, preferredFieldName);
    if (!resolved) return '';
    return noteInfo.fields[resolved]?.value || '';
  }

  /**
   * Removes grouped spans from `value` and returns what is left, trimmed.
   * If nothing is left, the whole trimmed value is returned instead.
   */
  private extractUngroupedValue(value: string): string {
    // NOTE(review): the opening-tag part of this pattern was missing and is reconstructed.
    const groupedSpanRegex = /<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/gi;
    const ungrouped = value.replace(groupedSpanRegex, '').trim();
    if (ungrouped) return ungrouped;
    return value.trim();
  }

  /** Returns the last `[sound:…]` tag in `value`, or `''` when there is none. */
  private extractLastSoundTag(value: string): string {
    const matches = value.match(/\[sound:[^\]]+\]/g);
    if (!matches || matches.length === 0) return '';
    return matches[matches.length - 1]!;
  }

  /** Returns the last `<img …>` tag in `value`, or `''` when there is none. */
  private extractLastImageTag(value: string): string {
    const matches = this.extractImageTags(value);
    if (matches.length === 0) return '';
    return matches[matches.length - 1]!;
  }

  /** Returns every `<img …>` tag found in `value`, in order. */
  private extractImageTags(value: string): string[] {
    // NOTE(review): the leading `<img\b[^` of this pattern was missing and is reconstructed.
    const matches = value.match(/<img\b[^>]*>/gi);
    return matches || [];
  }

  /**
   * Returns `imageTag` carrying `data-group-id="groupId"`: an existing
   * double-quoted attribute is overwritten, otherwise one is inserted right
   * after `<img`.
   */
  private ensureImageGroupId(imageTag: string, groupId: number): string {
    if (!imageTag) return '';
    if (/data-group-id=/i.test(imageTag)) {
      return imageTag.replace(/data-group-id="[^"]*"/i, `data-group-id="${groupId}"`);
    }
    // NOTE(review): this statement was truncated after `replace(/`; the
    // insertion form below is reconstructed.
    return imageTag.replace(/<img\b/i, `<img data-group-id="${groupId}"`);
  }

  /** Renders one grouped value as a span tagged with its group id. */
  private wrapInGroupSpan(groupId: number, content: string): string {
    return `<span data-group-id="${groupId}">${content}</span>`;
  }

  /**
   * Parses grouped spans out of `value`.
   *
   * Spans whose id is not a positive finite number are reported through
   * `warnFieldParseOnce` and skipped, as are spans whose normalized content
   * is empty.
   *
   * NOTE(review): the declaration of this helper and its final warning were
   * missing from the text this was restored from. The loop bodies are
   * original; the method boundary, the patterns' opening parts and the
   * 'no-usable-span-entries' reason are reconstructed and must be confirmed.
   */
  private extractSpanEntries(value: string, fieldName: string): GroupedEntry[] {
    const entries: GroupedEntry[] = [];

    // First pass: report ids that are present but unusable.
    const malformedIdRegex = /<span\s+[^>]*data-group-id="([^"]*)"[^>]*>/gi;
    let malformed: RegExpExecArray | null;
    while ((malformed = malformedIdRegex.exec(value)) !== null) {
      const rawId = malformed[1];
      const groupId = Number(rawId);
      if (!Number.isFinite(groupId) || groupId <= 0) {
        this.deps.warnFieldParseOnce(fieldName, 'invalid-group-id', rawId);
      }
    }

    // Second pass: collect spans with a numeric id and non-empty content.
    const spanRegex = /<span\s+data-group-id="(\d+)"[^>]*>([\s\S]*?)<\/span>/gi;
    let match: RegExpExecArray | null;
    while ((match = spanRegex.exec(value)) !== null) {
      const groupId = Number(match[1]);
      if (!Number.isFinite(groupId) || groupId <= 0) continue;
      const content = this.normalizeStrictGroupedValue(match[2] || '', fieldName);
      if (!content) {
        this.deps.warnFieldParseOnce(fieldName, 'empty-group-content');
        continue;
      }
      entries.push({ groupId, content });
    }

    if (entries.length === 0 && /<span\b/i.test(value)) {
      this.deps.warnFieldParseOnce(fieldName, 'no-usable-span-entries');
    }
    return entries;
  }

  /**
   * Parses `value` into grouped entries, de-duplicated by content (first
   * occurrence wins).
   *
   * NOTE(review): the fallback step — treating a value with no usable spans
   * as a single entry tagged `fallbackGroupId` — is reconstructed; only the
   * de-duplication loop survived in the text this was restored from.
   */
  private parseStrictEntries(
    value: string,
    fallbackGroupId: number,
    fieldName: string,
  ): GroupedEntry[] {
    const entries = this.extractSpanEntries(value, fieldName);
    if (entries.length === 0) {
      const ungrouped = this.normalizeStrictGroupedValue(value, fieldName);
      if (ungrouped) {
        entries.push({ groupId: fallbackGroupId, content: ungrouped });
      }
    }

    const unique: GroupedEntry[] = [];
    const seen = new Set<string>();
    for (const entry of entries) {
      const key = entry.content;
      if (seen.has(key)) continue;
      seen.add(key);
      unique.push(entry);
    }
    return unique;
  }

  /**
   * Parses every `<img>` tag in `value` into `{ groupId, tag }`.
   *
   * A tag keeps its own numeric `data-group-id` when that id is positive;
   * otherwise `fallbackGroupId` is used. The returned tag always carries the
   * chosen id.
   */
  private parsePictureEntries(
    value: string,
    fallbackGroupId: number,
  ): { groupId: number; tag: string }[] {
    const tags = this.extractImageTags(value);
    const result: { groupId: number; tag: string }[] = [];
    for (const tag of tags) {
      const idMatch = tag.match(/data-group-id="(\d+)"/i);
      let groupId = fallbackGroupId;
      if (idMatch) {
        const parsed = Number(idMatch[1]);
        if (!Number.isFinite(parsed) || parsed <= 0) {
          this.deps.warnFieldParseOnce('Picture', 'invalid-group-id', idMatch[1]);
        } else {
          groupId = parsed;
        }
      }
      const normalizedTag = this.ensureImageGroupId(tag, groupId);
      if (!normalizedTag) {
        this.deps.warnFieldParseOnce('Picture', 'empty-image-tag');
        continue;
      }
      result.push({ groupId, tag: normalizedTag });
    }
    return result;
  }

  /**
   * Reduces a raw value to the content stored for one group.
   *
   * For `sentenceaudio` / `expressionaudio` the last `[sound:…]` tag is kept,
   * for `picture` the last `<img>` tag. When the expected tag is missing a
   * warning is emitted and the ungrouped text is returned unchanged.
   */
  private normalizeStrictGroupedValue(value: string, fieldName: string): string {
    const ungrouped = this.extractUngroupedValue(value);
    if (!ungrouped) return '';

    const normalizedField = fieldName.toLowerCase();
    if (normalizedField === 'sentenceaudio' || normalizedField === 'expressionaudio') {
      const lastSoundTag = this.extractLastSoundTag(ungrouped);
      if (!lastSoundTag) {
        this.deps.warnFieldParseOnce(fieldName, 'missing-sound-tag');
      }
      return lastSoundTag || ungrouped;
    }
    if (normalizedField === 'picture') {
      const lastImageTag = this.extractLastImageTag(ungrouped);
      if (!lastImageTag) {
        this.deps.warnFieldParseOnce(fieldName, 'missing-image-tag');
      }
      return lastImageTag || ungrouped;
    }
    return ungrouped;
  }

  /** Image tag with its `data-group-id` attribute removed, used to detect duplicates. */
  private getPictureDedupKey(tag: string): string {
    return tag.replace(/\sdata-group-id="[^"]*"/gi, '').trim();
  }

  /**
   * Lower-cased names of all fields that use strict span grouping: the
   * built-in defaults plus the effective sentence/audio fields and the
   * configured image and misc-info fields.
   */
  private getStrictSpanGroupingFields(): Set<string> {
    const strictFields = new Set(this.strictGroupingFieldDefaults);
    const sentenceCardConfig = this.deps.getEffectiveSentenceCardConfig();
    strictFields.add((sentenceCardConfig.sentenceField || 'sentence').toLowerCase());
    strictFields.add((sentenceCardConfig.audioField || 'sentenceaudio').toLowerCase());
    const config = this.deps.getConfig();
    if (config.fields?.image) strictFields.add(config.fields.image.toLowerCase());
    if (config.fields?.miscInfo) strictFields.add(config.fields.miscInfo.toLowerCase());
    return strictFields;
  }

  /** True when `fieldName` (case-insensitive) is one of the strict grouping fields. */
  private shouldUseStrictSpanGrouping(fieldName: string): boolean {
    const normalized = fieldName.toLowerCase();
    return this.getStrictSpanGroupingFields().has(normalized);
  }

  /**
   * Merges `newValue` into `existingValue` for one field.
   *
   * - Strict `picture` field: image tags from both sides are tagged with their
   *   group id and concatenated, skipping duplicates.
   * - Other strict fields: both sides are parsed into entries, de-duplicated
   *   by content, and rendered as grouped spans.
   * - Non-strict fields: ungrouped text in the existing value is wrapped with
   *   `keepGroupId` and `newValue` is appended on a new line.
   *
   * NOTE(review): the span markup in the rendered output was missing from the
   * text this was restored from and is reconstructed; confirm the exact tag
   * format.
   */
  private applyFieldGrouping(
    existingValue: string,
    newValue: string,
    keepGroupId: number,
    sourceGroupId: number,
    fieldName: string,
  ): string {
    if (this.shouldUseStrictSpanGrouping(fieldName)) {
      if (fieldName.toLowerCase() === 'picture') {
        const keepEntries = this.parsePictureEntries(existingValue, keepGroupId);
        const sourceEntries = this.parsePictureEntries(newValue, sourceGroupId);
        if (keepEntries.length === 0 && sourceEntries.length === 0) {
          return existingValue || newValue;
        }
        const mergedTags = keepEntries.map((entry) =>
          this.ensureImageGroupId(entry.tag, entry.groupId),
        );
        const seen = new Set(mergedTags.map((tag) => this.getPictureDedupKey(tag)));
        for (const entry of sourceEntries) {
          const normalized = this.ensureImageGroupId(entry.tag, entry.groupId);
          const dedupKey = this.getPictureDedupKey(normalized);
          if (seen.has(dedupKey)) continue;
          seen.add(dedupKey);
          mergedTags.push(normalized);
        }
        return mergedTags.join('');
      }

      const keepEntries = this.parseStrictEntries(existingValue, keepGroupId, fieldName);
      const sourceEntries = this.parseStrictEntries(newValue, sourceGroupId, fieldName);
      if (keepEntries.length === 0 && sourceEntries.length === 0) {
        return existingValue || newValue;
      }

      // At least one side has entries here, so `merged` is never empty.
      const merged = [...keepEntries];
      const seen = new Set(keepEntries.map((entry) => entry.content));
      for (const entry of sourceEntries) {
        const key = entry.content;
        if (seen.has(key)) continue;
        seen.add(key);
        merged.push(entry);
      }
      return merged.map((entry) => this.wrapInGroupSpan(entry.groupId, entry.content)).join('');
    }

    if (!existingValue.trim()) return newValue;
    if (!newValue.trim()) return existingValue;

    const hasGroups = /data-group-id/.test(existingValue);
    if (!hasGroups) {
      return `${this.wrapInGroupSpan(keepGroupId, existingValue)}\n` + newValue;
    }

    // Keep existing grouped spans as they are and wrap any loose text around
    // them with the keep note's group id.
    const groupedSpanRegex = /<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/g;
    let lastEnd = 0;
    let result = '';
    let match: RegExpExecArray | null;
    while ((match = groupedSpanRegex.exec(existingValue)) !== null) {
      const before = existingValue.slice(lastEnd, match.index);
      if (before.trim()) {
        result += `${this.wrapInGroupSpan(keepGroupId, before.trim())}\n`;
      }
      result += match[0] + '\n';
      lastEnd = match.index + match[0].length;
    }
    const after = existingValue.slice(lastEnd);
    if (after.trim()) {
      result += `\n${this.wrapInGroupSpan(keepGroupId, after.trim())}`;
    }
    return result + '\n' + newValue;
  }
}