mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-21 12:11:28 -07:00
feat(core): add Electron runtime, services, and app composition
This commit is contained in:
461
src/anki-integration/field-grouping-merge.ts
Normal file
461
src/anki-integration/field-grouping-merge.ts
Normal file
@@ -0,0 +1,461 @@
|
||||
import { AnkiConnectConfig } from '../types';
|
||||
|
||||
/**
 * Media produced on demand for a merge (see
 * `FieldGroupingMergeDeps.generateMediaForMerge`).
 *
 * Every member is optional. The merge only uses an audio/image value when
 * both the target field name and the value are present, and only to fill a
 * source slot that is still empty.
 */
interface FieldGroupingMergeMedia {
  /** Name of the field the generated audio should be written to. */
  audioField?: string;
  /** Field content for the generated audio. */
  audioValue?: string;
  /** Name of the field the generated image should be written to. */
  imageField?: string;
  /** Field content for the generated image. */
  imageValue?: string;
  /**
   * Generated misc-info text. It carries no field name of its own: the merge
   * writes it to the field named by `config.fields.miscInfo`.
   */
  miscInfoValue?: string;
}
|
||||
|
||||
/**
 * Minimal view of a note that the field-grouping merge needs: its id and its
 * fields keyed by field name.
 */
export interface FieldGroupingMergeNoteInfo {
  /** Numeric id of the note. */
  noteId: number;
  /** Field name mapped to a wrapper object holding the field's raw string value. */
  fields: Record<string, { value: string }>;
}
|
||||
|
||||
/**
 * Collaborators injected into `FieldGroupingMergeCollaborator`.
 *
 * The merge logic performs no I/O itself; configuration lookup, field-name
 * resolution, sentence processing, media generation and diagnostics all go
 * through these callbacks.
 */
interface FieldGroupingMergeDeps {
  /** Returns the current configuration; the merge reads its `fields` section. */
  getConfig: () => AnkiConnectConfig;
  /** Returns the field names to use for sentence text and sentence audio. */
  getEffectiveSentenceCardConfig: () => {
    sentenceField: string;
    audioField: string;
  };
  /**
   * Returns the current subtitle text, or `undefined` when there is none.
   * Used as a fallback when the source note has no sentence value.
   */
  getCurrentSubtitleText: () => string | undefined;
  /**
   * Picks the entry of `availableFieldNames` that corresponds to
   * `preferredName`, or `null` when none does.
   * NOTE(review): the matching rule (e.g. case-insensitivity) is defined by
   * the implementation, not here - confirm against the provider.
   */
  resolveFieldName: (availableFieldNames: string[], preferredName: string) => string | null;
  /**
   * Same as `resolveFieldName`, but resolved against the fields of a note.
   * Returns `null` when the note has no matching field.
   */
  resolveNoteFieldName: (
    noteInfo: FieldGroupingMergeNoteInfo,
    preferredName?: string,
  ) => string | null;
  /** Flattens a note's `{ value }` wrappers into a plain name-to-string map. */
  extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
  /** Turns raw subtitle text into the value to store in the sentence field. */
  processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
  /** Generates media for the merge; awaited only when the caller requests it. */
  generateMediaForMerge: () => Promise<FieldGroupingMergeMedia>;
  /**
   * Reports a field-parsing problem. `reason` is a short code such as
   * `'invalid-group-id'` or `'missing-sound-tag'`; `detail` is optional extra
   * context (for example the offending raw id).
   * NOTE(review): the name suggests repeated reports are suppressed - confirm.
   */
  warnFieldParseOnce: (fieldName: string, reason: string, detail?: string) => void;
}
|
||||
|
||||
/**
 * Computes the field values that result from merging one note (the "delete"
 * or source note) into another (the "keep" note).
 *
 * Values that originate from different notes are tagged with a
 * `data-group-id` attribute holding the originating note id:
 *
 * - "strict" fields are rebuilt entirely as a sequence of
 *   `<span data-group-id="N">...</span>` entries;
 * - a field literally named `Picture` carries the id on each `<img>` tag;
 * - every other field is merged loosely: existing content is wrapped in spans
 *   and the new value is appended on its own line.
 *
 * All external behaviour (configuration, name resolution, media generation,
 * warnings) is reached through the injected `FieldGroupingMergeDeps`.
 */
export class FieldGroupingMergeCollaborator {
  /** Lower-cased field names that always use strict span grouping. */
  private readonly strictGroupingFieldDefaults = new Set<string>([
    'picture',
    'sentence',
    'sentenceaudio',
    'sentencefurigana',
    'miscinfo',
  ]);

  /** Lower-cased keep-note field names that a merge never writes through the grouping loop. */
  private readonly identityFieldNames = new Set<string>([
    'expression',
    'expressionfurigana',
    'expressionreading',
    'expressionaudio',
  ]);

  constructor(private readonly deps: FieldGroupingMergeDeps) {}

  /**
   * Lists the preferred field names that take part in a merge.
   *
   * The list starts with the built-in names, followed by the configured
   * image, sentence, audio (unless it is `ExpressionAudio`, compared
   * case-insensitively), sentence-card audio and misc-info names, and ends
   * with `SentenceFurigana`. Empty names are skipped and each name appears at
   * most once (exact match), in first-seen order.
   *
   * @returns A new array of field names.
   */
  getGroupableFieldNames(): string[] {
    const config = this.deps.getConfig();
    const sentenceCardConfig = this.deps.getEffectiveSentenceCardConfig();
    const configuredAudio = config.fields?.audio;

    const candidates = [
      'Sentence',
      'SentenceAudio',
      'Picture',
      config.fields?.image,
      config.fields?.sentence,
      configuredAudio && configuredAudio.toLowerCase() !== 'expressionaudio'
        ? configuredAudio
        : undefined,
      // May be empty; other call sites in this class fall back to a default in that case.
      sentenceCardConfig.audioField,
      config.fields?.miscInfo,
      'SentenceFurigana',
    ];

    const names: string[] = [];
    const seen = new Set<string>();
    for (const candidate of candidates) {
      if (!candidate || seen.has(candidate)) continue;
      seen.add(candidate);
      names.push(candidate);
    }
    return names;
  }

  /**
   * Flattens a note's fields into a plain name-to-value map.
   *
   * @param noteInfo Note whose fields are read.
   * @returns A new map; missing or empty values become `''`.
   */
  getNoteFieldMap(noteInfo: FieldGroupingMergeNoteInfo): Record<string, string> {
    const fields: Record<string, string> = {};
    for (const [name, field] of Object.entries(noteInfo.fields)) {
      fields[name] = field?.value || '';
    }
    return fields;
  }

  /**
   * Computes the fields to write to the keep note when `deleteNoteInfo` is
   * merged into it.
   *
   * @param keepNoteId Group id used for content that already lives on the keep note.
   * @param deleteNoteId Group id used for content coming from the source note.
   * @param keepNoteInfo The note that survives the merge.
   * @param deleteNoteInfo The note whose content is merged in.
   * @param includeGeneratedMedia When true, `generateMediaForMerge` is awaited
   *   and its values fill source slots that are still empty.
   * @returns Keep-note field name mapped to its new value. Only fields that
   *   need to change are present.
   */
  async computeFieldGroupingMergedFields(
    keepNoteId: number,
    deleteNoteId: number,
    keepNoteInfo: FieldGroupingMergeNoteInfo,
    deleteNoteInfo: FieldGroupingMergeNoteInfo,
    includeGeneratedMedia: boolean,
  ): Promise<Record<string, string>> {
    const config = this.deps.getConfig();
    const groupableFields = this.getGroupableFieldNames();
    const keepFieldNames = Object.keys(keepNoteInfo.fields);

    // Source values are keyed by preferred name; keep-note targets are
    // resolved once up front.
    const sourceFields: Record<string, string> = {};
    const resolvedKeepFieldByPreferred = new Map<string, string>();
    for (const preferredFieldName of groupableFields) {
      sourceFields[preferredFieldName] = this.getResolvedFieldValue(
        deleteNoteInfo,
        preferredFieldName,
      );
      const keepResolved = this.deps.resolveFieldName(keepFieldNames, preferredFieldName);
      if (keepResolved) {
        resolvedKeepFieldByPreferred.set(preferredFieldName, keepResolved);
      }
    }

    // Paired fields stand in for one another when only one of them has a value.
    this.mirrorIfMissing(sourceFields, 'Sentence', 'SentenceFurigana');
    this.mirrorIfMissing(sourceFields, 'Word', 'Expression');
    this.mirrorIfMissing(sourceFields, 'ExpressionAudio', 'SentenceAudio');

    // Fall back to the current subtitle when the source note has no sentence.
    const sentenceFieldName = config.fields?.sentence;
    if (sentenceFieldName && !sourceFields[sentenceFieldName]) {
      const subtitleText = this.deps.getCurrentSubtitleText();
      if (subtitleText) {
        const deleteFields = this.deps.extractFields(deleteNoteInfo.fields);
        sourceFields[sentenceFieldName] = this.deps.processSentence(subtitleText, deleteFields);
      }
    }

    if (includeGeneratedMedia) {
      // Generated media never overrides a value the source note already has.
      const media = await this.deps.generateMediaForMerge();
      if (media.audioField && media.audioValue && !sourceFields[media.audioField]) {
        sourceFields[media.audioField] = media.audioValue;
      }
      if (media.imageField && media.imageValue && !sourceFields[media.imageField]) {
        sourceFields[media.imageField] = media.imageValue;
      }
      const miscInfoFieldName = config.fields?.miscInfo;
      if (miscInfoFieldName && media.miscInfoValue && !sourceFields[miscInfoFieldName]) {
        sourceFields[miscInfoFieldName] = media.miscInfoValue;
      }
    }

    const mergedFields: Record<string, string> = {};
    for (const preferredFieldName of groupableFields) {
      const keepFieldName = resolvedKeepFieldByPreferred.get(preferredFieldName);
      if (!keepFieldName || this.identityFieldNames.has(keepFieldName.toLowerCase())) continue;

      const existingValue = keepNoteInfo.fields[keepFieldName]?.value || '';
      const newValue = sourceFields[preferredFieldName] || '';
      const hasExisting = existingValue.trim() !== '';
      const hasNew = newValue.trim() !== '';
      if (!hasExisting && !hasNew) continue;

      // Strict fields are always regrouped, even with one side empty, so that
      // existing content gets its group id. Other fields are grouped only when
      // both sides have content; otherwise a non-empty new value is copied as-is.
      if (this.shouldUseStrictSpanGrouping(keepFieldName) || (hasExisting && hasNew)) {
        mergedFields[keepFieldName] = this.applyFieldGrouping(
          existingValue,
          newValue,
          keepNoteId,
          deleteNoteId,
          keepFieldName,
        );
      } else if (hasNew) {
        mergedFields[keepFieldName] = newValue;
      }
    }

    // When the keep note has distinct sentence-audio and expression-audio
    // fields, the expression-audio field receives the (merged) sentence audio.
    const sentenceCardConfig = this.deps.getEffectiveSentenceCardConfig();
    const resolvedSentenceAudioField = this.deps.resolveFieldName(
      keepFieldNames,
      sentenceCardConfig.audioField || 'SentenceAudio',
    );
    const resolvedExpressionAudioField = this.deps.resolveFieldName(
      keepFieldNames,
      config.fields?.audio || 'ExpressionAudio',
    );
    if (
      resolvedSentenceAudioField &&
      resolvedExpressionAudioField &&
      resolvedExpressionAudioField !== resolvedSentenceAudioField
    ) {
      const mergedSentenceAudioValue =
        mergedFields[resolvedSentenceAudioField] ||
        keepNoteInfo.fields[resolvedSentenceAudioField]?.value ||
        '';
      if (mergedSentenceAudioValue.trim()) {
        mergedFields[resolvedExpressionAudioField] = mergedSentenceAudioValue;
      }
    }

    return mergedFields;
  }

  /** Copies whichever of `fields[first]` / `fields[second]` is set into the other when that one is empty. */
  private mirrorIfMissing(fields: Record<string, string>, first: string, second: string): void {
    const firstValue = fields[first];
    const secondValue = fields[second];
    if (!secondValue && firstValue) {
      fields[second] = firstValue;
    } else if (!firstValue && secondValue) {
      fields[first] = secondValue;
    }
  }

  /** Returns the value of the note field that `preferredFieldName` resolves to, or `''`. */
  private getResolvedFieldValue(
    noteInfo: FieldGroupingMergeNoteInfo,
    preferredFieldName?: string,
  ): string {
    if (!preferredFieldName) return '';
    const resolved = this.deps.resolveNoteFieldName(noteInfo, preferredFieldName);
    if (!resolved) return '';
    return noteInfo.fields[resolved]?.value || '';
  }

  /**
   * Strips attribute-exact `<span data-group-id="...">...</span>` blocks and
   * returns what is left, trimmed. If nothing is left, the trimmed original is
   * returned instead.
   */
  private extractUngroupedValue(value: string): string {
    const groupedSpanRegex = /<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/gi;
    const ungrouped = value.replace(groupedSpanRegex, '').trim();
    return ungrouped || value.trim();
  }

  /** Returns the last `[sound:...]` tag in `value`, or `''` when there is none. */
  private extractLastSoundTag(value: string): string {
    const matches = value.match(/\[sound:[^\]]+\]/g);
    return matches?.[matches.length - 1] ?? '';
  }

  /** Returns the last `<img ...>` tag in `value`, or `''` when there is none. */
  private extractLastImageTag(value: string): string {
    const matches = this.extractImageTags(value);
    return matches[matches.length - 1] ?? '';
  }

  /** Returns every `<img ...>` tag in `value`, in document order. */
  private extractImageTags(value: string): string[] {
    return value.match(/<img\b[^>]*>/gi) || [];
  }

  /**
   * Returns `imageTag` with its `data-group-id` set to `groupId`: a
   * double-quoted attribute is rewritten, a missing one is inserted right
   * after `<img`. An empty tag yields `''`.
   */
  private ensureImageGroupId(imageTag: string, groupId: number): string {
    if (!imageTag) return '';
    if (/data-group-id=/i.test(imageTag)) {
      return imageTag.replace(/data-group-id="[^"]*"/i, `data-group-id="${groupId}"`);
    }
    return imageTag.replace(/<img\b/i, `<img data-group-id="${groupId}"`);
  }

  /** Renders one grouped entry as a span element. */
  private renderGroupSpan(groupId: number, content: string): string {
    return `<span data-group-id="${groupId}">${content}</span>`;
  }

  /**
   * Extracts `{ groupId, content }` for every well-formed grouped span in
   * `value`.
   *
   * Spans whose id is not a positive finite number are reported through
   * `warnFieldParseOnce` and skipped, as are spans whose normalized content
   * is empty. A value that contains spans but yields no entries is reported
   * as well.
   */
  private extractSpanEntries(
    value: string,
    fieldName: string,
  ): { groupId: number; content: string }[] {
    // First pass: diagnostics only. This pattern is looser than the one
    // below so that non-numeric ids are seen at all.
    for (const malformed of value.matchAll(/<span\s+[^>]*data-group-id="([^"]*)"[^>]*>/gi)) {
      const rawId = malformed[1];
      const groupId = Number(rawId);
      if (!Number.isFinite(groupId) || groupId <= 0) {
        this.deps.warnFieldParseOnce(fieldName, 'invalid-group-id', rawId);
      }
    }

    const entries: { groupId: number; content: string }[] = [];
    const spanRegex = /<span\s+data-group-id="(\d+)"[^>]*>([\s\S]*?)<\/span>/gi;
    for (const match of value.matchAll(spanRegex)) {
      const groupId = Number(match[1]);
      if (!Number.isFinite(groupId) || groupId <= 0) continue;
      const content = this.normalizeStrictGroupedValue(match[2] || '', fieldName);
      if (!content) {
        this.deps.warnFieldParseOnce(fieldName, 'empty-group-content');
        continue;
      }
      entries.push({ groupId, content });
    }

    if (entries.length === 0 && /<span\b/i.test(value)) {
      this.deps.warnFieldParseOnce(fieldName, 'no-usable-span-entries');
    }
    return entries;
  }

  /**
   * Parses `value` into de-duplicated grouped entries. When the value has no
   * usable grouped spans, its whole (normalized) content becomes one entry
   * tagged with `fallbackGroupId`.
   */
  private parseStrictEntries(
    value: string,
    fallbackGroupId: number,
    fieldName: string,
  ): { groupId: number; content: string }[] {
    const entries = this.extractSpanEntries(value, fieldName);
    if (entries.length === 0) {
      const ungrouped = this.normalizeStrictGroupedValue(
        this.extractUngroupedValue(value),
        fieldName,
      );
      if (ungrouped) {
        entries.push({ groupId: fallbackGroupId, content: ungrouped });
      }
    }

    const seen = new Set<string>();
    return entries.filter((entry) => {
      const key = `${entry.groupId}::${entry.content}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  }

  /**
   * Parses every `<img>` tag in `value` into `{ groupId, tag }`, where `tag`
   * is the image tag carrying that group id. Tags without a usable numeric id
   * get `fallbackGroupId`.
   */
  private parsePictureEntries(
    value: string,
    fallbackGroupId: number,
  ): { groupId: number; tag: string }[] {
    const result: { groupId: number; tag: string }[] = [];
    for (const tag of this.extractImageTags(value)) {
      let groupId = fallbackGroupId;
      const idMatch = tag.match(/data-group-id="(\d+)"/i);
      if (idMatch) {
        const parsed = Number(idMatch[1]);
        if (!Number.isFinite(parsed) || parsed <= 0) {
          this.deps.warnFieldParseOnce('Picture', 'invalid-group-id', idMatch[1]);
        } else {
          groupId = parsed;
        }
      }
      const normalizedTag = this.ensureImageGroupId(tag, groupId);
      if (!normalizedTag) {
        this.deps.warnFieldParseOnce('Picture', 'empty-image-tag');
        continue;
      }
      result.push({ groupId, tag: normalizedTag });
    }
    return result;
  }

  /**
   * Reduces a strict field's content to its canonical form: the last
   * `[sound:...]` tag for `SentenceAudio` / `ExpressionAudio`, the last
   * `<img>` tag for `Picture`, and the ungrouped text otherwise. When the
   * expected tag is missing a warning is reported and the ungrouped text is
   * returned unchanged.
   */
  private normalizeStrictGroupedValue(value: string, fieldName: string): string {
    const ungrouped = this.extractUngroupedValue(value);
    if (!ungrouped) return '';

    const normalizedField = fieldName.toLowerCase();
    if (normalizedField === 'sentenceaudio' || normalizedField === 'expressionaudio') {
      const lastSoundTag = this.extractLastSoundTag(ungrouped);
      if (!lastSoundTag) {
        this.deps.warnFieldParseOnce(fieldName, 'missing-sound-tag');
      }
      return lastSoundTag || ungrouped;
    }

    if (normalizedField === 'picture') {
      const lastImageTag = this.extractLastImageTag(ungrouped);
      if (!lastImageTag) {
        this.deps.warnFieldParseOnce(fieldName, 'missing-image-tag');
      }
      return lastImageTag || ungrouped;
    }

    return ungrouped;
  }

  /**
   * Returns the lower-cased names of all fields that use strict span
   * grouping: the built-in defaults plus the configured sentence,
   * sentence-audio, image and misc-info fields.
   */
  private getStrictSpanGroupingFields(): Set<string> {
    const strictFields = new Set(this.strictGroupingFieldDefaults);
    const sentenceCardConfig = this.deps.getEffectiveSentenceCardConfig();
    strictFields.add((sentenceCardConfig.sentenceField || 'sentence').toLowerCase());
    strictFields.add((sentenceCardConfig.audioField || 'sentenceaudio').toLowerCase());
    const config = this.deps.getConfig();
    if (config.fields?.image) strictFields.add(config.fields.image.toLowerCase());
    if (config.fields?.miscInfo) strictFields.add(config.fields.miscInfo.toLowerCase());
    return strictFields;
  }

  /** Whether `fieldName` (compared case-insensitively) uses strict span grouping. */
  private shouldUseStrictSpanGrouping(fieldName: string): boolean {
    return this.getStrictSpanGroupingFields().has(fieldName.toLowerCase());
  }

  /**
   * Merges `newValue` into `existingValue` for one field, tagging content
   * with the id of the note it came from.
   *
   * @param existingValue Current value on the keep note.
   * @param newValue Value coming from the source note.
   * @param keepGroupId Id assigned to untagged keep-note content.
   * @param sourceGroupId Id assigned to untagged source-note content.
   * @param fieldName Keep-note field name; selects the merge strategy.
   * @returns The merged field value.
   */
  private applyFieldGrouping(
    existingValue: string,
    newValue: string,
    keepGroupId: number,
    sourceGroupId: number,
    fieldName: string,
  ): string {
    if (!this.shouldUseStrictSpanGrouping(fieldName)) {
      return this.mergeLooseValues(existingValue, newValue, keepGroupId);
    }
    // NOTE(review): only a field literally named "Picture" takes the
    // image-tag path; a differently named configured image field is strict
    // but goes through the span path. Confirm this is intended.
    if (fieldName.toLowerCase() === 'picture') {
      return this.mergePictureValues(existingValue, newValue, keepGroupId, sourceGroupId);
    }
    return this.mergeStrictSpanValues(
      existingValue,
      newValue,
      keepGroupId,
      sourceGroupId,
      fieldName,
    );
  }

  /** Merges two picture values into a concatenation of unique, id-tagged `<img>` tags. */
  private mergePictureValues(
    existingValue: string,
    newValue: string,
    keepGroupId: number,
    sourceGroupId: number,
  ): string {
    const keepEntries = this.parsePictureEntries(existingValue, keepGroupId);
    const sourceEntries = this.parsePictureEntries(newValue, sourceGroupId);
    if (keepEntries.length === 0 && sourceEntries.length === 0) {
      return existingValue || newValue;
    }

    // Entries already carry their group id, so the tags are used verbatim.
    const mergedTags = keepEntries.map((entry) => entry.tag);
    const seen = new Set(mergedTags);
    for (const entry of sourceEntries) {
      if (seen.has(entry.tag)) continue;
      seen.add(entry.tag);
      mergedTags.push(entry.tag);
    }
    return mergedTags.join('');
  }

  /** Merges two strict values into a concatenation of unique grouped spans, keep entries first. */
  private mergeStrictSpanValues(
    existingValue: string,
    newValue: string,
    keepGroupId: number,
    sourceGroupId: number,
    fieldName: string,
  ): string {
    const keepEntries = this.parseStrictEntries(existingValue, keepGroupId, fieldName);
    const sourceEntries = this.parseStrictEntries(newValue, sourceGroupId, fieldName);
    if (keepEntries.length === 0 && sourceEntries.length === 0) {
      return existingValue || newValue;
    }

    const merged = [...keepEntries];
    const seen = new Set(keepEntries.map((entry) => `${entry.groupId}::${entry.content}`));
    for (const entry of sourceEntries) {
      const key = `${entry.groupId}::${entry.content}`;
      if (seen.has(key)) continue;
      seen.add(key);
      merged.push(entry);
    }
    return merged.map((entry) => this.renderGroupSpan(entry.groupId, entry.content)).join('');
  }

  /**
   * Merges two values of a non-strict field. Existing grouped spans are kept
   * verbatim, any other existing content is wrapped in a span tagged with
   * `keepGroupId`, and `newValue` is appended unwrapped. Pieces are separated
   * by exactly one newline.
   */
  private mergeLooseValues(existingValue: string, newValue: string, keepGroupId: number): string {
    if (!existingValue.trim()) return newValue;
    if (!newValue.trim()) return existingValue;

    if (!/data-group-id/.test(existingValue)) {
      return `${this.renderGroupSpan(keepGroupId, existingValue)}\n${newValue}`;
    }

    const parts: string[] = [];
    let cursor = 0;
    const groupedSpanRegex = /<span\s+data-group-id="[^"]*">[\s\S]*?<\/span>/g;
    for (const match of existingValue.matchAll(groupedSpanRegex)) {
      const start = match.index ?? cursor;
      const before = existingValue.slice(cursor, start).trim();
      if (before) parts.push(this.renderGroupSpan(keepGroupId, before));
      parts.push(match[0]);
      cursor = start + match[0].length;
    }
    const after = existingValue.slice(cursor).trim();
    if (after) parts.push(this.renderGroupSpan(keepGroupId, after));

    parts.push(newValue);
    return parts.join('\n');
  }
}
|
||||
Reference in New Issue
Block a user