diff --git a/config.example.jsonc b/config.example.jsonc index cf9f1ae..b4cebc7 100644 --- a/config.example.jsonc +++ b/config.example.jsonc @@ -340,6 +340,7 @@ "animatedFps": 10, // Animated fps setting. "animatedMaxWidth": 640, // Animated max width setting. "animatedCrf": 35, // Animated crf setting. + "syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false "audioPadding": 0.5, // Audio padding setting. "fallbackDuration": 3, // Fallback duration setting. "maxMediaDuration": 30 // Max media duration setting. diff --git a/docs-site/public/config.example.jsonc b/docs-site/public/config.example.jsonc index cf9f1ae..b4cebc7 100644 --- a/docs-site/public/config.example.jsonc +++ b/docs-site/public/config.example.jsonc @@ -340,6 +340,7 @@ "animatedFps": 10, // Animated fps setting. "animatedMaxWidth": 640, // Animated max width setting. "animatedCrf": 35, // Animated crf setting. + "syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false "audioPadding": 0.5, // Audio padding setting. "fallbackDuration": 3, // Fallback duration setting. "maxMediaDuration": 30 // Max media duration setting. diff --git a/src/anki-integration.ts b/src/anki-integration.ts index ad93798..2f867c7 100644 --- a/src/anki-integration.ts +++ b/src/anki-integration.ts @@ -54,6 +54,7 @@ import { FieldGroupingService } from './anki-integration/field-grouping'; import { FieldGroupingMergeCollaborator } from './anki-integration/field-grouping-merge'; import { NoteUpdateWorkflow } from './anki-integration/note-update-workflow'; import { FieldGroupingWorkflow } from './anki-integration/field-grouping-workflow'; +import { resolveAnimatedImageLeadInSeconds } from './anki-integration/animated-image-sync'; import { AnkiIntegrationRuntime, normalizeAnkiIntegrationConfig } from './anki-integration/runtime'; const log = createLogger('anki').child('integration'); @@ -190,7 +191,7 @@ export class AnkiIntegration { this.resolveNoteFieldName(noteInfo, preferredName), extractFields: (fields) => this.extractFields(fields), processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields), - generateMediaForMerge: () => this.generateMediaForMerge(), + generateMediaForMerge: (noteInfo) => this.generateMediaForMerge(noteInfo), warnFieldParseOnce: (fieldName, reason, detail) => this.warnFieldParseOnce(fieldName, reason, detail), }); @@ -286,6 +287,7 @@ export class AnkiIntegration { storeMediaFile: (filename, data) => this.client.storeMediaFile(filename, data), findNotes: async (query, options) => (await this.client.findNotes(query, options)) as number[], + retrieveMediaFile: (filename) => this.client.retrieveMediaFile(filename), }, mediaGenerator: { generateAudio: (videoPath, startTime, endTime, audioPadding, audioStreamIndex) => @@ -319,6 +321,7 @@ export class AnkiIntegration { this.resolveConfiguredFieldName(noteInfo, ...preferredNames), resolveNoteFieldName: (noteInfo, preferredName) => this.resolveNoteFieldName(noteInfo, preferredName), + getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo), extractFields: (fields) => this.extractFields(fields), processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields), setCardTypeFields: (updatedFields, availableFieldNames, cardKind) => @@ -407,12 +410,13 @@ export class AnkiIntegration { this.resolveConfiguredFieldName(noteInfo, ...preferredNames), getResolvedSentenceAudioFieldName: (noteInfo) => this.getResolvedSentenceAudioFieldName(noteInfo), + getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo), mergeFieldValue: (existing, newValue, overwrite) => this.mergeFieldValue(existing, newValue, overwrite), generateAudioFilename: () => this.generateAudioFilename(), generateAudio: () => this.generateAudio(), generateImageFilename: () => this.generateImageFilename(), - generateImage: () => this.generateImage(), + generateImage: (animatedLeadInSeconds) => this.generateImage(animatedLeadInSeconds), formatMiscInfoPattern: (fallbackFilename, startTimeSeconds) => this.formatMiscInfoPattern(fallbackFilename, startTimeSeconds), addConfiguredTagsToNote: (noteId) => this.addConfiguredTagsToNote(noteId), @@ -637,7 +641,7 @@ export class AnkiIntegration { ); } - private async generateImage(): Promise { + private async generateImage(animatedLeadInSeconds = 0): Promise { if (!this.mpvClient || !this.mpvClient.currentVideoPath) { return null; } @@ -665,6 +669,7 @@ export class AnkiIntegration { maxWidth: this.config.media?.animatedMaxWidth, maxHeight: this.config.media?.animatedMaxHeight, crf: this.config.media?.animatedCrf, + leadingStillDuration: animatedLeadInSeconds, }, ); } else { @@ -1020,7 +1025,18 @@ export class AnkiIntegration { return getPreferredWordValueFromExtractedFields(fields, this.config); } - private async generateMediaForMerge(): Promise<{ + private async getAnimatedImageLeadInSeconds(noteInfo: NoteInfo): Promise { + return resolveAnimatedImageLeadInSeconds({ + config: this.config, + noteInfo, + resolveConfiguredFieldName: (candidateNoteInfo, ...preferredNames) => + this.resolveConfiguredFieldName(candidateNoteInfo, ...preferredNames), + retrieveMediaFileBase64: (filename) => this.client.retrieveMediaFile(filename), + logWarn: (message, ...args) => log.warn(message, ...args), + }); + } + + private async generateMediaForMerge(noteInfo?: NoteInfo): Promise<{ audioField?: string; audioValue?: string; imageField?: string; @@ -1057,8 +1073,11 @@ export class AnkiIntegration { if (this.config.media?.generateImage && this.mpvClient?.currentVideoPath) { try { + const animatedLeadInSeconds = noteInfo + ? await this.getAnimatedImageLeadInSeconds(noteInfo) + : 0; const imageFilename = this.generateImageFilename(); - const imageBuffer = await this.generateImage(); + const imageBuffer = await this.generateImage(animatedLeadInSeconds); if (imageBuffer) { await this.client.storeMediaFile(imageFilename, imageBuffer); result.imageField = this.config.fields?.image || DEFAULT_ANKI_CONNECT_CONFIG.fields.image; diff --git a/src/anki-integration/animated-image-sync.test.ts b/src/anki-integration/animated-image-sync.test.ts new file mode 100644 index 0000000..c0d25cf --- /dev/null +++ b/src/anki-integration/animated-image-sync.test.ts @@ -0,0 +1,82 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; + +import { resolveAnimatedImageLeadInSeconds, extractSoundFilenames } from './animated-image-sync'; + +test('extractSoundFilenames returns ordered sound filenames from an Anki field value', () => { + assert.deepEqual( + extractSoundFilenames('before [sound:word.mp3] middle [sound:alt.ogg] after'), + ['word.mp3', 'alt.ogg'], + ); +}); + +test('resolveAnimatedImageLeadInSeconds sums configured word audio durations for animated images', async () => { + const leadInSeconds = await resolveAnimatedImageLeadInSeconds({ + config: { + fields: { + audio: 'ExpressionAudio', + }, + media: { + imageType: 'avif', + syncAnimatedImageToWordAudio: true, + }, + }, + noteInfo: { + noteId: 42, + fields: { + ExpressionAudio: { + value: '[sound:word.mp3][sound:alt.ogg]', + }, + }, + }, + resolveConfiguredFieldName: (noteInfo, ...preferredNames) => { + for (const preferredName of preferredNames) { + if (!preferredName) continue; + const resolved = Object.keys(noteInfo.fields).find( + (fieldName) => fieldName.toLowerCase() === preferredName.toLowerCase(), + ); + if (resolved) return resolved; + } + return null; + }, + retrieveMediaFileBase64: async (filename) => + filename === 'word.mp3' ? 'd29yZA==' : filename === 'alt.ogg' ? 'YWx0' : '', + probeAudioDurationSeconds: async (_buffer, filename) => + filename === 'word.mp3' ? 0.41 : filename === 'alt.ogg' ? 0.84 : null, + logWarn: () => undefined, + }); + + assert.equal(leadInSeconds, 1.25); +}); + +test('resolveAnimatedImageLeadInSeconds falls back to zero when sync is disabled', async () => { + const leadInSeconds = await resolveAnimatedImageLeadInSeconds({ + config: { + fields: { + audio: 'ExpressionAudio', + }, + media: { + imageType: 'avif', + syncAnimatedImageToWordAudio: false, + }, + }, + noteInfo: { + noteId: 42, + fields: { + ExpressionAudio: { + value: '[sound:word.mp3]', + }, + }, + }, + resolveConfiguredFieldName: () => 'ExpressionAudio', + retrieveMediaFileBase64: async () => { + throw new Error('should not be called'); + }, + probeAudioDurationSeconds: async () => { + throw new Error('should not be called'); + }, + logWarn: () => undefined, + }); + + assert.equal(leadInSeconds, 0); +}); diff --git a/src/anki-integration/animated-image-sync.ts b/src/anki-integration/animated-image-sync.ts new file mode 100644 index 0000000..9a53df9 --- /dev/null +++ b/src/anki-integration/animated-image-sync.ts @@ -0,0 +1,133 @@ +import { execFile as nodeExecFile } from 'node:child_process'; +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; + +import { DEFAULT_ANKI_CONNECT_CONFIG } from '../config'; +import type { AnkiConnectConfig } from '../types'; + +type NoteInfoLike = { + noteId: number; + fields: Record; +}; + +interface ResolveAnimatedImageLeadInSecondsArgs { + config: Pick; + noteInfo: TNoteInfo; + resolveConfiguredFieldName: ( + noteInfo: TNoteInfo, + ...preferredNames: (string | undefined)[] + ) => string | null; + retrieveMediaFileBase64: (filename: string) => Promise; + probeAudioDurationSeconds?: (buffer: Buffer, filename: string) => Promise; + logWarn?: (message: string, ...args: unknown[]) => void; +} + +interface ProbeAudioDurationDeps { + execFile?: typeof nodeExecFile; + mkdtempSync?: typeof fs.mkdtempSync; + writeFileSync?: typeof fs.writeFileSync; + rmSync?: typeof fs.rmSync; +} + +export function extractSoundFilenames(value: string): string[] { + const matches = value.matchAll(/\[sound:([^\]]+)\]/gi); + return Array.from(matches, (match) => match[1]?.trim() || '').filter((value) => value.length > 0); +} + +function shouldSyncAnimatedImageToWordAudio(config: Pick): boolean { + return ( + config.media?.imageType === 'avif' && config.media?.syncAnimatedImageToWordAudio !== false + ); +} + +export async function probeAudioDurationSeconds( + buffer: Buffer, + filename: string, + deps: ProbeAudioDurationDeps = {}, +): Promise { + const execFile = deps.execFile ?? nodeExecFile; + const mkdtempSync = deps.mkdtempSync ?? fs.mkdtempSync; + const writeFileSync = deps.writeFileSync ?? fs.writeFileSync; + const rmSync = deps.rmSync ?? fs.rmSync; + + const tempDir = mkdtempSync(path.join(os.tmpdir(), 'subminer-audio-probe-')); + const ext = path.extname(filename) || '.bin'; + const tempPath = path.join(tempDir, `probe${ext}`); + writeFileSync(tempPath, buffer); + + return new Promise((resolve) => { + execFile( + 'ffprobe', + [ + '-v', + 'error', + '-show_entries', + 'format=duration', + '-of', + 'default=noprint_wrappers=1:nokey=1', + tempPath, + ], + (error, stdout) => { + try { + if (error) { + resolve(null); + return; + } + + const durationSeconds = Number.parseFloat((stdout || '').trim()); + resolve(Number.isFinite(durationSeconds) && durationSeconds > 0 ? durationSeconds : null); + } finally { + rmSync(tempDir, { recursive: true, force: true }); + } + }, + ); + }); +} + +export async function resolveAnimatedImageLeadInSeconds({ + config, + noteInfo, + resolveConfiguredFieldName, + retrieveMediaFileBase64, + probeAudioDurationSeconds: probeDuration = probeAudioDurationSeconds, + logWarn, +}: ResolveAnimatedImageLeadInSecondsArgs): Promise { + if (!shouldSyncAnimatedImageToWordAudio(config)) { + return 0; + } + + const wordAudioFieldName = resolveConfiguredFieldName( + noteInfo, + config.fields?.audio, + DEFAULT_ANKI_CONNECT_CONFIG.fields.audio, + ); + if (!wordAudioFieldName) { + return 0; + } + + const wordAudioValue = noteInfo.fields[wordAudioFieldName]?.value || ''; + const filenames = extractSoundFilenames(wordAudioValue); + if (filenames.length === 0) { + return 0; + } + + let totalLeadInSeconds = 0; + for (const filename of filenames) { + const encoded = await retrieveMediaFileBase64(filename); + if (!encoded) { + logWarn?.('Animated image sync skipped: failed to retrieve word audio', filename); + return 0; + } + + const durationSeconds = await probeDuration(Buffer.from(encoded, 'base64'), filename); + if (!(typeof durationSeconds === 'number' && Number.isFinite(durationSeconds))) { + logWarn?.('Animated image sync skipped: failed to probe word audio duration', filename); + return 0; + } + + totalLeadInSeconds += durationSeconds; + } + + return totalLeadInSeconds; +} diff --git a/src/anki-integration/card-creation.ts b/src/anki-integration/card-creation.ts index f7f47cf..6820fe1 100644 --- a/src/anki-integration/card-creation.ts +++ b/src/anki-integration/card-creation.ts @@ -30,6 +30,7 @@ interface CardCreationClient { updateNoteFields(noteId: number, fields: Record): Promise; storeMediaFile(filename: string, data: Buffer): Promise; findNotes(query: string, options?: { maxRetries?: number }): Promise; + retrieveMediaFile(filename: string): Promise; } interface CardCreationMediaGenerator { @@ -60,6 +61,7 @@ interface CardCreationMediaGenerator { maxWidth?: number; maxHeight?: number; crf?: number; + leadingStillDuration?: number; }, ): Promise; } @@ -83,6 +85,7 @@ interface CardCreationDeps { ...preferredNames: (string | undefined)[] ) => string | null; resolveNoteFieldName: (noteInfo: CardCreationNoteInfo, preferredName?: string) => string | null; + getAnimatedImageLeadInSeconds: (noteInfo: CardCreationNoteInfo) => Promise; extractFields: (fields: Record) => Record; processSentence: (mpvSentence: string, noteFields: Record) => string; setCardTypeFields: ( @@ -258,11 +261,14 @@ export class CardCreationService { if (this.deps.getConfig().media?.generateImage) { try { + const animatedLeadInSeconds = + await this.deps.getAnimatedImageLeadInSeconds(noteInfo); const imageFilename = this.generateImageFilename(); const imageBuffer = await this.generateImageBuffer( mpvClient.currentVideoPath, rangeStart, rangeEnd, + animatedLeadInSeconds, ); if (imageBuffer) { @@ -414,11 +420,14 @@ export class CardCreationService { if (this.deps.getConfig().media?.generateImage) { try { + const animatedLeadInSeconds = + await this.deps.getAnimatedImageLeadInSeconds(noteInfo); const imageFilename = this.generateImageFilename(); const imageBuffer = await this.generateImageBuffer( mpvClient.currentVideoPath, startTime, endTime, + animatedLeadInSeconds, ); const imageField = this.deps.getConfig().fields?.image; @@ -679,6 +688,7 @@ export class CardCreationService { videoPath: string, startTime: number, endTime: number, + animatedLeadInSeconds = 0, ): Promise { const mpvClient = this.deps.getMpvClient(); if (!mpvClient) { @@ -707,6 +717,7 @@ export class CardCreationService { maxWidth: this.deps.getConfig().media?.animatedMaxWidth, maxHeight: this.deps.getConfig().media?.animatedMaxHeight, crf: this.deps.getConfig().media?.animatedCrf, + leadingStillDuration: animatedLeadInSeconds, }, ); } diff --git a/src/anki-integration/field-grouping-merge.ts b/src/anki-integration/field-grouping-merge.ts index 043f1e7..4384b49 100644 --- a/src/anki-integration/field-grouping-merge.ts +++ b/src/anki-integration/field-grouping-merge.ts @@ -28,7 +28,7 @@ interface FieldGroupingMergeDeps { ) => string | null; extractFields: (fields: Record) => Record; processSentence: (mpvSentence: string, noteFields: Record) => string; - generateMediaForMerge: () => Promise; + generateMediaForMerge: (noteInfo: FieldGroupingMergeNoteInfo) => Promise; warnFieldParseOnce: (fieldName: string, reason: string, detail?: string) => void; } @@ -132,7 +132,7 @@ export class FieldGroupingMergeCollaborator { } if (includeGeneratedMedia) { - const media = await this.deps.generateMediaForMerge(); + const media = await this.deps.generateMediaForMerge(keepNoteInfo); if (media.audioField && media.audioValue && !sourceFields[media.audioField]) { sourceFields[media.audioField] = media.audioValue; } diff --git a/src/anki-integration/note-update-workflow.test.ts b/src/anki-integration/note-update-workflow.test.ts index dc69c63..49e259f 100644 --- a/src/anki-integration/note-update-workflow.test.ts +++ b/src/anki-integration/note-update-workflow.test.ts @@ -62,6 +62,7 @@ function createWorkflowHarness() { return names.find((name) => name.toLowerCase() === preferred.toLowerCase()) ?? null; }, getResolvedSentenceAudioFieldName: () => null, + getAnimatedImageLeadInSeconds: async () => 0, mergeFieldValue: (_existing: string, next: string, _overwrite: boolean) => next, generateAudioFilename: () => 'audio_1.mp3', generateAudio: async () => null, @@ -163,3 +164,42 @@ test('NoteUpdateWorkflow updates note before auto field grouping merge', async ( assert.deepEqual(callOrder, ['update', 'auto']); assert.equal(harness.updates.length, 1); }); + +test('NoteUpdateWorkflow passes animated image lead-in when syncing avif to word audio', async () => { + const harness = createWorkflowHarness(); + let receivedLeadInSeconds = 0; + + harness.deps.client.notesInfo = async () => + [ + { + noteId: 42, + fields: { + Expression: { value: 'taberu' }, + ExpressionAudio: { value: '[sound:word.mp3]' }, + Sentence: { value: '' }, + Picture: { value: '' }, + }, + }, + ] satisfies NoteUpdateWorkflowNoteInfo[]; + harness.deps.getConfig = () => ({ + fields: { + sentence: 'Sentence', + image: 'Picture', + }, + media: { + generateImage: true, + imageType: 'avif', + syncAnimatedImageToWordAudio: true, + }, + behavior: {}, + }); + harness.deps.getAnimatedImageLeadInSeconds = async () => 1.25; + harness.deps.generateImage = async (leadInSeconds?: number) => { + receivedLeadInSeconds = leadInSeconds ?? 0; + return Buffer.from('image'); + }; + + await harness.workflow.execute(42); + + assert.equal(receivedLeadInSeconds, 1.25); +}); diff --git a/src/anki-integration/note-update-workflow.ts b/src/anki-integration/note-update-workflow.ts index ca6ceb0..26613ff 100644 --- a/src/anki-integration/note-update-workflow.ts +++ b/src/anki-integration/note-update-workflow.ts @@ -22,6 +22,8 @@ export interface NoteUpdateWorkflowDeps { media?: { generateAudio?: boolean; generateImage?: boolean; + imageType?: 'static' | 'avif'; + syncAnimatedImageToWordAudio?: boolean; }; behavior?: { overwriteAudio?: boolean; @@ -60,11 +62,12 @@ export interface NoteUpdateWorkflowDeps { ...preferredNames: (string | undefined)[] ) => string | null; getResolvedSentenceAudioFieldName: (noteInfo: NoteUpdateWorkflowNoteInfo) => string | null; + getAnimatedImageLeadInSeconds: (noteInfo: NoteUpdateWorkflowNoteInfo) => Promise; mergeFieldValue: (existing: string, newValue: string, overwrite: boolean) => string; generateAudioFilename: () => string; generateAudio: () => Promise; generateImageFilename: () => string; - generateImage: () => Promise; + generateImage: (animatedLeadInSeconds?: number) => Promise; formatMiscInfoPattern: (fallbackFilename: string, startTimeSeconds?: number) => string; addConfiguredTagsToNote: (noteId: number) => Promise; showNotification: (noteId: number, label: string | number) => Promise; @@ -153,8 +156,9 @@ export class NoteUpdateWorkflow { if (config.media?.generateImage) { try { + const animatedLeadInSeconds = await this.deps.getAnimatedImageLeadInSeconds(noteInfo); const imageFilename = this.deps.generateImageFilename(); - const imageBuffer = await this.deps.generateImage(); + const imageBuffer = await this.deps.generateImage(animatedLeadInSeconds); if (imageBuffer) { await this.deps.client.storeMediaFile(imageFilename, imageBuffer); diff --git a/src/anki-integration/runtime.test.ts b/src/anki-integration/runtime.test.ts index dbde606..20bbbd2 100644 --- a/src/anki-integration/runtime.test.ts +++ b/src/anki-integration/runtime.test.ts @@ -59,6 +59,10 @@ test('AnkiIntegrationRuntime normalizes url and proxy defaults', () => { normalized.media?.fallbackDuration, DEFAULT_ANKI_CONNECT_CONFIG.media.fallbackDuration, ); + assert.equal( + normalized.media?.syncAnimatedImageToWordAudio, + DEFAULT_ANKI_CONNECT_CONFIG.media.syncAnimatedImageToWordAudio, + ); }); test('AnkiIntegrationRuntime starts proxy transport when proxy mode is enabled', () => { diff --git a/src/config/definitions/defaults-integrations.ts b/src/config/definitions/defaults-integrations.ts index a10e27b..dfd58d3 100644 --- a/src/config/definitions/defaults-integrations.ts +++ b/src/config/definitions/defaults-integrations.ts @@ -47,6 +47,7 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick< animatedMaxWidth: 640, animatedMaxHeight: undefined, animatedCrf: 35, + syncAnimatedImageToWordAudio: true, audioPadding: 0.5, fallbackDuration: 3.0, maxMediaDuration: 30, diff --git a/src/config/definitions/options-integrations.ts b/src/config/definitions/options-integrations.ts index 275ffd5..f0470f4 100644 --- a/src/config/definitions/options-integrations.ts +++ b/src/config/definitions/options-integrations.ts @@ -82,6 +82,13 @@ export function buildIntegrationConfigOptionRegistry( description: 'Automatically update newly added cards.', runtime: runtimeOptionById.get('anki.autoUpdateNewCards'), }, + { + path: 'ankiConnect.media.syncAnimatedImageToWordAudio', + kind: 'boolean', + defaultValue: defaultConfig.ankiConnect.media.syncAnimatedImageToWordAudio, + description: + 'For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio.', + }, { path: 'ankiConnect.knownWords.matchMode', kind: 'enum', diff --git a/src/config/resolve/anki-connect.test.ts b/src/config/resolve/anki-connect.test.ts index 490cefc..0c0a944 100644 --- a/src/config/resolve/anki-connect.test.ts +++ b/src/config/resolve/anki-connect.test.ts @@ -121,6 +121,22 @@ test('accepts configured ankiConnect.fields.word override', () => { ); }); +test('accepts ankiConnect.media.syncAnimatedImageToWordAudio override', () => { + const { context, warnings } = makeContext({ + media: { + syncAnimatedImageToWordAudio: false, + }, + }); + + applyAnkiConnectResolution(context); + + assert.equal(context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio, false); + assert.equal( + warnings.some((warning) => warning.path === 'ankiConnect.media.syncAnimatedImageToWordAudio'), + false, + ); +}); + test('maps legacy ankiConnect.wordField to modern ankiConnect.fields.word', () => { const { context, warnings } = makeContext({ wordField: 'TargetWordLegacy', diff --git a/src/config/resolve/anki-connect.ts b/src/config/resolve/anki-connect.ts index 540d875..b306ab6 100644 --- a/src/config/resolve/anki-connect.ts +++ b/src/config/resolve/anki-connect.ts @@ -31,6 +31,7 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { 'animatedMaxWidth', 'animatedMaxHeight', 'animatedCrf', + 'syncAnimatedImageToWordAudio', 'audioPadding', 'fallbackDuration', 'maxMediaDuration', @@ -536,6 +537,17 @@ export function applyAnkiConnectResolution(context: ResolveContext): void { 'Expected integer between 0 and 63.', ); } + if (!hasOwn(media, 'syncAnimatedImageToWordAudio')) { + mapLegacy( + 'syncAnimatedImageToWordAudio', + asBoolean, + (value) => { + context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio = value; + }, + context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio, + 'Expected boolean.', + ); + } if (!hasOwn(media, 'audioPadding')) { mapLegacy( 'audioPadding', diff --git a/src/core/services/stats-server.ts b/src/core/services/stats-server.ts index c029907..2303bc9 100644 --- a/src/core/services/stats-server.ts +++ b/src/core/services/stats-server.ts @@ -12,6 +12,12 @@ import { getConfiguredWordFieldName, getPreferredNoteFieldValue, } from '../../anki-field-config.js'; +import { resolveAnimatedImageLeadInSeconds } from '../../anki-integration/animated-image-sync.js'; + +type StatsServerNoteInfo = { + noteId: number; + fields: Record; +}; function parseIntQuery(raw: string | undefined, fallback: number, maxLimit?: number): number { if (raw === undefined) return fallback; @@ -40,6 +46,20 @@ function parseEventTypesQuery(raw: string | undefined): number[] | undefined { return parsed.length > 0 ? parsed : undefined; } +function resolveStatsNoteFieldName( + noteInfo: StatsServerNoteInfo, + ...preferredNames: (string | undefined)[] +): string | null { + for (const preferredName of preferredNames) { + if (!preferredName) continue; + const resolved = Object.keys(noteInfo.fields).find( + (fieldName) => fieldName.toLowerCase() === preferredName.toLowerCase(), + ); + if (resolved) return resolved; + } + return null; +} + /** Load known words cache from disk into a Set. Returns null if unavailable. */ function loadKnownWordsSet(cachePath: string | undefined): Set | null { if (!cachePath || !existsSync(cachePath)) return null; @@ -621,36 +641,41 @@ export function createStatsApp( const generateAudio = ankiConfig.media?.generateAudio !== false; const generateImage = ankiConfig.media?.generateImage !== false && mode !== 'audio'; const imageType = ankiConfig.media?.imageType ?? 'static'; + const syncAnimatedImageToWordAudio = + imageType === 'avif' && ankiConfig.media?.syncAnimatedImageToWordAudio !== false; const audioPromise = generateAudio ? mediaGen.generateAudio(sourcePath, startSec, clampedEndSec, audioPadding) : Promise.resolve(null); - let imagePromise: Promise; - if (!generateImage) { - imagePromise = Promise.resolve(null); - } else if (imageType === 'avif') { - imagePromise = mediaGen.generateAnimatedImage( - sourcePath, - startSec, - clampedEndSec, - audioPadding, - { + const createImagePromise = (animatedLeadInSeconds = 0): Promise => { + if (!generateImage) { + return Promise.resolve(null); + } + + if (imageType === 'avif') { + return mediaGen.generateAnimatedImage(sourcePath, startSec, clampedEndSec, audioPadding, { fps: ankiConfig.media?.animatedFps ?? 10, maxWidth: ankiConfig.media?.animatedMaxWidth ?? 640, maxHeight: ankiConfig.media?.animatedMaxHeight, crf: ankiConfig.media?.animatedCrf ?? 35, - }, - ); - } else { + leadingStillDuration: animatedLeadInSeconds, + }); + } + const midpointSec = (startSec + clampedEndSec) / 2; - imagePromise = mediaGen.generateScreenshot(sourcePath, midpointSec, { + return mediaGen.generateScreenshot(sourcePath, midpointSec, { format: ankiConfig.media?.imageFormat ?? 'jpg', quality: ankiConfig.media?.imageQuality ?? 92, maxWidth: ankiConfig.media?.imageMaxWidth, maxHeight: ankiConfig.media?.imageMaxHeight, }); - } + }; + + const imagePromise = + mode === 'word' && syncAnimatedImageToWordAudio + ? Promise.resolve(null) + : createImagePromise(); const errors: string[] = []; let noteId: number; @@ -677,12 +702,31 @@ export function createStatsApp( noteId = yomitanResult.value; const audioBuffer = audioResult.status === 'fulfilled' ? audioResult.value : null; - const imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null; if (audioResult.status === 'rejected') errors.push(`audio: ${(audioResult.reason as Error).message}`); if (imageResult.status === 'rejected') errors.push(`image: ${(imageResult.reason as Error).message}`); + let imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null; + if (syncAnimatedImageToWordAudio && generateImage) { + try { + const noteInfoResult = (await client.notesInfo([noteId])) as StatsServerNoteInfo[]; + const noteInfo = noteInfoResult[0] ?? null; + const animatedLeadInSeconds = noteInfo + ? await resolveAnimatedImageLeadInSeconds({ + config: ankiConfig, + noteInfo, + resolveConfiguredFieldName: (candidateNoteInfo, ...preferredNames) => + resolveStatsNoteFieldName(candidateNoteInfo, ...preferredNames), + retrieveMediaFileBase64: (filename) => client.retrieveMediaFile(filename), + }) + : 0; + imageBuffer = await createImagePromise(animatedLeadInSeconds); + } catch (err) { + errors.push(`image: ${(err as Error).message}`); + } + } + const mediaFields: Record = {}; const timestamp = Date.now(); const sentenceFieldName = ankiConfig.fields?.sentence ?? 'Sentence'; diff --git a/src/media-generator.test.ts b/src/media-generator.test.ts new file mode 100644 index 0000000..fb1cfab --- /dev/null +++ b/src/media-generator.test.ts @@ -0,0 +1,15 @@ +import assert from 'node:assert/strict'; +import test from 'node:test'; + +import { buildAnimatedImageVideoFilter } from './media-generator'; + +test('buildAnimatedImageVideoFilter prepends a cloned first frame when lead-in is provided', () => { + assert.equal( + buildAnimatedImageVideoFilter({ + fps: 10, + maxWidth: 640, + leadingStillDuration: 1.25, + }), + 'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2', + ); +}); diff --git a/src/media-generator.ts b/src/media-generator.ts index 8268b27..479b98a 100644 --- a/src/media-generator.ts +++ b/src/media-generator.ts @@ -24,6 +24,33 @@ import { createLogger } from './logger'; const log = createLogger('media'); +export function buildAnimatedImageVideoFilter(options: { + fps?: number; + maxWidth?: number; + maxHeight?: number; + leadingStillDuration?: number; +}): string { + const { fps = 10, maxWidth = 640, maxHeight, leadingStillDuration = 0 } = options; + const clampedFps = Math.max(1, Math.min(60, fps)); + const vfParts: string[] = []; + + if (leadingStillDuration > 0) { + vfParts.push(`tpad=start_duration=${leadingStillDuration}:start_mode=clone`); + } + + vfParts.push(`fps=${clampedFps}`); + + if (maxWidth && maxWidth > 0 && maxHeight && maxHeight > 0) { + vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`); + } else if (maxWidth && maxWidth > 0) { + vfParts.push(`scale=w=${maxWidth}:h=-2`); + } else if (maxHeight && maxHeight > 0) { + vfParts.push(`scale=w=-2:h=${maxHeight}`); + } + + return vfParts.join(','); +} + export class MediaGenerator { private tempDir: string; private notifyIconDir: string; @@ -289,25 +316,15 @@ export class MediaGenerator { maxWidth?: number; maxHeight?: number; crf?: number; + leadingStillDuration?: number; } = {}, ): Promise { const start = Math.max(0, startTime - padding); const duration = endTime - startTime + 2 * padding; - const { fps = 10, maxWidth = 640, maxHeight, crf = 35 } = options; + const { fps = 10, maxWidth = 640, maxHeight, crf = 35, leadingStillDuration = 0 } = options; - const clampedFps = Math.max(1, Math.min(60, fps)); const clampedCrf = Math.max(0, Math.min(63, crf)); - const vfParts: string[] = []; - vfParts.push(`fps=${clampedFps}`); - if (maxWidth && maxWidth > 0 && maxHeight && maxHeight > 0) { - vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`); - } else if (maxWidth && maxWidth > 0) { - vfParts.push(`scale=w=${maxWidth}:h=-2`); - } else if (maxHeight && maxHeight > 0) { - vfParts.push(`scale=w=-2:h=${maxHeight}`); - } - const av1Encoder = await this.detectAv1Encoder(); if (!av1Encoder) { throw new Error( @@ -338,7 +355,12 @@ export class MediaGenerator { '-i', videoPath, '-vf', - vfParts.join(','), + buildAnimatedImageVideoFilter({ + fps, + maxWidth, + maxHeight, + leadingStillDuration, + }), ...encoderArgs, '-y', outputPath, diff --git a/src/types.ts b/src/types.ts index c8a743b..899caaf 100644 --- a/src/types.ts +++ b/src/types.ts @@ -241,6 +241,7 @@ export interface AnkiConnectConfig { animatedMaxWidth?: number; animatedMaxHeight?: number; animatedCrf?: number; + syncAnimatedImageToWordAudio?: boolean; audioPadding?: number; fallbackDuration?: number; maxMediaDuration?: number; @@ -745,6 +746,7 @@ export interface ResolvedConfig { animatedMaxWidth: number; animatedMaxHeight?: number; animatedCrf: number; + syncAnimatedImageToWordAudio: boolean; audioPadding: number; fallbackDuration: number; maxMediaDuration: number;