feat: sync animated anki images to sentence audio

This commit is contained in:
2026-03-18 19:21:12 -07:00
parent f4cce31d4a
commit ad1f66a842
18 changed files with 452 additions and 38 deletions

View File

@@ -340,6 +340,7 @@
"animatedFps": 10, // Animated fps setting.
"animatedMaxWidth": 640, // Animated max width setting.
"animatedCrf": 35, // Animated crf setting.
"syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
"audioPadding": 0.5, // Audio padding setting.
"fallbackDuration": 3, // Fallback duration setting.
"maxMediaDuration": 30 // Max media duration setting.

View File

@@ -340,6 +340,7 @@
"animatedFps": 10, // Animated fps setting.
"animatedMaxWidth": 640, // Animated max width setting.
"animatedCrf": 35, // Animated crf setting.
"syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
"audioPadding": 0.5, // Audio padding setting.
"fallbackDuration": 3, // Fallback duration setting.
"maxMediaDuration": 30 // Max media duration setting.

View File

@@ -54,6 +54,7 @@ import { FieldGroupingService } from './anki-integration/field-grouping';
import { FieldGroupingMergeCollaborator } from './anki-integration/field-grouping-merge';
import { NoteUpdateWorkflow } from './anki-integration/note-update-workflow';
import { FieldGroupingWorkflow } from './anki-integration/field-grouping-workflow';
import { resolveAnimatedImageLeadInSeconds } from './anki-integration/animated-image-sync';
import { AnkiIntegrationRuntime, normalizeAnkiIntegrationConfig } from './anki-integration/runtime';
const log = createLogger('anki').child('integration');
@@ -190,7 +191,7 @@ export class AnkiIntegration {
this.resolveNoteFieldName(noteInfo, preferredName),
extractFields: (fields) => this.extractFields(fields),
processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields),
generateMediaForMerge: () => this.generateMediaForMerge(),
generateMediaForMerge: (noteInfo) => this.generateMediaForMerge(noteInfo),
warnFieldParseOnce: (fieldName, reason, detail) =>
this.warnFieldParseOnce(fieldName, reason, detail),
});
@@ -286,6 +287,7 @@ export class AnkiIntegration {
storeMediaFile: (filename, data) => this.client.storeMediaFile(filename, data),
findNotes: async (query, options) =>
(await this.client.findNotes(query, options)) as number[],
retrieveMediaFile: (filename) => this.client.retrieveMediaFile(filename),
},
mediaGenerator: {
generateAudio: (videoPath, startTime, endTime, audioPadding, audioStreamIndex) =>
@@ -319,6 +321,7 @@ export class AnkiIntegration {
this.resolveConfiguredFieldName(noteInfo, ...preferredNames),
resolveNoteFieldName: (noteInfo, preferredName) =>
this.resolveNoteFieldName(noteInfo, preferredName),
getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo),
extractFields: (fields) => this.extractFields(fields),
processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields),
setCardTypeFields: (updatedFields, availableFieldNames, cardKind) =>
@@ -407,12 +410,13 @@ export class AnkiIntegration {
this.resolveConfiguredFieldName(noteInfo, ...preferredNames),
getResolvedSentenceAudioFieldName: (noteInfo) =>
this.getResolvedSentenceAudioFieldName(noteInfo),
getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo),
mergeFieldValue: (existing, newValue, overwrite) =>
this.mergeFieldValue(existing, newValue, overwrite),
generateAudioFilename: () => this.generateAudioFilename(),
generateAudio: () => this.generateAudio(),
generateImageFilename: () => this.generateImageFilename(),
generateImage: () => this.generateImage(),
generateImage: (animatedLeadInSeconds) => this.generateImage(animatedLeadInSeconds),
formatMiscInfoPattern: (fallbackFilename, startTimeSeconds) =>
this.formatMiscInfoPattern(fallbackFilename, startTimeSeconds),
addConfiguredTagsToNote: (noteId) => this.addConfiguredTagsToNote(noteId),
@@ -637,7 +641,7 @@ export class AnkiIntegration {
);
}
private async generateImage(): Promise<Buffer | null> {
private async generateImage(animatedLeadInSeconds = 0): Promise<Buffer | null> {
if (!this.mpvClient || !this.mpvClient.currentVideoPath) {
return null;
}
@@ -665,6 +669,7 @@ export class AnkiIntegration {
maxWidth: this.config.media?.animatedMaxWidth,
maxHeight: this.config.media?.animatedMaxHeight,
crf: this.config.media?.animatedCrf,
leadingStillDuration: animatedLeadInSeconds,
},
);
} else {
@@ -1020,7 +1025,18 @@ export class AnkiIntegration {
return getPreferredWordValueFromExtractedFields(fields, this.config);
}
private async generateMediaForMerge(): Promise<{
private async getAnimatedImageLeadInSeconds(noteInfo: NoteInfo): Promise<number> {
return resolveAnimatedImageLeadInSeconds({
config: this.config,
noteInfo,
resolveConfiguredFieldName: (candidateNoteInfo, ...preferredNames) =>
this.resolveConfiguredFieldName(candidateNoteInfo, ...preferredNames),
retrieveMediaFileBase64: (filename) => this.client.retrieveMediaFile(filename),
logWarn: (message, ...args) => log.warn(message, ...args),
});
}
private async generateMediaForMerge(noteInfo?: NoteInfo): Promise<{
audioField?: string;
audioValue?: string;
imageField?: string;
@@ -1057,8 +1073,11 @@ export class AnkiIntegration {
if (this.config.media?.generateImage && this.mpvClient?.currentVideoPath) {
try {
const animatedLeadInSeconds = noteInfo
? await this.getAnimatedImageLeadInSeconds(noteInfo)
: 0;
const imageFilename = this.generateImageFilename();
const imageBuffer = await this.generateImage();
const imageBuffer = await this.generateImage(animatedLeadInSeconds);
if (imageBuffer) {
await this.client.storeMediaFile(imageFilename, imageBuffer);
result.imageField = this.config.fields?.image || DEFAULT_ANKI_CONNECT_CONFIG.fields.image;

View File

@@ -0,0 +1,82 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { resolveAnimatedImageLeadInSeconds, extractSoundFilenames } from './animated-image-sync';
// Checks that both `[sound:...]` tags in the field value are returned, in the
// order they appear, and that the surrounding text ("before", "middle",
// "after") is not part of the result.
test('extractSoundFilenames returns ordered sound filenames from an Anki field value', () => {
assert.deepEqual(
extractSoundFilenames('before [sound:word.mp3] middle [sound:alt.ogg] after'),
['word.mp3', 'alt.ogg'],
);
});
// With sync enabled and imageType 'avif', the lead-in is expected to be the sum
// of the probed durations of every sound file in the configured audio field
// (0.41 + 0.84 = 1.25 here). All I/O is replaced by stubs.
test('resolveAnimatedImageLeadInSeconds sums configured word audio durations for animated images', async () => {
const leadInSeconds = await resolveAnimatedImageLeadInSeconds({
config: {
fields: {
audio: 'ExpressionAudio',
},
media: {
imageType: 'avif',
syncAnimatedImageToWordAudio: true,
},
},
noteInfo: {
noteId: 42,
fields: {
ExpressionAudio: {
// Two sound tags, so two retrieve/probe round trips are expected.
value: '[sound:word.mp3][sound:alt.ogg]',
},
},
},
// Stub resolver: first preferred name that matches a note field, ignoring case.
resolveConfiguredFieldName: (noteInfo, ...preferredNames) => {
for (const preferredName of preferredNames) {
if (!preferredName) continue;
const resolved = Object.keys(noteInfo.fields).find(
(fieldName) => fieldName.toLowerCase() === preferredName.toLowerCase(),
);
if (resolved) return resolved;
}
return null;
},
// Stub media fetch: returns a non-empty base64 string for the two known files only.
retrieveMediaFileBase64: async (filename) =>
filename === 'word.mp3' ? 'd29yZA==' : filename === 'alt.ogg' ? 'YWx0' : '',
// Stub probe: fixed duration per filename, null for anything else.
probeAudioDurationSeconds: async (_buffer, filename) =>
filename === 'word.mp3' ? 0.41 : filename === 'alt.ogg' ? 0.84 : null,
logWarn: () => undefined,
});
assert.equal(leadInSeconds, 1.25);
});
// With syncAnimatedImageToWordAudio set to false the result must be 0, and
// neither the media fetch nor the probe may run: both stubs throw if called.
test('resolveAnimatedImageLeadInSeconds falls back to zero when sync is disabled', async () => {
const leadInSeconds = await resolveAnimatedImageLeadInSeconds({
config: {
fields: {
audio: 'ExpressionAudio',
},
media: {
imageType: 'avif',
syncAnimatedImageToWordAudio: false,
},
},
noteInfo: {
noteId: 42,
fields: {
ExpressionAudio: {
value: '[sound:word.mp3]',
},
},
},
resolveConfiguredFieldName: () => 'ExpressionAudio',
// Guard stub: reaching it means the disabled check did not short-circuit.
retrieveMediaFileBase64: async () => {
throw new Error('should not be called');
},
// Guard stub: same as above, for the duration probe.
probeAudioDurationSeconds: async () => {
throw new Error('should not be called');
},
logWarn: () => undefined,
});
assert.equal(leadInSeconds, 0);
});

View File

@@ -0,0 +1,133 @@
import { execFile as nodeExecFile } from 'node:child_process';
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { DEFAULT_ANKI_CONNECT_CONFIG } from '../config';
import type { AnkiConnectConfig } from '../types';
/** Minimal note shape this module needs: a note id plus a map from field name to `{ value }`. */
type NoteInfoLike = {
noteId: number;
fields: Record<string, { value: string }>;
};
/** Arguments accepted by `resolveAnimatedImageLeadInSeconds`. */
interface ResolveAnimatedImageLeadInSecondsArgs<TNoteInfo extends NoteInfoLike> {
/** Only the `fields` and `media` sections of the Anki config are required. */
config: Pick<AnkiConnectConfig, 'fields' | 'media'>;
/** Note whose fields are inspected. */
noteInfo: TNoteInfo;
/** Maps a list of preferred field names to a field name on the note, or null when none resolves. */
resolveConfiguredFieldName: (
noteInfo: TNoteInfo,
...preferredNames: (string | undefined)[]
) => string | null;
/** Fetches a media file by name; the returned string is decoded as base64 by the consumer. */
retrieveMediaFileBase64: (filename: string) => Promise<string>;
/** Optional duration probe override; returns seconds, or null when the duration is unknown. */
probeAudioDurationSeconds?: (buffer: Buffer, filename: string) => Promise<number | null>;
/** Optional sink for warning messages. */
logWarn?: (message: string, ...args: unknown[]) => void;
}
/**
 * Optional replacements for the child-process and filesystem functions used
 * while probing audio duration. Each member is typed after its Node.js
 * counterpart; all are optional.
 */
interface ProbeAudioDurationDeps {
execFile?: typeof nodeExecFile;
mkdtempSync?: typeof fs.mkdtempSync;
writeFileSync?: typeof fs.writeFileSync;
rmSync?: typeof fs.rmSync;
}
/**
 * Collects the filenames referenced by `[sound:...]` tags in an Anki field value.
 *
 * Tags are matched case-insensitively, filenames are trimmed, and tags whose
 * filename is blank after trimming are dropped.
 *
 * @param value Raw field value, possibly containing other text or markup.
 * @returns Filenames in the order their tags appear in `value`.
 */
export function extractSoundFilenames(value: string): string[] {
  const filenames: string[] = [];
  for (const tagMatch of value.matchAll(/\[sound:([^\]]+)\]/gi)) {
    const filename = (tagMatch[1] ?? '').trim();
    if (filename.length > 0) {
      filenames.push(filename);
    }
  }
  return filenames;
}
/**
 * Decides whether animated-image/word-audio sync applies to a config.
 *
 * Sync applies only when the image type is `'avif'` and the
 * `syncAnimatedImageToWordAudio` option is not explicitly `false`
 * (an unset option counts as enabled).
 */
function shouldSyncAnimatedImageToWordAudio(config: Pick<AnkiConnectConfig, 'media'>): boolean {
  const media = config.media;
  if (!media || media.imageType !== 'avif') {
    return false;
  }
  return media.syncAnimatedImageToWordAudio !== false;
}
/**
 * Measures the duration of an in-memory audio file by writing it to a
 * temporary file and asking `ffprobe` for the container duration.
 *
 * The probe is best-effort. Any failure (temp directory or file cannot be
 * created, `ffprobe` cannot be started or reports an error, output is not a
 * positive finite number) yields `null` rather than a rejection, and the
 * temporary directory is removed on every path.
 *
 * @param buffer Raw audio bytes to probe.
 * @param filename Original media filename; only its extension is used, so the
 *   temporary file keeps it (falls back to `.bin`).
 * @param deps Optional replacements for the process/filesystem calls.
 * @returns Duration in seconds, or `null` when it could not be determined.
 */
export async function probeAudioDurationSeconds(
  buffer: Buffer,
  filename: string,
  deps: ProbeAudioDurationDeps = {},
): Promise<number | null> {
  const execFile = deps.execFile ?? nodeExecFile;
  const mkdtempSync = deps.mkdtempSync ?? fs.mkdtempSync;
  const writeFileSync = deps.writeFileSync ?? fs.writeFileSync;
  const rmSync = deps.rmSync ?? fs.rmSync;

  let tempDir: string | null = null;

  // Removes the temp directory at most once; a cleanup failure must never
  // escape (inside the ffprobe callback it would be an uncaught exception).
  const cleanup = (): void => {
    if (tempDir === null) {
      return;
    }
    const dirToRemove = tempDir;
    tempDir = null;
    try {
      rmSync(dirToRemove, { recursive: true, force: true });
    } catch {
      // Best-effort: a leftover temp directory is preferable to a crash.
    }
  };

  try {
    tempDir = mkdtempSync(path.join(os.tmpdir(), 'subminer-audio-probe-'));
    const ext = path.extname(filename) || '.bin';
    const tempPath = path.join(tempDir, `probe${ext}`);
    writeFileSync(tempPath, buffer);

    return await new Promise<number | null>((resolve) => {
      execFile(
        'ffprobe',
        [
          '-v',
          'error',
          '-show_entries',
          'format=duration',
          '-of',
          'default=noprint_wrappers=1:nokey=1',
          tempPath,
        ],
        (error, stdout) => {
          try {
            if (error) {
              resolve(null);
              return;
            }
            const durationSeconds = Number.parseFloat((stdout || '').trim());
            resolve(
              Number.isFinite(durationSeconds) && durationSeconds > 0 ? durationSeconds : null,
            );
          } finally {
            cleanup();
          }
        },
      );
    });
  } catch {
    // Synchronous failure (mkdtemp, write, or spawning ffprobe): make sure the
    // temp directory does not leak and report "unknown duration".
    cleanup();
    return null;
  }
}
/**
 * Computes how long (in seconds) an animated image should hold its first frame
 * so that motion starts after the note's existing word audio has played.
 *
 * The result is the sum of the probed durations of every `[sound:...]` file in
 * the note's word-audio field. The helper is best-effort and never rejects
 * because of media retrieval or probing: it returns `0` when
 * `shouldSyncAnimatedImageToWordAudio` rejects the config, when the audio
 * field cannot be resolved or holds no sound tags, or when any referenced
 * file cannot be retrieved or probed (a warning is logged in that case).
 *
 * @param args.config Anki config (`fields` and `media` sections).
 * @param args.noteInfo Note whose word-audio field is inspected.
 * @param args.resolveConfiguredFieldName Resolves the configured/default audio field name on the note.
 * @param args.retrieveMediaFileBase64 Fetches a media file as a base64 string.
 * @param args.probeAudioDurationSeconds Optional probe override; defaults to `probeAudioDurationSeconds`.
 * @param args.logWarn Optional warning sink.
 * @returns Total lead-in in seconds, or `0` when sync does not apply or fails.
 */
export async function resolveAnimatedImageLeadInSeconds<TNoteInfo extends NoteInfoLike>({
  config,
  noteInfo,
  resolveConfiguredFieldName,
  retrieveMediaFileBase64,
  probeAudioDurationSeconds: probeDuration = probeAudioDurationSeconds,
  logWarn,
}: ResolveAnimatedImageLeadInSecondsArgs<TNoteInfo>): Promise<number> {
  if (!shouldSyncAnimatedImageToWordAudio(config)) {
    return 0;
  }

  const wordAudioFieldName = resolveConfiguredFieldName(
    noteInfo,
    config.fields?.audio,
    DEFAULT_ANKI_CONNECT_CONFIG.fields.audio,
  );
  if (!wordAudioFieldName) {
    return 0;
  }

  const wordAudioValue = noteInfo.fields[wordAudioFieldName]?.value || '';
  const filenames = extractSoundFilenames(wordAudioValue);
  if (filenames.length === 0) {
    return 0;
  }

  let totalLeadInSeconds = 0;
  for (const filename of filenames) {
    // A thrown retrieval error is treated like an empty result: skip the sync
    // instead of failing the caller's whole image generation.
    let encoded: string;
    try {
      encoded = await retrieveMediaFileBase64(filename);
    } catch (error) {
      logWarn?.('Animated image sync skipped: failed to retrieve word audio', filename, error);
      return 0;
    }
    if (!encoded) {
      logWarn?.('Animated image sync skipped: failed to retrieve word audio', filename);
      return 0;
    }

    // Same policy for the probe, including decode failures of a malformed payload.
    let durationSeconds: number | null;
    try {
      durationSeconds = await probeDuration(Buffer.from(encoded, 'base64'), filename);
    } catch (error) {
      logWarn?.(
        'Animated image sync skipped: failed to probe word audio duration',
        filename,
        error,
      );
      return 0;
    }
    if (
      !(
        typeof durationSeconds === 'number' &&
        Number.isFinite(durationSeconds) &&
        durationSeconds >= 0
      )
    ) {
      logWarn?.('Animated image sync skipped: failed to probe word audio duration', filename);
      return 0;
    }

    totalLeadInSeconds += durationSeconds;
  }

  return totalLeadInSeconds;
}

View File

@@ -30,6 +30,7 @@ interface CardCreationClient {
updateNoteFields(noteId: number, fields: Record<string, string>): Promise<void>;
storeMediaFile(filename: string, data: Buffer): Promise<void>;
findNotes(query: string, options?: { maxRetries?: number }): Promise<number[]>;
retrieveMediaFile(filename: string): Promise<string>;
}
interface CardCreationMediaGenerator {
@@ -60,6 +61,7 @@ interface CardCreationMediaGenerator {
maxWidth?: number;
maxHeight?: number;
crf?: number;
leadingStillDuration?: number;
},
): Promise<Buffer | null>;
}
@@ -83,6 +85,7 @@ interface CardCreationDeps {
...preferredNames: (string | undefined)[]
) => string | null;
resolveNoteFieldName: (noteInfo: CardCreationNoteInfo, preferredName?: string) => string | null;
getAnimatedImageLeadInSeconds: (noteInfo: CardCreationNoteInfo) => Promise<number>;
extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
setCardTypeFields: (
@@ -258,11 +261,14 @@ export class CardCreationService {
if (this.deps.getConfig().media?.generateImage) {
try {
const animatedLeadInSeconds =
await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
const imageFilename = this.generateImageFilename();
const imageBuffer = await this.generateImageBuffer(
mpvClient.currentVideoPath,
rangeStart,
rangeEnd,
animatedLeadInSeconds,
);
if (imageBuffer) {
@@ -414,11 +420,14 @@ export class CardCreationService {
if (this.deps.getConfig().media?.generateImage) {
try {
const animatedLeadInSeconds =
await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
const imageFilename = this.generateImageFilename();
const imageBuffer = await this.generateImageBuffer(
mpvClient.currentVideoPath,
startTime,
endTime,
animatedLeadInSeconds,
);
const imageField = this.deps.getConfig().fields?.image;
@@ -679,6 +688,7 @@ export class CardCreationService {
videoPath: string,
startTime: number,
endTime: number,
animatedLeadInSeconds = 0,
): Promise<Buffer | null> {
const mpvClient = this.deps.getMpvClient();
if (!mpvClient) {
@@ -707,6 +717,7 @@ export class CardCreationService {
maxWidth: this.deps.getConfig().media?.animatedMaxWidth,
maxHeight: this.deps.getConfig().media?.animatedMaxHeight,
crf: this.deps.getConfig().media?.animatedCrf,
leadingStillDuration: animatedLeadInSeconds,
},
);
}

View File

@@ -28,7 +28,7 @@ interface FieldGroupingMergeDeps {
) => string | null;
extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
generateMediaForMerge: () => Promise<FieldGroupingMergeMedia>;
generateMediaForMerge: (noteInfo: FieldGroupingMergeNoteInfo) => Promise<FieldGroupingMergeMedia>;
warnFieldParseOnce: (fieldName: string, reason: string, detail?: string) => void;
}
@@ -132,7 +132,7 @@ export class FieldGroupingMergeCollaborator {
}
if (includeGeneratedMedia) {
const media = await this.deps.generateMediaForMerge();
const media = await this.deps.generateMediaForMerge(keepNoteInfo);
if (media.audioField && media.audioValue && !sourceFields[media.audioField]) {
sourceFields[media.audioField] = media.audioValue;
}

View File

@@ -62,6 +62,7 @@ function createWorkflowHarness() {
return names.find((name) => name.toLowerCase() === preferred.toLowerCase()) ?? null;
},
getResolvedSentenceAudioFieldName: () => null,
getAnimatedImageLeadInSeconds: async () => 0,
mergeFieldValue: (_existing: string, next: string, _overwrite: boolean) => next,
generateAudioFilename: () => 'audio_1.mp3',
generateAudio: async () => null,
@@ -163,3 +164,42 @@ test('NoteUpdateWorkflow updates note before auto field grouping merge', async (
assert.deepEqual(callOrder, ['update', 'auto']);
assert.equal(harness.updates.length, 1);
});
// Verifies the wiring between the two workflow deps: the value returned by
// getAnimatedImageLeadInSeconds must be the argument generateImage receives.
test('NoteUpdateWorkflow passes animated image lead-in when syncing avif to word audio', async () => {
const harness = createWorkflowHarness();
let receivedLeadInSeconds = 0;
// Note fixture: has word audio, with empty sentence and picture fields.
harness.deps.client.notesInfo = async () =>
[
{
noteId: 42,
fields: {
Expression: { value: 'taberu' },
ExpressionAudio: { value: '[sound:word.mp3]' },
Sentence: { value: '' },
Picture: { value: '' },
},
},
] satisfies NoteUpdateWorkflowNoteInfo[];
// Image generation on, animated (avif) type, sync enabled.
harness.deps.getConfig = () => ({
fields: {
sentence: 'Sentence',
image: 'Picture',
},
media: {
generateImage: true,
imageType: 'avif',
syncAnimatedImageToWordAudio: true,
},
behavior: {},
});
// Fixed lead-in so the assertion can check that exactly this value is forwarded.
harness.deps.getAnimatedImageLeadInSeconds = async () => 1.25;
// Capture whatever lead-in the workflow hands to image generation.
harness.deps.generateImage = async (leadInSeconds?: number) => {
receivedLeadInSeconds = leadInSeconds ?? 0;
return Buffer.from('image');
};
await harness.workflow.execute(42);
assert.equal(receivedLeadInSeconds, 1.25);
});

View File

@@ -22,6 +22,8 @@ export interface NoteUpdateWorkflowDeps {
media?: {
generateAudio?: boolean;
generateImage?: boolean;
imageType?: 'static' | 'avif';
syncAnimatedImageToWordAudio?: boolean;
};
behavior?: {
overwriteAudio?: boolean;
@@ -60,11 +62,12 @@ export interface NoteUpdateWorkflowDeps {
...preferredNames: (string | undefined)[]
) => string | null;
getResolvedSentenceAudioFieldName: (noteInfo: NoteUpdateWorkflowNoteInfo) => string | null;
getAnimatedImageLeadInSeconds: (noteInfo: NoteUpdateWorkflowNoteInfo) => Promise<number>;
mergeFieldValue: (existing: string, newValue: string, overwrite: boolean) => string;
generateAudioFilename: () => string;
generateAudio: () => Promise<Buffer | null>;
generateImageFilename: () => string;
generateImage: () => Promise<Buffer | null>;
generateImage: (animatedLeadInSeconds?: number) => Promise<Buffer | null>;
formatMiscInfoPattern: (fallbackFilename: string, startTimeSeconds?: number) => string;
addConfiguredTagsToNote: (noteId: number) => Promise<void>;
showNotification: (noteId: number, label: string | number) => Promise<void>;
@@ -153,8 +156,9 @@ export class NoteUpdateWorkflow {
if (config.media?.generateImage) {
try {
const animatedLeadInSeconds = await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
const imageFilename = this.deps.generateImageFilename();
const imageBuffer = await this.deps.generateImage();
const imageBuffer = await this.deps.generateImage(animatedLeadInSeconds);
if (imageBuffer) {
await this.deps.client.storeMediaFile(imageFilename, imageBuffer);

View File

@@ -59,6 +59,10 @@ test('AnkiIntegrationRuntime normalizes url and proxy defaults', () => {
normalized.media?.fallbackDuration,
DEFAULT_ANKI_CONNECT_CONFIG.media.fallbackDuration,
);
assert.equal(
normalized.media?.syncAnimatedImageToWordAudio,
DEFAULT_ANKI_CONNECT_CONFIG.media.syncAnimatedImageToWordAudio,
);
});
test('AnkiIntegrationRuntime starts proxy transport when proxy mode is enabled', () => {

View File

@@ -47,6 +47,7 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick<
animatedMaxWidth: 640,
animatedMaxHeight: undefined,
animatedCrf: 35,
syncAnimatedImageToWordAudio: true,
audioPadding: 0.5,
fallbackDuration: 3.0,
maxMediaDuration: 30,

View File

@@ -82,6 +82,13 @@ export function buildIntegrationConfigOptionRegistry(
description: 'Automatically update newly added cards.',
runtime: runtimeOptionById.get('anki.autoUpdateNewCards'),
},
{
path: 'ankiConnect.media.syncAnimatedImageToWordAudio',
kind: 'boolean',
defaultValue: defaultConfig.ankiConnect.media.syncAnimatedImageToWordAudio,
description:
'For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio.',
},
{
path: 'ankiConnect.knownWords.matchMode',
kind: 'enum',

View File

@@ -121,6 +121,22 @@ test('accepts configured ankiConnect.fields.word override', () => {
);
});
// An explicit `false` under media must end up in the resolved config and must
// not produce a warning for that config path.
test('accepts ankiConnect.media.syncAnimatedImageToWordAudio override', () => {
const { context, warnings } = makeContext({
media: {
syncAnimatedImageToWordAudio: false,
},
});
applyAnkiConnectResolution(context);
assert.equal(context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio, false);
assert.equal(
warnings.some((warning) => warning.path === 'ankiConnect.media.syncAnimatedImageToWordAudio'),
false,
);
});
test('maps legacy ankiConnect.wordField to modern ankiConnect.fields.word', () => {
const { context, warnings } = makeContext({
wordField: 'TargetWordLegacy',

View File

@@ -31,6 +31,7 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
'animatedMaxWidth',
'animatedMaxHeight',
'animatedCrf',
'syncAnimatedImageToWordAudio',
'audioPadding',
'fallbackDuration',
'maxMediaDuration',
@@ -536,6 +537,17 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
'Expected integer between 0 and 63.',
);
}
if (!hasOwn(media, 'syncAnimatedImageToWordAudio')) {
mapLegacy(
'syncAnimatedImageToWordAudio',
asBoolean,
(value) => {
context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio = value;
},
context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio,
'Expected boolean.',
);
}
if (!hasOwn(media, 'audioPadding')) {
mapLegacy(
'audioPadding',

View File

@@ -12,6 +12,12 @@ import {
getConfiguredWordFieldName,
getPreferredNoteFieldValue,
} from '../../anki-field-config.js';
import { resolveAnimatedImageLeadInSeconds } from '../../anki-integration/animated-image-sync.js';
/** Minimal note shape used in this module: a note id plus a map from field name to `{ value }`. */
type StatsServerNoteInfo = {
noteId: number;
fields: Record<string, { value: string }>;
};
function parseIntQuery(raw: string | undefined, fallback: number, maxLimit?: number): number {
if (raw === undefined) return fallback;
@@ -40,6 +46,20 @@ function parseEventTypesQuery(raw: string | undefined): number[] | undefined {
return parsed.length > 0 ? parsed : undefined;
}
/**
 * Finds the note field matching the first usable preferred name.
 *
 * Preferred names are tried in order; empty or undefined entries are skipped.
 * Comparison ignores case, and the field name is returned with the casing it
 * has on the note.
 *
 * @returns The matching field name, or null when no preferred name matches.
 */
function resolveStatsNoteFieldName(
  noteInfo: StatsServerNoteInfo,
  ...preferredNames: (string | undefined)[]
): string | null {
  for (const candidate of preferredNames) {
    if (candidate) {
      const wanted = candidate.toLowerCase();
      for (const fieldName of Object.keys(noteInfo.fields)) {
        if (fieldName.toLowerCase() === wanted) {
          return fieldName;
        }
      }
    }
  }
  return null;
}
/** Load known words cache from disk into a Set. Returns null if unavailable. */
function loadKnownWordsSet(cachePath: string | undefined): Set<string> | null {
if (!cachePath || !existsSync(cachePath)) return null;
@@ -621,36 +641,41 @@ export function createStatsApp(
const generateAudio = ankiConfig.media?.generateAudio !== false;
const generateImage = ankiConfig.media?.generateImage !== false && mode !== 'audio';
const imageType = ankiConfig.media?.imageType ?? 'static';
const syncAnimatedImageToWordAudio =
imageType === 'avif' && ankiConfig.media?.syncAnimatedImageToWordAudio !== false;
const audioPromise = generateAudio
? mediaGen.generateAudio(sourcePath, startSec, clampedEndSec, audioPadding)
: Promise.resolve(null);
let imagePromise: Promise<Buffer | null>;
if (!generateImage) {
imagePromise = Promise.resolve(null);
} else if (imageType === 'avif') {
imagePromise = mediaGen.generateAnimatedImage(
sourcePath,
startSec,
clampedEndSec,
audioPadding,
{
const createImagePromise = (animatedLeadInSeconds = 0): Promise<Buffer | null> => {
if (!generateImage) {
return Promise.resolve(null);
}
if (imageType === 'avif') {
return mediaGen.generateAnimatedImage(sourcePath, startSec, clampedEndSec, audioPadding, {
fps: ankiConfig.media?.animatedFps ?? 10,
maxWidth: ankiConfig.media?.animatedMaxWidth ?? 640,
maxHeight: ankiConfig.media?.animatedMaxHeight,
crf: ankiConfig.media?.animatedCrf ?? 35,
},
);
} else {
leadingStillDuration: animatedLeadInSeconds,
});
}
const midpointSec = (startSec + clampedEndSec) / 2;
imagePromise = mediaGen.generateScreenshot(sourcePath, midpointSec, {
return mediaGen.generateScreenshot(sourcePath, midpointSec, {
format: ankiConfig.media?.imageFormat ?? 'jpg',
quality: ankiConfig.media?.imageQuality ?? 92,
maxWidth: ankiConfig.media?.imageMaxWidth,
maxHeight: ankiConfig.media?.imageMaxHeight,
});
}
};
const imagePromise =
mode === 'word' && syncAnimatedImageToWordAudio
? Promise.resolve<Buffer | null>(null)
: createImagePromise();
const errors: string[] = [];
let noteId: number;
@@ -677,12 +702,31 @@ export function createStatsApp(
noteId = yomitanResult.value;
const audioBuffer = audioResult.status === 'fulfilled' ? audioResult.value : null;
const imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
if (audioResult.status === 'rejected')
errors.push(`audio: ${(audioResult.reason as Error).message}`);
if (imageResult.status === 'rejected')
errors.push(`image: ${(imageResult.reason as Error).message}`);
let imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
if (syncAnimatedImageToWordAudio && generateImage) {
try {
const noteInfoResult = (await client.notesInfo([noteId])) as StatsServerNoteInfo[];
const noteInfo = noteInfoResult[0] ?? null;
const animatedLeadInSeconds = noteInfo
? await resolveAnimatedImageLeadInSeconds({
config: ankiConfig,
noteInfo,
resolveConfiguredFieldName: (candidateNoteInfo, ...preferredNames) =>
resolveStatsNoteFieldName(candidateNoteInfo, ...preferredNames),
retrieveMediaFileBase64: (filename) => client.retrieveMediaFile(filename),
})
: 0;
imageBuffer = await createImagePromise(animatedLeadInSeconds);
} catch (err) {
errors.push(`image: ${(err as Error).message}`);
}
}
const mediaFields: Record<string, string> = {};
const timestamp = Date.now();
const sentenceFieldName = ankiConfig.fields?.sentence ?? 'Sentence';

View File

@@ -0,0 +1,15 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { buildAnimatedImageVideoFilter } from './media-generator';
// A positive leadingStillDuration must add a `tpad` clone segment ahead of the
// fps and scale filters in the resulting filter string.
test('buildAnimatedImageVideoFilter prepends a cloned first frame when lead-in is provided', () => {
assert.equal(
buildAnimatedImageVideoFilter({
fps: 10,
maxWidth: 640,
leadingStillDuration: 1.25,
}),
'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2',
);
});

View File

@@ -24,6 +24,33 @@ import { createLogger } from './logger';
const log = createLogger('media');
/**
 * Builds the comma-separated `-vf` filter string for animated image encoding.
 *
 * Filters are emitted in this order:
 * 1. `tpad` with `start_mode=clone`, only when `leadingStillDuration` is a
 *    positive finite number, to hold the first frame for that many seconds;
 * 2. `fps`, clamped to the range 1-60 (a NaN value falls back to 10);
 * 3. `scale`, bounded by `maxWidth` and/or `maxHeight` when they are positive
 *    finite numbers; when both are given the original aspect ratio is kept.
 *
 * @param options.fps Target frame rate; defaults to 10.
 * @param options.maxWidth Maximum output width; defaults to 640.
 * @param options.maxHeight Maximum output height; unbounded when omitted.
 * @param options.leadingStillDuration Seconds of frozen first frame to prepend; defaults to 0.
 * @returns The filter string to pass to ffmpeg's `-vf` option.
 */
export function buildAnimatedImageVideoFilter(options: {
  fps?: number;
  maxWidth?: number;
  maxHeight?: number;
  leadingStillDuration?: number;
}): string {
  const { fps = 10, maxWidth = 640, maxHeight, leadingStillDuration = 0 } = options;

  // Non-finite numbers would be rendered verbatim ("Infinity", "NaN") into the
  // filter string, which ffmpeg rejects, so they are treated as "not set".
  const isPositiveFinite = (value: number | undefined): value is number =>
    typeof value === 'number' && Number.isFinite(value) && value > 0;

  const clampedFps = Number.isNaN(fps) ? 10 : Math.max(1, Math.min(60, fps));
  const hasWidth = isPositiveFinite(maxWidth);
  const hasHeight = isPositiveFinite(maxHeight);

  const vfParts: string[] = [];
  if (isPositiveFinite(leadingStillDuration)) {
    vfParts.push(`tpad=start_duration=${leadingStillDuration}:start_mode=clone`);
  }
  vfParts.push(`fps=${clampedFps}`);
  if (hasWidth && hasHeight) {
    vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`);
  } else if (hasWidth) {
    vfParts.push(`scale=w=${maxWidth}:h=-2`);
  } else if (hasHeight) {
    vfParts.push(`scale=w=-2:h=${maxHeight}`);
  }
  return vfParts.join(',');
}
export class MediaGenerator {
private tempDir: string;
private notifyIconDir: string;
@@ -289,25 +316,15 @@ export class MediaGenerator {
maxWidth?: number;
maxHeight?: number;
crf?: number;
leadingStillDuration?: number;
} = {},
): Promise<Buffer> {
const start = Math.max(0, startTime - padding);
const duration = endTime - startTime + 2 * padding;
const { fps = 10, maxWidth = 640, maxHeight, crf = 35 } = options;
const { fps = 10, maxWidth = 640, maxHeight, crf = 35, leadingStillDuration = 0 } = options;
const clampedFps = Math.max(1, Math.min(60, fps));
const clampedCrf = Math.max(0, Math.min(63, crf));
const vfParts: string[] = [];
vfParts.push(`fps=${clampedFps}`);
if (maxWidth && maxWidth > 0 && maxHeight && maxHeight > 0) {
vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`);
} else if (maxWidth && maxWidth > 0) {
vfParts.push(`scale=w=${maxWidth}:h=-2`);
} else if (maxHeight && maxHeight > 0) {
vfParts.push(`scale=w=-2:h=${maxHeight}`);
}
const av1Encoder = await this.detectAv1Encoder();
if (!av1Encoder) {
throw new Error(
@@ -338,7 +355,12 @@ export class MediaGenerator {
'-i',
videoPath,
'-vf',
vfParts.join(','),
buildAnimatedImageVideoFilter({
fps,
maxWidth,
maxHeight,
leadingStillDuration,
}),
...encoderArgs,
'-y',
outputPath,

View File

@@ -241,6 +241,7 @@ export interface AnkiConnectConfig {
animatedMaxWidth?: number;
animatedMaxHeight?: number;
animatedCrf?: number;
syncAnimatedImageToWordAudio?: boolean;
audioPadding?: number;
fallbackDuration?: number;
maxMediaDuration?: number;
@@ -745,6 +746,7 @@ export interface ResolvedConfig {
animatedMaxWidth: number;
animatedMaxHeight?: number;
animatedCrf: number;
syncAnimatedImageToWordAudio: boolean;
audioPadding: number;
fallbackDuration: number;
maxMediaDuration: number;