diff --git a/changes/2026-06-01-avif-audio-sync.md b/changes/2026-06-01-avif-audio-sync.md new file mode 100644 index 00000000..ff148621 --- /dev/null +++ b/changes/2026-06-01-avif-audio-sync.md @@ -0,0 +1,4 @@ +type: fixed +area: anki + +- Kept animated AVIF playback from starting or ending early by holding the word-audio lead-in and the clip duration through the next AVIF frame boundary. diff --git a/src/media-generator.test.ts b/src/media-generator.test.ts index 4ace408a..700cbabf 100644 --- a/src/media-generator.test.ts +++ b/src/media-generator.test.ts @@ -56,14 +56,14 @@ function readFfmpegArgs(argsPath: string): string[] { return fs.readFileSync(argsPath, 'utf8').trim().split('\n'); } -test('buildAnimatedImageVideoFilter prepends a cloned first frame when lead-in is provided', () => { +test('buildAnimatedImageVideoFilter holds lead-in until the next frame after the audio boundary', () => { assert.equal( buildAnimatedImageVideoFilter({ - fps: 10, + fps: 24, maxWidth: 640, leadingStillDuration: 1.25, }), - 'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2', + 'tpad=start_duration=1.2916666666666667:start_mode=clone,fps=24,scale=w=640:h=-2', ); }); @@ -76,12 +76,12 @@ test('generateAnimatedImage includes leading audio padding in the source range', const args = readFfmpegArgs(argsPath); assert.equal(args[args.indexOf('-ss') + 1], '9.5'); - assert.equal(args[args.indexOf('-t') + 1], '3'); + assert.equal(args[args.indexOf('-t') + 1], '3.1'); assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2'); }); }); -test('generateAnimatedImage defaults to unpadded sentence timing', async () => { +test('generateAnimatedImage defaults to unpadded source start and holds through the next frame', async () => { await withStubbedFfmpeg(async (generator, argsPath) => { await generator.generateAnimatedImage('/video.mp4', 10, 12, undefined, { fps: 10, @@ -90,7 +90,21 @@ test('generateAnimatedImage defaults to unpadded sentence timing', async () => { const args = 
readFfmpegArgs(argsPath); assert.equal(args[args.indexOf('-ss') + 1], '10'); - assert.equal(args[args.indexOf('-t') + 1], '2'); + assert.equal(args[args.indexOf('-t') + 1], '2.1'); + assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2'); + }); +}); + +test('generateAnimatedImage rounds fractional source duration through the next frame boundary', async () => { + await withStubbedFfmpeg(async (generator, argsPath) => { + await generator.generateAnimatedImage('/video.mp4', 10, 12.04, undefined, { + fps: 10, + maxWidth: 640, + }); + + const args = readFfmpegArgs(argsPath); + assert.equal(args[args.indexOf('-ss') + 1], '10'); + assert.equal(args[args.indexOf('-t') + 1], '2.1'); assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2'); }); }); @@ -105,10 +119,10 @@ test('generateAnimatedImage keeps word-audio lead-in separate from audio padding const args = readFfmpegArgs(argsPath); assert.equal(args[args.indexOf('-ss') + 1], '9.5'); - assert.equal(args[args.indexOf('-t') + 1], '3'); + assert.equal(args[args.indexOf('-t') + 1], '3.1'); assert.equal( args[args.indexOf('-vf') + 1], - 'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2', + 'tpad=start_duration=1.3:start_mode=clone,fps=10,scale=w=640:h=-2', ); }); }); @@ -122,7 +136,7 @@ test('generateAnimatedImage clips padded source range at the start of media', as const args = readFfmpegArgs(argsPath); assert.equal(args[args.indexOf('-ss') + 1], '0'); - assert.equal(args[args.indexOf('-t') + 1], '1.7'); + assert.equal(args[args.indexOf('-t') + 1], '1.8'); assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2'); }); }); diff --git a/src/media-generator.ts b/src/media-generator.ts index e7def7d3..d9a02c57 100644 --- a/src/media-generator.ts +++ b/src/media-generator.ts @@ -24,6 +24,20 @@ import { createLogger } from './logger'; const log = createLogger('media'); +function normalizeAnimatedImageFps(fps: number | undefined): number { + const fallbackFps = 10; + 
const safeFps = typeof fps === 'number' && Number.isFinite(fps) ? fps : fallbackFps; + return Math.max(1, Math.min(60, safeFps)); +} + +function roundDurationUpToNextFrameBoundary(seconds: number, fps: number): number { + if (!(Number.isFinite(seconds) && seconds > 0)) { + return 0; + } + + return (Math.floor(seconds * fps + 1e-9) + 1) / fps; +} + export function buildAnimatedImageVideoFilter(options: { fps?: number; maxWidth?: number; @@ -31,11 +45,15 @@ export function buildAnimatedImageVideoFilter(options: { leadingStillDuration?: number; }): string { const { fps = 10, maxWidth = 640, maxHeight, leadingStillDuration = 0 } = options; - const clampedFps = Math.max(1, Math.min(60, fps)); + const clampedFps = normalizeAnimatedImageFps(fps); + const alignedLeadingStillDuration = roundDurationUpToNextFrameBoundary( + leadingStillDuration, + clampedFps, + ); const vfParts: string[] = []; - if (leadingStillDuration > 0) { - vfParts.push(`tpad=start_duration=${leadingStillDuration}:start_mode=clone`); + if (alignedLeadingStillDuration > 0) { + vfParts.push(`tpad=start_duration=${alignedLeadingStillDuration}:start_mode=clone`); } vfParts.push(`fps=${clampedFps}`); @@ -321,9 +339,10 @@ export class MediaGenerator { } = {}, ): Promise { const { fps = 10, maxWidth = 640, maxHeight, crf = 35, leadingStillDuration = 0 } = options; + const clampedFps = normalizeAnimatedImageFps(fps); const safePadding = Number.isFinite(padding) ? Math.max(0, padding) : 0; const start = Math.max(0, startTime - safePadding); - const duration = endTime - start + safePadding; + const duration = roundDurationUpToNextFrameBoundary(endTime - start + safePadding, clampedFps); const totalLeadingStillDuration = Math.max(0, leadingStillDuration); const clampedCrf = Math.max(0, Math.min(63, crf)); @@ -359,7 +378,7 @@ export class MediaGenerator { videoPath, '-vf', buildAnimatedImageVideoFilter({ - fps, + fps: clampedFps, maxWidth, maxHeight, leadingStillDuration: totalLeadingStillDuration,