fix: include audio padding in animated AVIF source range (#94)

This commit is contained in:
2026-05-27 14:06:48 -07:00
committed by GitHub
parent f033f87329
commit 3e6591e390
7 changed files with 21 additions and 27 deletions
+1 -1
View File
@@ -1,5 +1,5 @@
type: fixed
area: anki
- Made sentence-audio padding opt-in by default, and kept animated AVIF motion aligned when padding is configured by freezing the first frame during leading audio padding.
- Made sentence-audio padding opt-in (off by default), and kept animated AVIF motion aligned when padding is configured by generating the same padded source range as sentence audio.
- Kept multi-line sentence mining aligned when repeated subtitle text appears in the selected history range.
+1 -1
View File
@@ -523,7 +523,7 @@
"animatedMaxHeight": 0, // Maximum height for animated AVIF captures, in pixels. Set to 0 to preserve aspect ratio.
"animatedCrf": 35, // Animated AVIF CRF quality target. Lower values produce larger, higher-quality files.
"syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
"audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio.
"audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio and animated AVIF clips.
"fallbackDuration": 3, // Fallback clip duration in seconds when subtitle timing data is unavailable.
"maxMediaDuration": 30 // Maximum allowed media clip duration in seconds.
}, // Media setting.
+1 -1
View File
@@ -973,7 +973,7 @@ This example is intentionally compact. The option table below documents availabl
| `media.animatedMaxHeight` | number (px) | Optional max height for animated AVIF. When unset, the height follows the source aspect ratio. |
| `media.animatedCrf` | number (0-63) | CRF quality for AVIF; lower = higher quality (default: `35`) |
| `media.syncAnimatedImageToWordAudio` | `true`, `false` | Whether animated AVIF prepends a frozen first frame matching the existing word-audio duration, so motion starts with sentence audio (default: `true`). |
| `media.audioPadding` | number (seconds) | Optional padding around audio clip timing (default: `0`). Animated AVIF clips freeze the first frame during leading audio padding. |
| `media.audioPadding` | number (seconds) | Optional padding around generated sentence media timing (default: `0`). Animated AVIF clips include the same padded source range as sentence audio. |
| `media.fallbackDuration` | number (seconds) | Default duration if timing unavailable (default: `3.0`) |
| `media.maxMediaDuration` | number (seconds) | Max duration for generated media from multi-line copy (default: `30`, `0` to disable) |
| `behavior.overwriteAudio` | `true`, `false` | Replace existing audio on updates; when `false`, new audio is appended/prepended using the configured media insert mode; manual clipboard updates always replace generated sentence audio (default: `true`) |
+1 -1
View File
@@ -523,7 +523,7 @@
"animatedMaxHeight": 0, // Maximum height for animated AVIF captures, in pixels. Set to 0 to preserve aspect ratio.
"animatedCrf": 35, // Animated AVIF CRF quality target. Lower values produce larger, higher-quality files.
"syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
"audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio.
"audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio and animated AVIF clips.
"fallbackDuration": 3, // Fallback clip duration in seconds when subtitle timing data is unavailable.
"maxMediaDuration": 30 // Maximum allowed media clip duration in seconds.
}, // Media setting.
@@ -246,7 +246,8 @@ export function buildIntegrationConfigOptionRegistry(
path: 'ankiConnect.media.audioPadding',
kind: 'number',
defaultValue: defaultConfig.ankiConnect.media.audioPadding,
description: 'Seconds of padding appended to both ends of generated sentence audio.',
description:
'Seconds of padding appended to both ends of generated sentence audio and animated AVIF clips.',
},
{
path: 'ankiConnect.media.fallbackDuration',
+12 -18
View File
@@ -67,7 +67,7 @@ test('buildAnimatedImageVideoFilter prepends a cloned first frame when lead-in i
);
});
test('generateAnimatedImage freezes first frame for leading audio padding', async () => {
test('generateAnimatedImage includes leading audio padding in the source range', async () => {
await withStubbedFfmpeg(async (generator, argsPath) => {
await generator.generateAnimatedImage('/video.mp4', 10, 12, 0.5, {
fps: 10,
@@ -75,12 +75,9 @@ test('generateAnimatedImage freezes first frame for leading audio padding', asyn
});
const args = readFfmpegArgs(argsPath);
assert.equal(args[args.indexOf('-ss') + 1], '10');
assert.equal(args[args.indexOf('-t') + 1], '2.5');
assert.equal(
args[args.indexOf('-vf') + 1],
'tpad=start_duration=0.5:start_mode=clone,fps=10,scale=w=640:h=-2',
);
assert.equal(args[args.indexOf('-ss') + 1], '9.5');
assert.equal(args[args.indexOf('-t') + 1], '3');
assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2');
});
});
@@ -98,7 +95,7 @@ test('generateAnimatedImage defaults to unpadded sentence timing', async () => {
});
});
test('generateAnimatedImage adds audio lead padding to existing word-audio lead-in', async () => {
test('generateAnimatedImage keeps word-audio lead-in separate from audio padding', async () => {
await withStubbedFfmpeg(async (generator, argsPath) => {
await generator.generateAnimatedImage('/video.mp4', 10, 12, 0.5, {
fps: 10,
@@ -107,16 +104,16 @@ test('generateAnimatedImage adds audio lead padding to existing word-audio lead-
});
const args = readFfmpegArgs(argsPath);
assert.equal(args[args.indexOf('-ss') + 1], '10');
assert.equal(args[args.indexOf('-t') + 1], '2.5');
assert.equal(args[args.indexOf('-ss') + 1], '9.5');
assert.equal(args[args.indexOf('-t') + 1], '3');
assert.equal(
args[args.indexOf('-vf') + 1],
'tpad=start_duration=1.75:start_mode=clone,fps=10,scale=w=640:h=-2',
'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2',
);
});
});
test('generateAnimatedImage clips leading audio padding at the start of media', async () => {
test('generateAnimatedImage clips padded source range at the start of media', async () => {
await withStubbedFfmpeg(async (generator, argsPath) => {
await generator.generateAnimatedImage('/video.mp4', 0.2, 1.2, 0.5, {
fps: 10,
@@ -124,12 +121,9 @@ test('generateAnimatedImage clips leading audio padding at the start of media',
});
const args = readFfmpegArgs(argsPath);
assert.equal(args[args.indexOf('-ss') + 1], '0.2');
assert.equal(args[args.indexOf('-t') + 1], '1.5');
assert.equal(
args[args.indexOf('-vf') + 1],
'tpad=start_duration=0.2:start_mode=clone,fps=10,scale=w=640:h=-2',
);
assert.equal(args[args.indexOf('-ss') + 1], '0');
assert.equal(args[args.indexOf('-t') + 1], '1.7');
assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2');
});
});
+3 -4
View File
@@ -322,10 +322,9 @@ export class MediaGenerator {
): Promise<Buffer> {
const { fps = 10, maxWidth = 640, maxHeight, crf = 35, leadingStillDuration = 0 } = options;
const safePadding = Number.isFinite(padding) ? Math.max(0, padding) : 0;
const start = Math.max(0, startTime);
const duration = endTime - startTime + safePadding;
const effectiveLeadingPadding = Math.min(safePadding, start);
const totalLeadingStillDuration = Math.max(0, leadingStillDuration) + effectiveLeadingPadding;
const start = Math.max(0, startTime - safePadding);
const duration = endTime - start + safePadding;
const totalLeadingStillDuration = Math.max(0, leadingStillDuration);
const clampedCrf = Math.max(0, Math.min(63, crf));