fix: include audio padding in animated AVIF source range (#94)

2026-07-27 16:49:51 -07:00 · 2026-05-27 14:06:48 -07:00
parent f033f87329
commit 3e6591e390
7 changed files with 21 additions and 27 deletions
@@ -1,5 +1,5 @@
 type: fixed
 area: anki

- Made sentence-audio padding opt-in by default, and kept animated AVIF motion aligned when padding is configured by freezing the first frame during leading audio padding.
+- Made sentence-audio padding opt-in by default, and kept animated AVIF motion aligned with sentence audio when padding is configured by clipping animated AVIFs from the same padded source range as the audio.
 - Kept multi-line sentence mining aligned when repeated subtitle text appears in the selected history range.
@@ -523,7 +523,7 @@
      "animatedMaxHeight": 0, // Maximum height for animated AVIF captures, in pixels. Set to 0 to preserve aspect ratio.
      "animatedCrf": 35, // Animated AVIF CRF quality target. Lower values produce larger, higher-quality files.
      "syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
-      "audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio.
+      "audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio and animated AVIF clips.
      "fallbackDuration": 3, // Fallback clip duration in seconds when subtitle timing data is unavailable.
      "maxMediaDuration": 30 // Maximum allowed media clip duration in seconds.
    }, // Media setting.
@@ -973,7 +973,7 @@ This example is intentionally compact. The option table below documents availabl
 | `media.animatedMaxHeight`                         | number (px)                             | Optional max height for animated AVIF. Unset keeps source aspect-constrained height.                                                                                                                        |
 | `media.animatedCrf`                               | number (0-63)                           | CRF quality for AVIF; lower = higher quality (default: `35`)                                                                                                                                                |
 | `media.syncAnimatedImageToWordAudio`              | `true`, `false`                         | Whether animated AVIF includes an opening frame synced to sentence word-audio timing (default: `true`).                                                                                                     |
-| `media.audioPadding`                              | number (seconds)                        | Optional padding around audio clip timing (default: `0`). Animated AVIF clips freeze the first frame during leading audio padding.                                                                          |
+| `media.audioPadding`                              | number (seconds)                        | Optional padding around generated sentence media timing (default: `0`). Animated AVIF clips include the same padded source range as sentence audio.                                                         |
 | `media.fallbackDuration`                          | number (seconds)                        | Default duration if timing unavailable (default: `3.0`)                                                                                                                                                     |
 | `media.maxMediaDuration`                          | number (seconds)                        | Max duration for generated media from multi-line copy (default: `30`, `0` to disable)                                                                                                                       |
 | `behavior.overwriteAudio`                         | `true`, `false`                         | Replace existing audio on updates; when `false`, new audio is appended/prepended using the configured media insert mode; manual clipboard updates always replace generated sentence audio (default: `true`) |
@@ -523,7 +523,7 @@
      "animatedMaxHeight": 0, // Maximum height for animated AVIF captures, in pixels. Set to 0 to preserve aspect ratio.
      "animatedCrf": 35, // Animated AVIF CRF quality target. Lower values produce larger, higher-quality files.
      "syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
-      "audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio.
+      "audioPadding": 0, // Seconds of padding appended to both ends of generated sentence audio and animated AVIF clips.
      "fallbackDuration": 3, // Fallback clip duration in seconds when subtitle timing data is unavailable.
      "maxMediaDuration": 30 // Maximum allowed media clip duration in seconds.
    }, // Media setting.
@@ -246,7 +246,8 @@ export function buildIntegrationConfigOptionRegistry(
      path: 'ankiConnect.media.audioPadding',
      kind: 'number',
      defaultValue: defaultConfig.ankiConnect.media.audioPadding,
-      description: 'Seconds of padding appended to both ends of generated sentence audio.',
+      description:
+        'Seconds of padding appended to both ends of generated sentence audio and animated AVIF clips.',
    },
    {
      path: 'ankiConnect.media.fallbackDuration',
@@ -67,7 +67,7 @@ test('buildAnimatedImageVideoFilter prepends a cloned first frame when lead-in i
  );
 });

-test('generateAnimatedImage freezes first frame for leading audio padding', async () => {
+test('generateAnimatedImage includes leading audio padding in the source range', async () => {
  await withStubbedFfmpeg(async (generator, argsPath) => {
    await generator.generateAnimatedImage('/video.mp4', 10, 12, 0.5, {
      fps: 10,
@@ -75,12 +75,9 @@ test('generateAnimatedImage freezes first frame for leading audio padding', asyn
    });

    const args = readFfmpegArgs(argsPath);
-    assert.equal(args[args.indexOf('-ss') + 1], '10');
-    assert.equal(args[args.indexOf('-t') + 1], '2.5');
-    assert.equal(
-      args[args.indexOf('-vf') + 1],
-      'tpad=start_duration=0.5:start_mode=clone,fps=10,scale=w=640:h=-2',
-    );
+    assert.equal(args[args.indexOf('-ss') + 1], '9.5');
+    assert.equal(args[args.indexOf('-t') + 1], '3');
+    assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2');
  });
 });

@@ -98,7 +95,7 @@ test('generateAnimatedImage defaults to unpadded sentence timing', async () => {
  });
 });

-test('generateAnimatedImage adds audio lead padding to existing word-audio lead-in', async () => {
+test('generateAnimatedImage keeps word-audio lead-in separate from audio padding', async () => {
  await withStubbedFfmpeg(async (generator, argsPath) => {
    await generator.generateAnimatedImage('/video.mp4', 10, 12, 0.5, {
      fps: 10,
@@ -107,16 +104,16 @@ test('generateAnimatedImage adds audio lead padding to existing word-audio lead-
    });

    const args = readFfmpegArgs(argsPath);
-    assert.equal(args[args.indexOf('-ss') + 1], '10');
-    assert.equal(args[args.indexOf('-t') + 1], '2.5');
+    assert.equal(args[args.indexOf('-ss') + 1], '9.5');
+    assert.equal(args[args.indexOf('-t') + 1], '3');
    assert.equal(
      args[args.indexOf('-vf') + 1],
-      'tpad=start_duration=1.75:start_mode=clone,fps=10,scale=w=640:h=-2',
+      'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2',
    );
  });
 });

-test('generateAnimatedImage clips leading audio padding at the start of media', async () => {
+test('generateAnimatedImage clips padded source range at the start of media', async () => {
  await withStubbedFfmpeg(async (generator, argsPath) => {
    await generator.generateAnimatedImage('/video.mp4', 0.2, 1.2, 0.5, {
      fps: 10,
@@ -124,12 +121,9 @@ test('generateAnimatedImage clips leading audio padding at the start of media',
    });

    const args = readFfmpegArgs(argsPath);
-    assert.equal(args[args.indexOf('-ss') + 1], '0.2');
-    assert.equal(args[args.indexOf('-t') + 1], '1.5');
-    assert.equal(
-      args[args.indexOf('-vf') + 1],
-      'tpad=start_duration=0.2:start_mode=clone,fps=10,scale=w=640:h=-2',
-    );
+    assert.equal(args[args.indexOf('-ss') + 1], '0');
+    assert.equal(args[args.indexOf('-t') + 1], '1.7');
+    assert.equal(args[args.indexOf('-vf') + 1], 'fps=10,scale=w=640:h=-2');
  });
 });

@@ -322,10 +322,9 @@ export class MediaGenerator {
  ): Promise<Buffer> {
    const { fps = 10, maxWidth = 640, maxHeight, crf = 35, leadingStillDuration = 0 } = options;
    const safePadding = Number.isFinite(padding) ? Math.max(0, padding) : 0;
-    const start = Math.max(0, startTime);
-    const duration = endTime - startTime + safePadding;
-    const effectiveLeadingPadding = Math.min(safePadding, start);
-    const totalLeadingStillDuration = Math.max(0, leadingStillDuration) + effectiveLeadingPadding;
+    const start = Math.max(0, startTime - safePadding);
+    const duration = endTime - start + safePadding;
+    const totalLeadingStillDuration = Math.max(0, leadingStillDuration);

    const clampedCrf = Math.max(0, Math.min(63, crf));