diff --git a/config.example.jsonc b/config.example.jsonc
index cf9f1ae..b4cebc7 100644
--- a/config.example.jsonc
+++ b/config.example.jsonc
@@ -340,6 +340,7 @@
       "animatedFps": 10, // Animated fps setting.
       "animatedMaxWidth": 640, // Animated max width setting.
       "animatedCrf": 35, // Animated crf setting.
+      "syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
       "audioPadding": 0.5, // Audio padding setting.
       "fallbackDuration": 3, // Fallback duration setting.
       "maxMediaDuration": 30 // Max media duration setting.
diff --git a/docs-site/public/config.example.jsonc b/docs-site/public/config.example.jsonc
index cf9f1ae..b4cebc7 100644
--- a/docs-site/public/config.example.jsonc
+++ b/docs-site/public/config.example.jsonc
@@ -340,6 +340,7 @@
       "animatedFps": 10, // Animated fps setting.
       "animatedMaxWidth": 640, // Animated max width setting.
       "animatedCrf": 35, // Animated crf setting.
+      "syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
       "audioPadding": 0.5, // Audio padding setting.
       "fallbackDuration": 3, // Fallback duration setting.
       "maxMediaDuration": 30 // Max media duration setting.
diff --git a/src/anki-integration.ts b/src/anki-integration.ts
index ad93798..2f867c7 100644
--- a/src/anki-integration.ts
+++ b/src/anki-integration.ts
@@ -54,6 +54,7 @@ import { FieldGroupingService } from './anki-integration/field-grouping';
 import { FieldGroupingMergeCollaborator } from './anki-integration/field-grouping-merge';
 import { NoteUpdateWorkflow } from './anki-integration/note-update-workflow';
 import { FieldGroupingWorkflow } from './anki-integration/field-grouping-workflow';
+import { resolveAnimatedImageLeadInSeconds } from './anki-integration/animated-image-sync';
 import { AnkiIntegrationRuntime, normalizeAnkiIntegrationConfig } from './anki-integration/runtime';
 
 const log = createLogger('anki').child('integration');
@@ -190,7 +191,7 @@ export class AnkiIntegration {
         this.resolveNoteFieldName(noteInfo, preferredName),
       extractFields: (fields) => this.extractFields(fields),
       processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields),
-      generateMediaForMerge: () => this.generateMediaForMerge(),
+      generateMediaForMerge: (noteInfo) => this.generateMediaForMerge(noteInfo),
       warnFieldParseOnce: (fieldName, reason, detail) =>
         this.warnFieldParseOnce(fieldName, reason, detail),
     });
@@ -286,6 +287,7 @@ export class AnkiIntegration {
         storeMediaFile: (filename, data) => this.client.storeMediaFile(filename, data),
         findNotes: async (query, options) =>
           (await this.client.findNotes(query, options)) as number[],
+        retrieveMediaFile: (filename) => this.client.retrieveMediaFile(filename),
       },
       mediaGenerator: {
         generateAudio: (videoPath, startTime, endTime, audioPadding, audioStreamIndex) =>
@@ -319,6 +321,7 @@ export class AnkiIntegration {
         this.resolveConfiguredFieldName(noteInfo, ...preferredNames),
       resolveNoteFieldName: (noteInfo, preferredName) =>
         this.resolveNoteFieldName(noteInfo, preferredName),
+      getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo),
       extractFields: (fields) => this.extractFields(fields),
       processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields),
       setCardTypeFields: (updatedFields, availableFieldNames, cardKind) =>
@@ -407,12 +410,13 @@ export class AnkiIntegration {
         this.resolveConfiguredFieldName(noteInfo, ...preferredNames),
       getResolvedSentenceAudioFieldName: (noteInfo) =>
         this.getResolvedSentenceAudioFieldName(noteInfo),
+      getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo),
       mergeFieldValue: (existing, newValue, overwrite) =>
         this.mergeFieldValue(existing, newValue, overwrite),
       generateAudioFilename: () => this.generateAudioFilename(),
       generateAudio: () => this.generateAudio(),
       generateImageFilename: () => this.generateImageFilename(),
-      generateImage: () => this.generateImage(),
+      generateImage: (animatedLeadInSeconds) => this.generateImage(animatedLeadInSeconds),
       formatMiscInfoPattern: (fallbackFilename, startTimeSeconds) =>
         this.formatMiscInfoPattern(fallbackFilename, startTimeSeconds),
       addConfiguredTagsToNote: (noteId) => this.addConfiguredTagsToNote(noteId),
@@ -637,7 +641,7 @@ export class AnkiIntegration {
     );
   }
 
-  private async generateImage(): Promise<Buffer | null> {
+  private async generateImage(animatedLeadInSeconds = 0): Promise<Buffer | null> {
     if (!this.mpvClient || !this.mpvClient.currentVideoPath) {
       return null;
     }
@@ -665,6 +669,7 @@ export class AnkiIntegration {
           maxWidth: this.config.media?.animatedMaxWidth,
           maxHeight: this.config.media?.animatedMaxHeight,
           crf: this.config.media?.animatedCrf,
+          leadingStillDuration: animatedLeadInSeconds,
         },
       );
     } else {
@@ -1020,7 +1025,18 @@ export class AnkiIntegration {
     return getPreferredWordValueFromExtractedFields(fields, this.config);
   }
 
-  private async generateMediaForMerge(): Promise<{
+  /** Resolves the frozen-frame lead-in (in seconds) for this note via the shared sync helper. */
+  private async getAnimatedImageLeadInSeconds(noteInfo: NoteInfo): Promise<number> {
+    return resolveAnimatedImageLeadInSeconds({
+      noteInfo,
+      config: this.config,
+      logWarn: (message, ...details) => log.warn(message, ...details),
+      retrieveMediaFileBase64: (mediaFilename) => this.client.retrieveMediaFile(mediaFilename),
+      resolveConfiguredFieldName: (note, ...rest) => this.resolveConfiguredFieldName(note, ...rest),
+    });
+  }
+
+  private async generateMediaForMerge(noteInfo?: NoteInfo): Promise<{
     audioField?: string;
     audioValue?: string;
     imageField?: string;
@@ -1057,8 +1073,11 @@ export class AnkiIntegration {
 
     if (this.config.media?.generateImage && this.mpvClient?.currentVideoPath) {
       try {
+        const animatedLeadInSeconds = noteInfo
+          ? await this.getAnimatedImageLeadInSeconds(noteInfo)
+          : 0;
         const imageFilename = this.generateImageFilename();
-        const imageBuffer = await this.generateImage();
+        const imageBuffer = await this.generateImage(animatedLeadInSeconds);
         if (imageBuffer) {
           await this.client.storeMediaFile(imageFilename, imageBuffer);
           result.imageField = this.config.fields?.image || DEFAULT_ANKI_CONNECT_CONFIG.fields.image;
diff --git a/src/anki-integration/animated-image-sync.test.ts b/src/anki-integration/animated-image-sync.test.ts
new file mode 100644
index 0000000..c0d25cf
--- /dev/null
+++ b/src/anki-integration/animated-image-sync.test.ts
@@ -0,0 +1,73 @@
+import assert from 'node:assert/strict';
+import test from 'node:test';
+
+import { resolveAnimatedImageLeadInSeconds, extractSoundFilenames } from './animated-image-sync';
+
+// Verifies that [sound:...] tags come back in field order and surrounding text is ignored.
+test('extractSoundFilenames returns ordered sound filenames from an Anki field value', () => {
+  const fieldValue = 'before [sound:word.mp3] middle [sound:alt.ogg] after';
+
+  assert.deepEqual(extractSoundFilenames(fieldValue), ['word.mp3', 'alt.ogg']);
+});
+
+// Verifies that the durations of every word-audio file are added up when sync is enabled.
+test('resolveAnimatedImageLeadInSeconds sums configured word audio durations for animated images', async () => {
+  const encodedAudio = new Map([
+    ['word.mp3', 'd29yZA=='],
+    ['alt.ogg', 'YWx0'],
+  ]);
+  const durations = new Map([
+    ['word.mp3', 0.41],
+    ['alt.ogg', 0.84],
+  ]);
+
+  const leadInSeconds = await resolveAnimatedImageLeadInSeconds({
+    config: {
+      fields: { audio: 'ExpressionAudio' },
+      media: { imageType: 'avif', syncAnimatedImageToWordAudio: true },
+    },
+    noteInfo: {
+      noteId: 42,
+      fields: { ExpressionAudio: { value: '[sound:word.mp3][sound:alt.ogg]' } },
+    },
+    resolveConfiguredFieldName: (noteInfo, ...preferredNames) => {
+      const fieldNames = Object.keys(noteInfo.fields);
+      for (const preferredName of preferredNames) {
+        if (!preferredName) continue;
+        const wanted = preferredName.toLowerCase();
+        const resolved = fieldNames.find((fieldName) => fieldName.toLowerCase() === wanted);
+        if (resolved) return resolved;
+      }
+      return null;
+    },
+    retrieveMediaFileBase64: async (filename) => encodedAudio.get(filename) ?? '',
+    probeAudioDurationSeconds: async (_buffer, filename) => durations.get(filename) ?? null,
+    logWarn: () => undefined,
+  });
+
+  assert.equal(leadInSeconds, 1.25);
+});
+
+// Verifies the early exit: with sync disabled no media is fetched and no probe is started.
+test('resolveAnimatedImageLeadInSeconds falls back to zero when sync is disabled', async () => {
+  const failIfCalled = async (): Promise<never> => {
+    throw new Error('should not be called');
+  };
+
+  const leadInSeconds = await resolveAnimatedImageLeadInSeconds({
+    config: {
+      fields: { audio: 'ExpressionAudio' },
+      media: { imageType: 'avif', syncAnimatedImageToWordAudio: false },
+    },
+    noteInfo: {
+      noteId: 42,
+      fields: { ExpressionAudio: { value: '[sound:word.mp3]' } },
+    },
+    resolveConfiguredFieldName: () => 'ExpressionAudio',
+    retrieveMediaFileBase64: failIfCalled,
+    probeAudioDurationSeconds: failIfCalled,
+    logWarn: () => undefined,
+  });
+
+  assert.equal(leadInSeconds, 0);
+});
diff --git a/src/anki-integration/animated-image-sync.ts b/src/anki-integration/animated-image-sync.ts
new file mode 100644
index 0000000..9a53df9
--- /dev/null
+++ b/src/anki-integration/animated-image-sync.ts
@@ -0,0 +1,181 @@
+import { execFile as nodeExecFile } from 'node:child_process';
+import * as fs from 'node:fs';
+import * as os from 'node:os';
+import * as path from 'node:path';
+
+import { DEFAULT_ANKI_CONNECT_CONFIG } from '../config';
+import type { AnkiConnectConfig } from '../types';
+
+/** Minimal note shape read by this module: an id plus raw field values keyed by field name. */
+type NoteInfoLike = {
+  noteId: number;
+  fields: Record<string, { value: string }>;
+};
+
+/** Inputs for `resolveAnimatedImageLeadInSeconds`. */
+interface ResolveAnimatedImageLeadInSecondsArgs<TNoteInfo extends NoteInfoLike> {
+  /** Supplies `media.imageType`, `media.syncAnimatedImageToWordAudio` and `fields.audio`. */
+  config: Pick<AnkiConnectConfig, 'fields' | 'media'>;
+  /** Note whose word-audio field is inspected. */
+  noteInfo: TNoteInfo;
+  /** Maps the preferred field names onto a field of the note, or null when none applies. */
+  resolveConfiguredFieldName: (
+    noteInfo: TNoteInfo,
+    ...preferredNames: (string | undefined)[]
+  ) => string | null;
+  /** Fetches a media file as base64; an empty result is treated as a failed retrieval. */
+  retrieveMediaFileBase64: (filename: string) => Promise<string>;
+  /** Duration probe override; defaults to the ffprobe-backed `probeAudioDurationSeconds`. */
+  probeAudioDurationSeconds?: (buffer: Buffer, filename: string) => Promise<number | null>;
+  /** Optional sink for diagnostics emitted when the sync is skipped. */
+  logWarn?: (message: string, ...args: unknown[]) => void;
+}
+
+/** Injectable process and filesystem primitives for `probeAudioDurationSeconds`. */
+interface ProbeAudioDurationDeps {
+  execFile?: typeof nodeExecFile;
+  mkdtempSync?: typeof fs.mkdtempSync;
+  writeFileSync?: typeof fs.writeFileSync;
+  rmSync?: typeof fs.rmSync;
+}
+
+/**
+ * Returns the filenames referenced by `[sound:...]` tags in an Anki field value, in order of
+ * appearance. Names are trimmed and tags whose name trims to nothing are dropped.
+ */
+export function extractSoundFilenames(value: string): string[] {
+  const matches = value.matchAll(/\[sound:([^\]]+)\]/gi);
+  return Array.from(matches, (match) => match[1]?.trim() ?? '').filter(
+    (filename) => filename.length > 0,
+  );
+}
+
+/** True when the image type is `avif` and the sync option has not been explicitly disabled. */
+function shouldSyncAnimatedImageToWordAudio(config: Pick<AnkiConnectConfig, 'media'>): boolean {
+  return (
+    config.media?.imageType === 'avif' && config.media?.syncAnimatedImageToWordAudio !== false
+  );
+}
+
+/**
+ * Measures an audio buffer's duration by writing it to a temporary file and running ffprobe.
+ *
+ * @param buffer - Raw audio bytes.
+ * @param filename - Original media filename; only its extension is reused for the temp file.
+ * @param deps - Optional replacements for the process and filesystem calls.
+ * @returns The duration in seconds, or null when ffprobe fails or reports no positive duration.
+ */
+export async function probeAudioDurationSeconds(
+  buffer: Buffer,
+  filename: string,
+  deps: ProbeAudioDurationDeps = {},
+): Promise<number | null> {
+  const execFile = deps.execFile ?? nodeExecFile;
+  const mkdtempSync = deps.mkdtempSync ?? fs.mkdtempSync;
+  const writeFileSync = deps.writeFileSync ?? fs.writeFileSync;
+  const rmSync = deps.rmSync ?? fs.rmSync;
+
+  const tempDir = mkdtempSync(path.join(os.tmpdir(), 'subminer-audio-probe-'));
+  try {
+    const ext = path.extname(filename) || '.bin';
+    const tempPath = path.join(tempDir, `probe${ext}`);
+    writeFileSync(tempPath, buffer);
+
+    return await new Promise<number | null>((resolve) => {
+      execFile(
+        'ffprobe',
+        [
+          '-v',
+          'error',
+          '-show_entries',
+          'format=duration',
+          '-of',
+          'default=noprint_wrappers=1:nokey=1',
+          tempPath,
+        ],
+        (error, stdout) => {
+          if (error) {
+            resolve(null);
+            return;
+          }
+
+          const durationSeconds = Number.parseFloat((stdout || '').trim());
+          resolve(Number.isFinite(durationSeconds) && durationSeconds > 0 ? durationSeconds : null);
+        },
+      );
+    });
+  } finally {
+    // The cleanup lives in `finally` so the directory is also removed when writing the file or
+    // spawning ffprobe throws, not only after ffprobe has reported back.
+    try {
+      rmSync(tempDir, { recursive: true, force: true });
+    } catch {
+      // Best effort: a leftover temp directory must not turn a finished probe into a failure.
+    }
+  }
+}
+
+/**
+ * Computes how long the first frame of an animated image should be held so that it matches the
+ * duration of the note's existing word audio.
+ *
+ * The lookup is best effort: when the sync is disabled, the field or its `[sound:...]` tags are
+ * missing, or any file cannot be retrieved or probed, it returns 0 instead of throwing so that
+ * callers can still generate the image without a lead-in.
+ *
+ * @returns The summed duration in seconds of every word-audio file, or 0 when unavailable.
+ */
+export async function resolveAnimatedImageLeadInSeconds<TNoteInfo extends NoteInfoLike>({
+  config,
+  noteInfo,
+  resolveConfiguredFieldName,
+  retrieveMediaFileBase64,
+  probeAudioDurationSeconds: probeDuration = probeAudioDurationSeconds,
+  logWarn,
+}: ResolveAnimatedImageLeadInSecondsArgs<TNoteInfo>): Promise<number> {
+  if (!shouldSyncAnimatedImageToWordAudio(config)) {
+    return 0;
+  }
+
+  const wordAudioFieldName = resolveConfiguredFieldName(
+    noteInfo,
+    config.fields?.audio,
+    DEFAULT_ANKI_CONNECT_CONFIG.fields.audio,
+  );
+  if (!wordAudioFieldName) {
+    return 0;
+  }
+
+  const wordAudioValue = noteInfo.fields[wordAudioFieldName]?.value || '';
+  const filenames = extractSoundFilenames(wordAudioValue);
+  if (filenames.length === 0) {
+    return 0;
+  }
+
+  let totalLeadInSeconds = 0;
+  for (const filename of filenames) {
+    let durationSeconds: number | null;
+    try {
+      const encoded = await retrieveMediaFileBase64(filename);
+      if (!encoded) {
+        logWarn?.('Animated image sync skipped: failed to retrieve word audio', filename);
+        return 0;
+      }
+
+      durationSeconds = await probeDuration(Buffer.from(encoded, 'base64'), filename);
+    } catch (error) {
+      // A rejected retrieval or probe would otherwise abort the caller's image generation.
+      logWarn?.('Animated image sync skipped: word audio lookup failed', filename, error);
+      return 0;
+    }
+
+    if (!(typeof durationSeconds === 'number' && Number.isFinite(durationSeconds))) {
+      logWarn?.('Animated image sync skipped: failed to probe word audio duration', filename);
+      return 0;
+    }
+
+    totalLeadInSeconds += durationSeconds;
+  }
+
+  return totalLeadInSeconds;
+}
diff --git a/src/anki-integration/card-creation.ts b/src/anki-integration/card-creation.ts
index f7f47cf..6820fe1 100644
--- a/src/anki-integration/card-creation.ts
+++ b/src/anki-integration/card-creation.ts
@@ -30,6 +30,7 @@ interface CardCreationClient {
   updateNoteFields(noteId: number, fields: Record<string, string>): Promise<void>;
   storeMediaFile(filename: string, data: Buffer): Promise<void>;
   findNotes(query: string, options?: { maxRetries?: number }): Promise<number[]>;
+  retrieveMediaFile(filename: string): Promise<string>;
 }
 
 interface CardCreationMediaGenerator {
@@ -60,6 +61,7 @@ interface CardCreationMediaGenerator {
       maxWidth?: number;
       maxHeight?: number;
       crf?: number;
+      leadingStillDuration?: number;
     },
   ): Promise<Buffer | null>;
 }
@@ -83,6 +85,7 @@ interface CardCreationDeps {
     ...preferredNames: (string | undefined)[]
   ) => string | null;
   resolveNoteFieldName: (noteInfo: CardCreationNoteInfo, preferredName?: string) => string | null;
+  getAnimatedImageLeadInSeconds: (noteInfo: CardCreationNoteInfo) => Promise<number>;
   extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
   processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
   setCardTypeFields: (
@@ -258,11 +261,14 @@ export class CardCreationService {
 
         if (this.deps.getConfig().media?.generateImage) {
           try {
+            const animatedLeadInSeconds =
+              await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
             const imageFilename = this.generateImageFilename();
             const imageBuffer = await this.generateImageBuffer(
               mpvClient.currentVideoPath,
               rangeStart,
               rangeEnd,
+              animatedLeadInSeconds,
             );
 
             if (imageBuffer) {
@@ -414,11 +420,14 @@ export class CardCreationService {
 
         if (this.deps.getConfig().media?.generateImage) {
           try {
+            const animatedLeadInSeconds =
+              await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
             const imageFilename = this.generateImageFilename();
             const imageBuffer = await this.generateImageBuffer(
               mpvClient.currentVideoPath,
               startTime,
               endTime,
+              animatedLeadInSeconds,
             );
 
             const imageField = this.deps.getConfig().fields?.image;
@@ -679,6 +688,7 @@ export class CardCreationService {
     videoPath: string,
     startTime: number,
     endTime: number,
+    animatedLeadInSeconds = 0,
   ): Promise<Buffer | null> {
     const mpvClient = this.deps.getMpvClient();
     if (!mpvClient) {
@@ -707,6 +717,7 @@ export class CardCreationService {
           maxWidth: this.deps.getConfig().media?.animatedMaxWidth,
           maxHeight: this.deps.getConfig().media?.animatedMaxHeight,
           crf: this.deps.getConfig().media?.animatedCrf,
+          leadingStillDuration: animatedLeadInSeconds,
         },
       );
     }
diff --git a/src/anki-integration/field-grouping-merge.ts b/src/anki-integration/field-grouping-merge.ts
index 043f1e7..4384b49 100644
--- a/src/anki-integration/field-grouping-merge.ts
+++ b/src/anki-integration/field-grouping-merge.ts
@@ -28,7 +28,7 @@ interface FieldGroupingMergeDeps {
   ) => string | null;
   extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
   processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
-  generateMediaForMerge: () => Promise<FieldGroupingMergeMedia>;
+  generateMediaForMerge: (noteInfo: FieldGroupingMergeNoteInfo) => Promise<FieldGroupingMergeMedia>;
   warnFieldParseOnce: (fieldName: string, reason: string, detail?: string) => void;
 }
 
@@ -132,7 +132,7 @@ export class FieldGroupingMergeCollaborator {
     }
 
     if (includeGeneratedMedia) {
-      const media = await this.deps.generateMediaForMerge();
+      const media = await this.deps.generateMediaForMerge(keepNoteInfo);
       if (media.audioField && media.audioValue && !sourceFields[media.audioField]) {
         sourceFields[media.audioField] = media.audioValue;
       }
diff --git a/src/anki-integration/note-update-workflow.test.ts b/src/anki-integration/note-update-workflow.test.ts
index dc69c63..49e259f 100644
--- a/src/anki-integration/note-update-workflow.test.ts
+++ b/src/anki-integration/note-update-workflow.test.ts
@@ -62,6 +62,7 @@ function createWorkflowHarness() {
       return names.find((name) => name.toLowerCase() === preferred.toLowerCase()) ?? null;
     },
     getResolvedSentenceAudioFieldName: () => null,
+    getAnimatedImageLeadInSeconds: async () => 0,
     mergeFieldValue: (_existing: string, next: string, _overwrite: boolean) => next,
     generateAudioFilename: () => 'audio_1.mp3',
     generateAudio: async () => null,
@@ -163,3 +164,42 @@ test('NoteUpdateWorkflow updates note before auto field grouping merge', async (
   assert.deepEqual(callOrder, ['update', 'auto']);
   assert.equal(harness.updates.length, 1);
 });
+
+test('NoteUpdateWorkflow passes animated image lead-in when syncing avif to word audio', async () => {
+  const { deps, workflow } = createWorkflowHarness();
+  const noteWithWordAudio = {
+    noteId: 42,
+    fields: {
+      Expression: { value: 'taberu' },
+      ExpressionAudio: { value: '[sound:word.mp3]' },
+      Sentence: { value: '' },
+      Picture: { value: '' },
+    },
+  } satisfies NoteUpdateWorkflowNoteInfo;
+  let leadInSeenByGenerator = 0;
+
+  // Configure AVIF output with word-audio sync enabled and stub the resolved lead-in.
+  deps.client.notesInfo = async () => [noteWithWordAudio];
+  deps.getConfig = () => ({
+    fields: {
+      sentence: 'Sentence',
+      image: 'Picture',
+    },
+    media: {
+      generateImage: true,
+      imageType: 'avif',
+      syncAnimatedImageToWordAudio: true,
+    },
+    behavior: {},
+  });
+  deps.getAnimatedImageLeadInSeconds = async () => 1.25;
+  deps.generateImage = async (leadInSeconds?: number) => {
+    leadInSeenByGenerator = leadInSeconds ?? 0;
+    return Buffer.from('image');
+  };
+
+  await workflow.execute(42);
+
+  // The generator must receive exactly the value produced by getAnimatedImageLeadInSeconds.
+  assert.equal(leadInSeenByGenerator, 1.25);
+});
diff --git a/src/anki-integration/note-update-workflow.ts b/src/anki-integration/note-update-workflow.ts
index ca6ceb0..26613ff 100644
--- a/src/anki-integration/note-update-workflow.ts
+++ b/src/anki-integration/note-update-workflow.ts
@@ -22,6 +22,8 @@ export interface NoteUpdateWorkflowDeps {
     media?: {
       generateAudio?: boolean;
       generateImage?: boolean;
+      imageType?: 'static' | 'avif';
+      syncAnimatedImageToWordAudio?: boolean;
     };
     behavior?: {
       overwriteAudio?: boolean;
@@ -60,11 +62,12 @@ export interface NoteUpdateWorkflowDeps {
     ...preferredNames: (string | undefined)[]
   ) => string | null;
   getResolvedSentenceAudioFieldName: (noteInfo: NoteUpdateWorkflowNoteInfo) => string | null;
+  getAnimatedImageLeadInSeconds: (noteInfo: NoteUpdateWorkflowNoteInfo) => Promise<number>;
   mergeFieldValue: (existing: string, newValue: string, overwrite: boolean) => string;
   generateAudioFilename: () => string;
   generateAudio: () => Promise<Buffer | null>;
   generateImageFilename: () => string;
-  generateImage: () => Promise<Buffer | null>;
+  generateImage: (animatedLeadInSeconds?: number) => Promise<Buffer | null>;
   formatMiscInfoPattern: (fallbackFilename: string, startTimeSeconds?: number) => string;
   addConfiguredTagsToNote: (noteId: number) => Promise<void>;
   showNotification: (noteId: number, label: string | number) => Promise<void>;
@@ -153,8 +156,9 @@ export class NoteUpdateWorkflow {
 
       if (config.media?.generateImage) {
         try {
+          const animatedLeadInSeconds = await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
           const imageFilename = this.deps.generateImageFilename();
-          const imageBuffer = await this.deps.generateImage();
+          const imageBuffer = await this.deps.generateImage(animatedLeadInSeconds);
 
           if (imageBuffer) {
             await this.deps.client.storeMediaFile(imageFilename, imageBuffer);
diff --git a/src/anki-integration/runtime.test.ts b/src/anki-integration/runtime.test.ts
index dbde606..20bbbd2 100644
--- a/src/anki-integration/runtime.test.ts
+++ b/src/anki-integration/runtime.test.ts
@@ -59,6 +59,10 @@ test('AnkiIntegrationRuntime normalizes url and proxy defaults', () => {
     normalized.media?.fallbackDuration,
     DEFAULT_ANKI_CONNECT_CONFIG.media.fallbackDuration,
   );
+  assert.equal(
+    normalized.media?.syncAnimatedImageToWordAudio,
+    DEFAULT_ANKI_CONNECT_CONFIG.media.syncAnimatedImageToWordAudio,
+  );
 });
 
 test('AnkiIntegrationRuntime starts proxy transport when proxy mode is enabled', () => {
diff --git a/src/config/definitions/defaults-integrations.ts b/src/config/definitions/defaults-integrations.ts
index a10e27b..dfd58d3 100644
--- a/src/config/definitions/defaults-integrations.ts
+++ b/src/config/definitions/defaults-integrations.ts
@@ -47,6 +47,7 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick<
       animatedMaxWidth: 640,
       animatedMaxHeight: undefined,
       animatedCrf: 35,
+      syncAnimatedImageToWordAudio: true,
       audioPadding: 0.5,
       fallbackDuration: 3.0,
       maxMediaDuration: 30,
diff --git a/src/config/definitions/options-integrations.ts b/src/config/definitions/options-integrations.ts
index 275ffd5..f0470f4 100644
--- a/src/config/definitions/options-integrations.ts
+++ b/src/config/definitions/options-integrations.ts
@@ -82,6 +82,13 @@ export function buildIntegrationConfigOptionRegistry(
       description: 'Automatically update newly added cards.',
       runtime: runtimeOptionById.get('anki.autoUpdateNewCards'),
     },
+    {
+      path: 'ankiConnect.media.syncAnimatedImageToWordAudio',
+      kind: 'boolean',
+      defaultValue: defaultConfig.ankiConnect.media.syncAnimatedImageToWordAudio,
+      description:
+        'For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio.',
+    },
     {
       path: 'ankiConnect.knownWords.matchMode',
       kind: 'enum',
diff --git a/src/config/resolve/anki-connect.test.ts b/src/config/resolve/anki-connect.test.ts
index 490cefc..0c0a944 100644
--- a/src/config/resolve/anki-connect.test.ts
+++ b/src/config/resolve/anki-connect.test.ts
@@ -121,6 +121,22 @@ test('accepts configured ankiConnect.fields.word override', () => {
   );
 });
 
+test('accepts ankiConnect.media.syncAnimatedImageToWordAudio override', () => {
+  const optionPath = 'ankiConnect.media.syncAnimatedImageToWordAudio';
+  const { context, warnings } = makeContext({
+    media: { syncAnimatedImageToWordAudio: false },
+  });
+  applyAnkiConnectResolution(context);
+
+  // The explicit `false` must survive resolution rather than fall back to the default.
+  const resolvedMedia = context.resolved.ankiConnect.media;
+  assert.equal(resolvedMedia.syncAnimatedImageToWordAudio, false);
+
+  // A valid boolean must not produce a warning for this option path.
+  const warnedAboutOption = warnings.some((warning) => warning.path === optionPath);
+  assert.equal(warnedAboutOption, false);
+});
+
 test('maps legacy ankiConnect.wordField to modern ankiConnect.fields.word', () => {
   const { context, warnings } = makeContext({
     wordField: 'TargetWordLegacy',
diff --git a/src/config/resolve/anki-connect.ts b/src/config/resolve/anki-connect.ts
index 540d875..b306ab6 100644
--- a/src/config/resolve/anki-connect.ts
+++ b/src/config/resolve/anki-connect.ts
@@ -31,6 +31,7 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
     'animatedMaxWidth',
     'animatedMaxHeight',
     'animatedCrf',
+    'syncAnimatedImageToWordAudio',
     'audioPadding',
     'fallbackDuration',
     'maxMediaDuration',
@@ -536,6 +537,17 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
       'Expected integer between 0 and 63.',
     );
   }
+  if (!hasOwn(media, 'syncAnimatedImageToWordAudio')) {
+    mapLegacy(
+      'syncAnimatedImageToWordAudio',
+      asBoolean,
+      (value) => {
+        context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio = value;
+      },
+      context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio,
+      'Expected boolean.',
+    );
+  }
   if (!hasOwn(media, 'audioPadding')) {
     mapLegacy(
       'audioPadding',
diff --git a/src/core/services/stats-server.ts b/src/core/services/stats-server.ts
index c029907..2303bc9 100644
--- a/src/core/services/stats-server.ts
+++ b/src/core/services/stats-server.ts
@@ -12,6 +12,12 @@ import {
   getConfiguredWordFieldName,
   getPreferredNoteFieldValue,
 } from '../../anki-field-config.js';
+import { resolveAnimatedImageLeadInSeconds } from '../../anki-integration/animated-image-sync.js';
+
+type StatsServerNoteInfo = {
+  noteId: number; // Anki note id.
+  fields: Record<string, { value: string }>; // Raw field values keyed by field name.
+};
 
 function parseIntQuery(raw: string | undefined, fallback: number, maxLimit?: number): number {
   if (raw === undefined) return fallback;
@@ -40,6 +46,20 @@ function parseEventTypesQuery(raw: string | undefined): number[] | undefined {
   return parsed.length > 0 ? parsed : undefined;
 }
 
+function resolveStatsNoteFieldName(
+  noteInfo: StatsServerNoteInfo,
+  ...preferredNames: (string | undefined)[]
+): string | null {
+  // Case-insensitive match against the note's fields; earlier preferred names win.
+  for (const candidate of preferredNames) {
+    if (!candidate) continue;
+    const wanted = candidate.toLowerCase();
+    const match = Object.keys(noteInfo.fields).find((name) => name.toLowerCase() === wanted);
+    if (match) return match;
+  }
+  return null;
+}
+
 /** Load known words cache from disk into a Set. Returns null if unavailable. */
 function loadKnownWordsSet(cachePath: string | undefined): Set<string> | null {
   if (!cachePath || !existsSync(cachePath)) return null;
@@ -621,36 +641,41 @@ export function createStatsApp(
     const generateAudio = ankiConfig.media?.generateAudio !== false;
     const generateImage = ankiConfig.media?.generateImage !== false && mode !== 'audio';
     const imageType = ankiConfig.media?.imageType ?? 'static';
+    const syncAnimatedImageToWordAudio =
+      imageType === 'avif' && ankiConfig.media?.syncAnimatedImageToWordAudio !== false;
 
     const audioPromise = generateAudio
       ? mediaGen.generateAudio(sourcePath, startSec, clampedEndSec, audioPadding)
       : Promise.resolve(null);
 
-    let imagePromise: Promise<Buffer | null>;
-    if (!generateImage) {
-      imagePromise = Promise.resolve(null);
-    } else if (imageType === 'avif') {
-      imagePromise = mediaGen.generateAnimatedImage(
-        sourcePath,
-        startSec,
-        clampedEndSec,
-        audioPadding,
-        {
+    const createImagePromise = (animatedLeadInSeconds = 0): Promise<Buffer | null> => {
+      if (!generateImage) {
+        return Promise.resolve(null);
+      }
+
+      if (imageType === 'avif') {
+        return mediaGen.generateAnimatedImage(sourcePath, startSec, clampedEndSec, audioPadding, {
           fps: ankiConfig.media?.animatedFps ?? 10,
           maxWidth: ankiConfig.media?.animatedMaxWidth ?? 640,
           maxHeight: ankiConfig.media?.animatedMaxHeight,
           crf: ankiConfig.media?.animatedCrf ?? 35,
-        },
-      );
-    } else {
+          leadingStillDuration: animatedLeadInSeconds,
+        });
+      }
+
       const midpointSec = (startSec + clampedEndSec) / 2;
-      imagePromise = mediaGen.generateScreenshot(sourcePath, midpointSec, {
+      return mediaGen.generateScreenshot(sourcePath, midpointSec, {
         format: ankiConfig.media?.imageFormat ?? 'jpg',
         quality: ankiConfig.media?.imageQuality ?? 92,
         maxWidth: ankiConfig.media?.imageMaxWidth,
         maxHeight: ankiConfig.media?.imageMaxHeight,
       });
-    }
+    };
+
+    const imagePromise =
+      mode === 'word' && syncAnimatedImageToWordAudio
+        ? Promise.resolve<Buffer | null>(null)
+        : createImagePromise();
 
     const errors: string[] = [];
     let noteId: number;
@@ -677,12 +702,31 @@ export function createStatsApp(
 
       noteId = yomitanResult.value;
       const audioBuffer = audioResult.status === 'fulfilled' ? audioResult.value : null;
-      const imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
       if (audioResult.status === 'rejected')
         errors.push(`audio: ${(audioResult.reason as Error).message}`);
       if (imageResult.status === 'rejected')
         errors.push(`image: ${(imageResult.reason as Error).message}`);
 
+      let imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
+      if (syncAnimatedImageToWordAudio && generateImage) {
+        try {
+          const noteInfoResult = (await client.notesInfo([noteId])) as StatsServerNoteInfo[];
+          const noteInfo = noteInfoResult[0] ?? null;
+          const animatedLeadInSeconds = noteInfo
+            ? await resolveAnimatedImageLeadInSeconds({
+                config: ankiConfig,
+                noteInfo,
+                resolveConfiguredFieldName: (candidateNoteInfo, ...preferredNames) =>
+                  resolveStatsNoteFieldName(candidateNoteInfo, ...preferredNames),
+                retrieveMediaFileBase64: (filename) => client.retrieveMediaFile(filename),
+              })
+            : 0;
+          imageBuffer = await createImagePromise(animatedLeadInSeconds);
+        } catch (err) {
+          errors.push(`image: ${(err as Error).message}`);
+        }
+      }
+
       const mediaFields: Record<string, string> = {};
       const timestamp = Date.now();
       const sentenceFieldName = ankiConfig.fields?.sentence ?? 'Sentence';
diff --git a/src/media-generator.test.ts b/src/media-generator.test.ts
new file mode 100644
index 0000000..fb1cfab
--- /dev/null
+++ b/src/media-generator.test.ts
@@ -0,0 +1,15 @@
+import assert from 'node:assert/strict';
+import test from 'node:test';
+
+import { buildAnimatedImageVideoFilter } from './media-generator';
+
+test('buildAnimatedImageVideoFilter prepends a cloned first frame when lead-in is provided', () => {
+  // The tpad stage is expected first, followed by the fps and scale stages.
+  const expectedFilter = 'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2';
+  const actualFilter = buildAnimatedImageVideoFilter({
+    fps: 10,
+    maxWidth: 640,
+    leadingStillDuration: 1.25,
+  });
+  assert.equal(actualFilter, expectedFilter);
+});
diff --git a/src/media-generator.ts b/src/media-generator.ts
index 8268b27..479b98a 100644
--- a/src/media-generator.ts
+++ b/src/media-generator.ts
@@ -24,6 +24,33 @@ import { createLogger } from './logger';
 
 const log = createLogger('media');
 
+/** Builds the ffmpeg `-vf` chain for animated images: optional frozen lead-in, fps, then scale. */
+export function buildAnimatedImageVideoFilter(options: {
+  fps?: number;
+  maxWidth?: number;
+  maxHeight?: number;
+  leadingStillDuration?: number;
+}): string {
+  const { fps = 10, maxWidth = 640, maxHeight, leadingStillDuration = 0 } = options;
+  const clampedFps = Math.max(1, Math.min(60, fps));
+  // Work in whole milliseconds so float noise and exponent notation (e.g. 1e-7) never reach the
+  // filter string; NaN and Infinity fail the finite check and simply disable the lead-in.
+  const leadInMs = Math.round(leadingStillDuration * 1000);
+  const vfParts: string[] = [];
+  if (Number.isFinite(leadInMs) && leadInMs > 0) {
+    vfParts.push(`tpad=start_duration=${leadInMs / 1000}:start_mode=clone`);
+  }
+  vfParts.push(`fps=${clampedFps}`);
+  if (maxWidth && maxWidth > 0 && maxHeight && maxHeight > 0) {
+    vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`);
+  } else if (maxWidth && maxWidth > 0) {
+    vfParts.push(`scale=w=${maxWidth}:h=-2`);
+  } else if (maxHeight && maxHeight > 0) {
+    vfParts.push(`scale=w=-2:h=${maxHeight}`);
+  }
+  return vfParts.join(',');
+}
+
 export class MediaGenerator {
   private tempDir: string;
   private notifyIconDir: string;
@@ -289,25 +316,15 @@ export class MediaGenerator {
       maxWidth?: number;
       maxHeight?: number;
       crf?: number;
+      leadingStillDuration?: number;
     } = {},
   ): Promise<Buffer> {
     const start = Math.max(0, startTime - padding);
     const duration = endTime - startTime + 2 * padding;
-    const { fps = 10, maxWidth = 640, maxHeight, crf = 35 } = options;
+    const { fps = 10, maxWidth = 640, maxHeight, crf = 35, leadingStillDuration = 0 } = options;
 
-    const clampedFps = Math.max(1, Math.min(60, fps));
     const clampedCrf = Math.max(0, Math.min(63, crf));
 
-    const vfParts: string[] = [];
-    vfParts.push(`fps=${clampedFps}`);
-    if (maxWidth && maxWidth > 0 && maxHeight && maxHeight > 0) {
-      vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`);
-    } else if (maxWidth && maxWidth > 0) {
-      vfParts.push(`scale=w=${maxWidth}:h=-2`);
-    } else if (maxHeight && maxHeight > 0) {
-      vfParts.push(`scale=w=-2:h=${maxHeight}`);
-    }
-
     const av1Encoder = await this.detectAv1Encoder();
     if (!av1Encoder) {
       throw new Error(
@@ -338,7 +355,12 @@ export class MediaGenerator {
           '-i',
           videoPath,
           '-vf',
-          vfParts.join(','),
+          buildAnimatedImageVideoFilter({
+            fps,
+            maxWidth,
+            maxHeight,
+            leadingStillDuration,
+          }),
           ...encoderArgs,
           '-y',
           outputPath,
diff --git a/src/types.ts b/src/types.ts
index c8a743b..899caaf 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -241,6 +241,7 @@ export interface AnkiConnectConfig {
     animatedMaxWidth?: number;
     animatedMaxHeight?: number;
     animatedCrf?: number;
+    syncAnimatedImageToWordAudio?: boolean;
     audioPadding?: number;
     fallbackDuration?: number;
     maxMediaDuration?: number;
@@ -745,6 +746,7 @@ export interface ResolvedConfig {
       animatedMaxWidth: number;
       animatedMaxHeight?: number;
       animatedCrf: number;
+      syncAnimatedImageToWordAudio: boolean;
       audioPadding: number;
       fallbackDuration: number;
       maxMediaDuration: number;