mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-20 12:11:28 -07:00
feat: sync animated anki images to sentence audio
This commit is contained in:
@@ -340,6 +340,7 @@
|
||||
"animatedFps": 10, // Animated fps setting.
|
||||
"animatedMaxWidth": 640, // Animated max width setting.
|
||||
"animatedCrf": 35, // Animated crf setting.
|
||||
"syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
|
||||
"audioPadding": 0.5, // Audio padding setting.
|
||||
"fallbackDuration": 3, // Fallback duration setting.
|
||||
"maxMediaDuration": 30 // Max media duration setting.
|
||||
|
||||
@@ -340,6 +340,7 @@
|
||||
"animatedFps": 10, // Animated fps setting.
|
||||
"animatedMaxWidth": 640, // Animated max width setting.
|
||||
"animatedCrf": 35, // Animated crf setting.
|
||||
"syncAnimatedImageToWordAudio": true, // For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio. Values: true | false
|
||||
"audioPadding": 0.5, // Audio padding setting.
|
||||
"fallbackDuration": 3, // Fallback duration setting.
|
||||
"maxMediaDuration": 30 // Max media duration setting.
|
||||
|
||||
@@ -54,6 +54,7 @@ import { FieldGroupingService } from './anki-integration/field-grouping';
|
||||
import { FieldGroupingMergeCollaborator } from './anki-integration/field-grouping-merge';
|
||||
import { NoteUpdateWorkflow } from './anki-integration/note-update-workflow';
|
||||
import { FieldGroupingWorkflow } from './anki-integration/field-grouping-workflow';
|
||||
import { resolveAnimatedImageLeadInSeconds } from './anki-integration/animated-image-sync';
|
||||
import { AnkiIntegrationRuntime, normalizeAnkiIntegrationConfig } from './anki-integration/runtime';
|
||||
|
||||
const log = createLogger('anki').child('integration');
|
||||
@@ -190,7 +191,7 @@ export class AnkiIntegration {
|
||||
this.resolveNoteFieldName(noteInfo, preferredName),
|
||||
extractFields: (fields) => this.extractFields(fields),
|
||||
processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields),
|
||||
generateMediaForMerge: () => this.generateMediaForMerge(),
|
||||
generateMediaForMerge: (noteInfo) => this.generateMediaForMerge(noteInfo),
|
||||
warnFieldParseOnce: (fieldName, reason, detail) =>
|
||||
this.warnFieldParseOnce(fieldName, reason, detail),
|
||||
});
|
||||
@@ -286,6 +287,7 @@ export class AnkiIntegration {
|
||||
storeMediaFile: (filename, data) => this.client.storeMediaFile(filename, data),
|
||||
findNotes: async (query, options) =>
|
||||
(await this.client.findNotes(query, options)) as number[],
|
||||
retrieveMediaFile: (filename) => this.client.retrieveMediaFile(filename),
|
||||
},
|
||||
mediaGenerator: {
|
||||
generateAudio: (videoPath, startTime, endTime, audioPadding, audioStreamIndex) =>
|
||||
@@ -319,6 +321,7 @@ export class AnkiIntegration {
|
||||
this.resolveConfiguredFieldName(noteInfo, ...preferredNames),
|
||||
resolveNoteFieldName: (noteInfo, preferredName) =>
|
||||
this.resolveNoteFieldName(noteInfo, preferredName),
|
||||
getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo),
|
||||
extractFields: (fields) => this.extractFields(fields),
|
||||
processSentence: (mpvSentence, noteFields) => this.processSentence(mpvSentence, noteFields),
|
||||
setCardTypeFields: (updatedFields, availableFieldNames, cardKind) =>
|
||||
@@ -407,12 +410,13 @@ export class AnkiIntegration {
|
||||
this.resolveConfiguredFieldName(noteInfo, ...preferredNames),
|
||||
getResolvedSentenceAudioFieldName: (noteInfo) =>
|
||||
this.getResolvedSentenceAudioFieldName(noteInfo),
|
||||
getAnimatedImageLeadInSeconds: (noteInfo) => this.getAnimatedImageLeadInSeconds(noteInfo),
|
||||
mergeFieldValue: (existing, newValue, overwrite) =>
|
||||
this.mergeFieldValue(existing, newValue, overwrite),
|
||||
generateAudioFilename: () => this.generateAudioFilename(),
|
||||
generateAudio: () => this.generateAudio(),
|
||||
generateImageFilename: () => this.generateImageFilename(),
|
||||
generateImage: () => this.generateImage(),
|
||||
generateImage: (animatedLeadInSeconds) => this.generateImage(animatedLeadInSeconds),
|
||||
formatMiscInfoPattern: (fallbackFilename, startTimeSeconds) =>
|
||||
this.formatMiscInfoPattern(fallbackFilename, startTimeSeconds),
|
||||
addConfiguredTagsToNote: (noteId) => this.addConfiguredTagsToNote(noteId),
|
||||
@@ -637,7 +641,7 @@ export class AnkiIntegration {
|
||||
);
|
||||
}
|
||||
|
||||
private async generateImage(): Promise<Buffer | null> {
|
||||
private async generateImage(animatedLeadInSeconds = 0): Promise<Buffer | null> {
|
||||
if (!this.mpvClient || !this.mpvClient.currentVideoPath) {
|
||||
return null;
|
||||
}
|
||||
@@ -665,6 +669,7 @@ export class AnkiIntegration {
|
||||
maxWidth: this.config.media?.animatedMaxWidth,
|
||||
maxHeight: this.config.media?.animatedMaxHeight,
|
||||
crf: this.config.media?.animatedCrf,
|
||||
leadingStillDuration: animatedLeadInSeconds,
|
||||
},
|
||||
);
|
||||
} else {
|
||||
@@ -1020,7 +1025,18 @@ export class AnkiIntegration {
|
||||
return getPreferredWordValueFromExtractedFields(fields, this.config);
|
||||
}
|
||||
|
||||
private async generateMediaForMerge(): Promise<{
|
||||
private async getAnimatedImageLeadInSeconds(noteInfo: NoteInfo): Promise<number> {
|
||||
return resolveAnimatedImageLeadInSeconds({
|
||||
config: this.config,
|
||||
noteInfo,
|
||||
resolveConfiguredFieldName: (candidateNoteInfo, ...preferredNames) =>
|
||||
this.resolveConfiguredFieldName(candidateNoteInfo, ...preferredNames),
|
||||
retrieveMediaFileBase64: (filename) => this.client.retrieveMediaFile(filename),
|
||||
logWarn: (message, ...args) => log.warn(message, ...args),
|
||||
});
|
||||
}
|
||||
|
||||
private async generateMediaForMerge(noteInfo?: NoteInfo): Promise<{
|
||||
audioField?: string;
|
||||
audioValue?: string;
|
||||
imageField?: string;
|
||||
@@ -1057,8 +1073,11 @@ export class AnkiIntegration {
|
||||
|
||||
if (this.config.media?.generateImage && this.mpvClient?.currentVideoPath) {
|
||||
try {
|
||||
const animatedLeadInSeconds = noteInfo
|
||||
? await this.getAnimatedImageLeadInSeconds(noteInfo)
|
||||
: 0;
|
||||
const imageFilename = this.generateImageFilename();
|
||||
const imageBuffer = await this.generateImage();
|
||||
const imageBuffer = await this.generateImage(animatedLeadInSeconds);
|
||||
if (imageBuffer) {
|
||||
await this.client.storeMediaFile(imageFilename, imageBuffer);
|
||||
result.imageField = this.config.fields?.image || DEFAULT_ANKI_CONNECT_CONFIG.fields.image;
|
||||
|
||||
82
src/anki-integration/animated-image-sync.test.ts
Normal file
82
src/anki-integration/animated-image-sync.test.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
|
||||
import { resolveAnimatedImageLeadInSeconds, extractSoundFilenames } from './animated-image-sync';
|
||||
|
||||
// Pure parsing: [sound:...] tags are extracted in document order.
test('extractSoundFilenames returns ordered sound filenames from an Anki field value', () => {
  assert.deepEqual(
    extractSoundFilenames('before [sound:word.mp3] middle [sound:alt.ogg] after'),
    ['word.mp3', 'alt.ogg'],
  );
});

// Happy path: avif + sync enabled; both clips are retrieved and probed, and
// the lead-in is their summed duration (0.41 + 0.84 = 1.25).
test('resolveAnimatedImageLeadInSeconds sums configured word audio durations for animated images', async () => {
  const leadInSeconds = await resolveAnimatedImageLeadInSeconds({
    config: {
      fields: {
        audio: 'ExpressionAudio',
      },
      media: {
        imageType: 'avif',
        syncAnimatedImageToWordAudio: true,
      },
    },
    noteInfo: {
      noteId: 42,
      fields: {
        ExpressionAudio: {
          value: '[sound:word.mp3][sound:alt.ogg]',
        },
      },
    },
    // Case-insensitive field-name resolution, mirroring the integration's helper.
    resolveConfiguredFieldName: (noteInfo, ...preferredNames) => {
      for (const preferredName of preferredNames) {
        if (!preferredName) continue;
        const resolved = Object.keys(noteInfo.fields).find(
          (fieldName) => fieldName.toLowerCase() === preferredName.toLowerCase(),
        );
        if (resolved) return resolved;
      }
      return null;
    },
    // Base64 payload content is irrelevant here; durations come from the probe stub.
    retrieveMediaFileBase64: async (filename) =>
      filename === 'word.mp3' ? 'd29yZA==' : filename === 'alt.ogg' ? 'YWx0' : '',
    probeAudioDurationSeconds: async (_buffer, filename) =>
      filename === 'word.mp3' ? 0.41 : filename === 'alt.ogg' ? 0.84 : null,
    logWarn: () => undefined,
  });

  assert.equal(leadInSeconds, 1.25);
});

// Disabled flag short-circuits before any media retrieval or probing happens;
// the throwing stubs guard against accidental I/O.
test('resolveAnimatedImageLeadInSeconds falls back to zero when sync is disabled', async () => {
  const leadInSeconds = await resolveAnimatedImageLeadInSeconds({
    config: {
      fields: {
        audio: 'ExpressionAudio',
      },
      media: {
        imageType: 'avif',
        syncAnimatedImageToWordAudio: false,
      },
    },
    noteInfo: {
      noteId: 42,
      fields: {
        ExpressionAudio: {
          value: '[sound:word.mp3]',
        },
      },
    },
    resolveConfiguredFieldName: () => 'ExpressionAudio',
    retrieveMediaFileBase64: async () => {
      throw new Error('should not be called');
    },
    probeAudioDurationSeconds: async () => {
      throw new Error('should not be called');
    },
    logWarn: () => undefined,
  });

  assert.equal(leadInSeconds, 0);
});
|
||||
133
src/anki-integration/animated-image-sync.ts
Normal file
133
src/anki-integration/animated-image-sync.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
import { execFile as nodeExecFile } from 'node:child_process';
|
||||
import * as fs from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
|
||||
import { DEFAULT_ANKI_CONNECT_CONFIG } from '../config';
|
||||
import type { AnkiConnectConfig } from '../types';
|
||||
|
||||
/** Minimal structural shape of an Anki note as returned by AnkiConnect notesInfo. */
type NoteInfoLike = {
  noteId: number;
  fields: Record<string, { value: string }>;
};

/** Dependencies for resolveAnimatedImageLeadInSeconds, injected so callers/tests can stub I/O. */
interface ResolveAnimatedImageLeadInSecondsArgs<TNoteInfo extends NoteInfoLike> {
  // Only the field mapping and media settings are consulted.
  config: Pick<AnkiConnectConfig, 'fields' | 'media'>;
  noteInfo: TNoteInfo;
  // Resolves a configured field name against the note's actual field names;
  // returns null when no candidate matches.
  resolveConfiguredFieldName: (
    noteInfo: TNoteInfo,
    ...preferredNames: (string | undefined)[]
  ) => string | null;
  // Fetches a stored media file as base64 (an empty string is treated as failure).
  retrieveMediaFileBase64: (filename: string) => Promise<string>;
  // Optional override for duration probing; defaults to the ffprobe-based helper below.
  probeAudioDurationSeconds?: (buffer: Buffer, filename: string) => Promise<number | null>;
  logWarn?: (message: string, ...args: unknown[]) => void;
}

/** Injectable fs/child_process hooks for probeAudioDurationSeconds (used by tests). */
interface ProbeAudioDurationDeps {
  execFile?: typeof nodeExecFile;
  mkdtempSync?: typeof fs.mkdtempSync;
  writeFileSync?: typeof fs.writeFileSync;
  rmSync?: typeof fs.rmSync;
}
|
||||
|
||||
export function extractSoundFilenames(value: string): string[] {
|
||||
const matches = value.matchAll(/\[sound:([^\]]+)\]/gi);
|
||||
return Array.from(matches, (match) => match[1]?.trim() || '').filter((value) => value.length > 0);
|
||||
}
|
||||
|
||||
function shouldSyncAnimatedImageToWordAudio(config: Pick<AnkiConnectConfig, 'media'>): boolean {
|
||||
return (
|
||||
config.media?.imageType === 'avif' && config.media?.syncAnimatedImageToWordAudio !== false
|
||||
);
|
||||
}
|
||||
|
||||
export async function probeAudioDurationSeconds(
|
||||
buffer: Buffer,
|
||||
filename: string,
|
||||
deps: ProbeAudioDurationDeps = {},
|
||||
): Promise<number | null> {
|
||||
const execFile = deps.execFile ?? nodeExecFile;
|
||||
const mkdtempSync = deps.mkdtempSync ?? fs.mkdtempSync;
|
||||
const writeFileSync = deps.writeFileSync ?? fs.writeFileSync;
|
||||
const rmSync = deps.rmSync ?? fs.rmSync;
|
||||
|
||||
const tempDir = mkdtempSync(path.join(os.tmpdir(), 'subminer-audio-probe-'));
|
||||
const ext = path.extname(filename) || '.bin';
|
||||
const tempPath = path.join(tempDir, `probe${ext}`);
|
||||
writeFileSync(tempPath, buffer);
|
||||
|
||||
return new Promise((resolve) => {
|
||||
execFile(
|
||||
'ffprobe',
|
||||
[
|
||||
'-v',
|
||||
'error',
|
||||
'-show_entries',
|
||||
'format=duration',
|
||||
'-of',
|
||||
'default=noprint_wrappers=1:nokey=1',
|
||||
tempPath,
|
||||
],
|
||||
(error, stdout) => {
|
||||
try {
|
||||
if (error) {
|
||||
resolve(null);
|
||||
return;
|
||||
}
|
||||
|
||||
const durationSeconds = Number.parseFloat((stdout || '').trim());
|
||||
resolve(Number.isFinite(durationSeconds) && durationSeconds > 0 ? durationSeconds : null);
|
||||
} finally {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
},
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
export async function resolveAnimatedImageLeadInSeconds<TNoteInfo extends NoteInfoLike>({
|
||||
config,
|
||||
noteInfo,
|
||||
resolveConfiguredFieldName,
|
||||
retrieveMediaFileBase64,
|
||||
probeAudioDurationSeconds: probeDuration = probeAudioDurationSeconds,
|
||||
logWarn,
|
||||
}: ResolveAnimatedImageLeadInSecondsArgs<TNoteInfo>): Promise<number> {
|
||||
if (!shouldSyncAnimatedImageToWordAudio(config)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const wordAudioFieldName = resolveConfiguredFieldName(
|
||||
noteInfo,
|
||||
config.fields?.audio,
|
||||
DEFAULT_ANKI_CONNECT_CONFIG.fields.audio,
|
||||
);
|
||||
if (!wordAudioFieldName) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const wordAudioValue = noteInfo.fields[wordAudioFieldName]?.value || '';
|
||||
const filenames = extractSoundFilenames(wordAudioValue);
|
||||
if (filenames.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let totalLeadInSeconds = 0;
|
||||
for (const filename of filenames) {
|
||||
const encoded = await retrieveMediaFileBase64(filename);
|
||||
if (!encoded) {
|
||||
logWarn?.('Animated image sync skipped: failed to retrieve word audio', filename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const durationSeconds = await probeDuration(Buffer.from(encoded, 'base64'), filename);
|
||||
if (!(typeof durationSeconds === 'number' && Number.isFinite(durationSeconds))) {
|
||||
logWarn?.('Animated image sync skipped: failed to probe word audio duration', filename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
totalLeadInSeconds += durationSeconds;
|
||||
}
|
||||
|
||||
return totalLeadInSeconds;
|
||||
}
|
||||
@@ -30,6 +30,7 @@ interface CardCreationClient {
|
||||
updateNoteFields(noteId: number, fields: Record<string, string>): Promise<void>;
|
||||
storeMediaFile(filename: string, data: Buffer): Promise<void>;
|
||||
findNotes(query: string, options?: { maxRetries?: number }): Promise<number[]>;
|
||||
retrieveMediaFile(filename: string): Promise<string>;
|
||||
}
|
||||
|
||||
interface CardCreationMediaGenerator {
|
||||
@@ -60,6 +61,7 @@ interface CardCreationMediaGenerator {
|
||||
maxWidth?: number;
|
||||
maxHeight?: number;
|
||||
crf?: number;
|
||||
leadingStillDuration?: number;
|
||||
},
|
||||
): Promise<Buffer | null>;
|
||||
}
|
||||
@@ -83,6 +85,7 @@ interface CardCreationDeps {
|
||||
...preferredNames: (string | undefined)[]
|
||||
) => string | null;
|
||||
resolveNoteFieldName: (noteInfo: CardCreationNoteInfo, preferredName?: string) => string | null;
|
||||
getAnimatedImageLeadInSeconds: (noteInfo: CardCreationNoteInfo) => Promise<number>;
|
||||
extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
|
||||
processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
|
||||
setCardTypeFields: (
|
||||
@@ -258,11 +261,14 @@ export class CardCreationService {
|
||||
|
||||
if (this.deps.getConfig().media?.generateImage) {
|
||||
try {
|
||||
const animatedLeadInSeconds =
|
||||
await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
|
||||
const imageFilename = this.generateImageFilename();
|
||||
const imageBuffer = await this.generateImageBuffer(
|
||||
mpvClient.currentVideoPath,
|
||||
rangeStart,
|
||||
rangeEnd,
|
||||
animatedLeadInSeconds,
|
||||
);
|
||||
|
||||
if (imageBuffer) {
|
||||
@@ -414,11 +420,14 @@ export class CardCreationService {
|
||||
|
||||
if (this.deps.getConfig().media?.generateImage) {
|
||||
try {
|
||||
const animatedLeadInSeconds =
|
||||
await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
|
||||
const imageFilename = this.generateImageFilename();
|
||||
const imageBuffer = await this.generateImageBuffer(
|
||||
mpvClient.currentVideoPath,
|
||||
startTime,
|
||||
endTime,
|
||||
animatedLeadInSeconds,
|
||||
);
|
||||
|
||||
const imageField = this.deps.getConfig().fields?.image;
|
||||
@@ -679,6 +688,7 @@ export class CardCreationService {
|
||||
videoPath: string,
|
||||
startTime: number,
|
||||
endTime: number,
|
||||
animatedLeadInSeconds = 0,
|
||||
): Promise<Buffer | null> {
|
||||
const mpvClient = this.deps.getMpvClient();
|
||||
if (!mpvClient) {
|
||||
@@ -707,6 +717,7 @@ export class CardCreationService {
|
||||
maxWidth: this.deps.getConfig().media?.animatedMaxWidth,
|
||||
maxHeight: this.deps.getConfig().media?.animatedMaxHeight,
|
||||
crf: this.deps.getConfig().media?.animatedCrf,
|
||||
leadingStillDuration: animatedLeadInSeconds,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ interface FieldGroupingMergeDeps {
|
||||
) => string | null;
|
||||
extractFields: (fields: Record<string, { value: string }>) => Record<string, string>;
|
||||
processSentence: (mpvSentence: string, noteFields: Record<string, string>) => string;
|
||||
generateMediaForMerge: () => Promise<FieldGroupingMergeMedia>;
|
||||
generateMediaForMerge: (noteInfo: FieldGroupingMergeNoteInfo) => Promise<FieldGroupingMergeMedia>;
|
||||
warnFieldParseOnce: (fieldName: string, reason: string, detail?: string) => void;
|
||||
}
|
||||
|
||||
@@ -132,7 +132,7 @@ export class FieldGroupingMergeCollaborator {
|
||||
}
|
||||
|
||||
if (includeGeneratedMedia) {
|
||||
const media = await this.deps.generateMediaForMerge();
|
||||
const media = await this.deps.generateMediaForMerge(keepNoteInfo);
|
||||
if (media.audioField && media.audioValue && !sourceFields[media.audioField]) {
|
||||
sourceFields[media.audioField] = media.audioValue;
|
||||
}
|
||||
|
||||
@@ -62,6 +62,7 @@ function createWorkflowHarness() {
|
||||
return names.find((name) => name.toLowerCase() === preferred.toLowerCase()) ?? null;
|
||||
},
|
||||
getResolvedSentenceAudioFieldName: () => null,
|
||||
getAnimatedImageLeadInSeconds: async () => 0,
|
||||
mergeFieldValue: (_existing: string, next: string, _overwrite: boolean) => next,
|
||||
generateAudioFilename: () => 'audio_1.mp3',
|
||||
generateAudio: async () => null,
|
||||
@@ -163,3 +164,42 @@ test('NoteUpdateWorkflow updates note before auto field grouping merge', async (
|
||||
assert.deepEqual(callOrder, ['update', 'auto']);
|
||||
assert.equal(harness.updates.length, 1);
|
||||
});
|
||||
|
||||
test('NoteUpdateWorkflow passes animated image lead-in when syncing avif to word audio', async () => {
|
||||
const harness = createWorkflowHarness();
|
||||
let receivedLeadInSeconds = 0;
|
||||
|
||||
harness.deps.client.notesInfo = async () =>
|
||||
[
|
||||
{
|
||||
noteId: 42,
|
||||
fields: {
|
||||
Expression: { value: 'taberu' },
|
||||
ExpressionAudio: { value: '[sound:word.mp3]' },
|
||||
Sentence: { value: '' },
|
||||
Picture: { value: '' },
|
||||
},
|
||||
},
|
||||
] satisfies NoteUpdateWorkflowNoteInfo[];
|
||||
harness.deps.getConfig = () => ({
|
||||
fields: {
|
||||
sentence: 'Sentence',
|
||||
image: 'Picture',
|
||||
},
|
||||
media: {
|
||||
generateImage: true,
|
||||
imageType: 'avif',
|
||||
syncAnimatedImageToWordAudio: true,
|
||||
},
|
||||
behavior: {},
|
||||
});
|
||||
harness.deps.getAnimatedImageLeadInSeconds = async () => 1.25;
|
||||
harness.deps.generateImage = async (leadInSeconds?: number) => {
|
||||
receivedLeadInSeconds = leadInSeconds ?? 0;
|
||||
return Buffer.from('image');
|
||||
};
|
||||
|
||||
await harness.workflow.execute(42);
|
||||
|
||||
assert.equal(receivedLeadInSeconds, 1.25);
|
||||
});
|
||||
|
||||
@@ -22,6 +22,8 @@ export interface NoteUpdateWorkflowDeps {
|
||||
media?: {
|
||||
generateAudio?: boolean;
|
||||
generateImage?: boolean;
|
||||
imageType?: 'static' | 'avif';
|
||||
syncAnimatedImageToWordAudio?: boolean;
|
||||
};
|
||||
behavior?: {
|
||||
overwriteAudio?: boolean;
|
||||
@@ -60,11 +62,12 @@ export interface NoteUpdateWorkflowDeps {
|
||||
...preferredNames: (string | undefined)[]
|
||||
) => string | null;
|
||||
getResolvedSentenceAudioFieldName: (noteInfo: NoteUpdateWorkflowNoteInfo) => string | null;
|
||||
getAnimatedImageLeadInSeconds: (noteInfo: NoteUpdateWorkflowNoteInfo) => Promise<number>;
|
||||
mergeFieldValue: (existing: string, newValue: string, overwrite: boolean) => string;
|
||||
generateAudioFilename: () => string;
|
||||
generateAudio: () => Promise<Buffer | null>;
|
||||
generateImageFilename: () => string;
|
||||
generateImage: () => Promise<Buffer | null>;
|
||||
generateImage: (animatedLeadInSeconds?: number) => Promise<Buffer | null>;
|
||||
formatMiscInfoPattern: (fallbackFilename: string, startTimeSeconds?: number) => string;
|
||||
addConfiguredTagsToNote: (noteId: number) => Promise<void>;
|
||||
showNotification: (noteId: number, label: string | number) => Promise<void>;
|
||||
@@ -153,8 +156,9 @@ export class NoteUpdateWorkflow {
|
||||
|
||||
if (config.media?.generateImage) {
|
||||
try {
|
||||
const animatedLeadInSeconds = await this.deps.getAnimatedImageLeadInSeconds(noteInfo);
|
||||
const imageFilename = this.deps.generateImageFilename();
|
||||
const imageBuffer = await this.deps.generateImage();
|
||||
const imageBuffer = await this.deps.generateImage(animatedLeadInSeconds);
|
||||
|
||||
if (imageBuffer) {
|
||||
await this.deps.client.storeMediaFile(imageFilename, imageBuffer);
|
||||
|
||||
@@ -59,6 +59,10 @@ test('AnkiIntegrationRuntime normalizes url and proxy defaults', () => {
|
||||
normalized.media?.fallbackDuration,
|
||||
DEFAULT_ANKI_CONNECT_CONFIG.media.fallbackDuration,
|
||||
);
|
||||
assert.equal(
|
||||
normalized.media?.syncAnimatedImageToWordAudio,
|
||||
DEFAULT_ANKI_CONNECT_CONFIG.media.syncAnimatedImageToWordAudio,
|
||||
);
|
||||
});
|
||||
|
||||
test('AnkiIntegrationRuntime starts proxy transport when proxy mode is enabled', () => {
|
||||
|
||||
@@ -47,6 +47,7 @@ export const INTEGRATIONS_DEFAULT_CONFIG: Pick<
|
||||
animatedMaxWidth: 640,
|
||||
animatedMaxHeight: undefined,
|
||||
animatedCrf: 35,
|
||||
syncAnimatedImageToWordAudio: true,
|
||||
audioPadding: 0.5,
|
||||
fallbackDuration: 3.0,
|
||||
maxMediaDuration: 30,
|
||||
|
||||
@@ -82,6 +82,13 @@ export function buildIntegrationConfigOptionRegistry(
|
||||
description: 'Automatically update newly added cards.',
|
||||
runtime: runtimeOptionById.get('anki.autoUpdateNewCards'),
|
||||
},
|
||||
{
|
||||
path: 'ankiConnect.media.syncAnimatedImageToWordAudio',
|
||||
kind: 'boolean',
|
||||
defaultValue: defaultConfig.ankiConnect.media.syncAnimatedImageToWordAudio,
|
||||
description:
|
||||
'For animated AVIF images, prepend a frozen first frame matching the existing word-audio duration so motion starts with sentence audio.',
|
||||
},
|
||||
{
|
||||
path: 'ankiConnect.knownWords.matchMode',
|
||||
kind: 'enum',
|
||||
|
||||
@@ -121,6 +121,22 @@ test('accepts configured ankiConnect.fields.word override', () => {
|
||||
);
|
||||
});
|
||||
|
||||
test('accepts ankiConnect.media.syncAnimatedImageToWordAudio override', () => {
|
||||
const { context, warnings } = makeContext({
|
||||
media: {
|
||||
syncAnimatedImageToWordAudio: false,
|
||||
},
|
||||
});
|
||||
|
||||
applyAnkiConnectResolution(context);
|
||||
|
||||
assert.equal(context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio, false);
|
||||
assert.equal(
|
||||
warnings.some((warning) => warning.path === 'ankiConnect.media.syncAnimatedImageToWordAudio'),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
test('maps legacy ankiConnect.wordField to modern ankiConnect.fields.word', () => {
|
||||
const { context, warnings } = makeContext({
|
||||
wordField: 'TargetWordLegacy',
|
||||
|
||||
@@ -31,6 +31,7 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
|
||||
'animatedMaxWidth',
|
||||
'animatedMaxHeight',
|
||||
'animatedCrf',
|
||||
'syncAnimatedImageToWordAudio',
|
||||
'audioPadding',
|
||||
'fallbackDuration',
|
||||
'maxMediaDuration',
|
||||
@@ -536,6 +537,17 @@ export function applyAnkiConnectResolution(context: ResolveContext): void {
|
||||
'Expected integer between 0 and 63.',
|
||||
);
|
||||
}
|
||||
if (!hasOwn(media, 'syncAnimatedImageToWordAudio')) {
|
||||
mapLegacy(
|
||||
'syncAnimatedImageToWordAudio',
|
||||
asBoolean,
|
||||
(value) => {
|
||||
context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio = value;
|
||||
},
|
||||
context.resolved.ankiConnect.media.syncAnimatedImageToWordAudio,
|
||||
'Expected boolean.',
|
||||
);
|
||||
}
|
||||
if (!hasOwn(media, 'audioPadding')) {
|
||||
mapLegacy(
|
||||
'audioPadding',
|
||||
|
||||
@@ -12,6 +12,12 @@ import {
|
||||
getConfiguredWordFieldName,
|
||||
getPreferredNoteFieldValue,
|
||||
} from '../../anki-field-config.js';
|
||||
import { resolveAnimatedImageLeadInSeconds } from '../../anki-integration/animated-image-sync.js';
|
||||
|
||||
// Minimal notesInfo shape consumed by the stats-server helpers below.
type StatsServerNoteInfo = {
  noteId: number;
  fields: Record<string, { value: string }>;
};
|
||||
|
||||
function parseIntQuery(raw: string | undefined, fallback: number, maxLimit?: number): number {
|
||||
if (raw === undefined) return fallback;
|
||||
@@ -40,6 +46,20 @@ function parseEventTypesQuery(raw: string | undefined): number[] | undefined {
|
||||
return parsed.length > 0 ? parsed : undefined;
|
||||
}
|
||||
|
||||
function resolveStatsNoteFieldName(
|
||||
noteInfo: StatsServerNoteInfo,
|
||||
...preferredNames: (string | undefined)[]
|
||||
): string | null {
|
||||
for (const preferredName of preferredNames) {
|
||||
if (!preferredName) continue;
|
||||
const resolved = Object.keys(noteInfo.fields).find(
|
||||
(fieldName) => fieldName.toLowerCase() === preferredName.toLowerCase(),
|
||||
);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Load known words cache from disk into a Set. Returns null if unavailable. */
|
||||
function loadKnownWordsSet(cachePath: string | undefined): Set<string> | null {
|
||||
if (!cachePath || !existsSync(cachePath)) return null;
|
||||
@@ -621,36 +641,41 @@ export function createStatsApp(
|
||||
const generateAudio = ankiConfig.media?.generateAudio !== false;
|
||||
const generateImage = ankiConfig.media?.generateImage !== false && mode !== 'audio';
|
||||
const imageType = ankiConfig.media?.imageType ?? 'static';
|
||||
const syncAnimatedImageToWordAudio =
|
||||
imageType === 'avif' && ankiConfig.media?.syncAnimatedImageToWordAudio !== false;
|
||||
|
||||
const audioPromise = generateAudio
|
||||
? mediaGen.generateAudio(sourcePath, startSec, clampedEndSec, audioPadding)
|
||||
: Promise.resolve(null);
|
||||
|
||||
let imagePromise: Promise<Buffer | null>;
|
||||
if (!generateImage) {
|
||||
imagePromise = Promise.resolve(null);
|
||||
} else if (imageType === 'avif') {
|
||||
imagePromise = mediaGen.generateAnimatedImage(
|
||||
sourcePath,
|
||||
startSec,
|
||||
clampedEndSec,
|
||||
audioPadding,
|
||||
{
|
||||
const createImagePromise = (animatedLeadInSeconds = 0): Promise<Buffer | null> => {
|
||||
if (!generateImage) {
|
||||
return Promise.resolve(null);
|
||||
}
|
||||
|
||||
if (imageType === 'avif') {
|
||||
return mediaGen.generateAnimatedImage(sourcePath, startSec, clampedEndSec, audioPadding, {
|
||||
fps: ankiConfig.media?.animatedFps ?? 10,
|
||||
maxWidth: ankiConfig.media?.animatedMaxWidth ?? 640,
|
||||
maxHeight: ankiConfig.media?.animatedMaxHeight,
|
||||
crf: ankiConfig.media?.animatedCrf ?? 35,
|
||||
},
|
||||
);
|
||||
} else {
|
||||
leadingStillDuration: animatedLeadInSeconds,
|
||||
});
|
||||
}
|
||||
|
||||
const midpointSec = (startSec + clampedEndSec) / 2;
|
||||
imagePromise = mediaGen.generateScreenshot(sourcePath, midpointSec, {
|
||||
return mediaGen.generateScreenshot(sourcePath, midpointSec, {
|
||||
format: ankiConfig.media?.imageFormat ?? 'jpg',
|
||||
quality: ankiConfig.media?.imageQuality ?? 92,
|
||||
maxWidth: ankiConfig.media?.imageMaxWidth,
|
||||
maxHeight: ankiConfig.media?.imageMaxHeight,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
const imagePromise =
|
||||
mode === 'word' && syncAnimatedImageToWordAudio
|
||||
? Promise.resolve<Buffer | null>(null)
|
||||
: createImagePromise();
|
||||
|
||||
const errors: string[] = [];
|
||||
let noteId: number;
|
||||
@@ -677,12 +702,31 @@ export function createStatsApp(
|
||||
|
||||
noteId = yomitanResult.value;
|
||||
const audioBuffer = audioResult.status === 'fulfilled' ? audioResult.value : null;
|
||||
const imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
|
||||
if (audioResult.status === 'rejected')
|
||||
errors.push(`audio: ${(audioResult.reason as Error).message}`);
|
||||
if (imageResult.status === 'rejected')
|
||||
errors.push(`image: ${(imageResult.reason as Error).message}`);
|
||||
|
||||
let imageBuffer = imageResult.status === 'fulfilled' ? imageResult.value : null;
|
||||
if (syncAnimatedImageToWordAudio && generateImage) {
|
||||
try {
|
||||
const noteInfoResult = (await client.notesInfo([noteId])) as StatsServerNoteInfo[];
|
||||
const noteInfo = noteInfoResult[0] ?? null;
|
||||
const animatedLeadInSeconds = noteInfo
|
||||
? await resolveAnimatedImageLeadInSeconds({
|
||||
config: ankiConfig,
|
||||
noteInfo,
|
||||
resolveConfiguredFieldName: (candidateNoteInfo, ...preferredNames) =>
|
||||
resolveStatsNoteFieldName(candidateNoteInfo, ...preferredNames),
|
||||
retrieveMediaFileBase64: (filename) => client.retrieveMediaFile(filename),
|
||||
})
|
||||
: 0;
|
||||
imageBuffer = await createImagePromise(animatedLeadInSeconds);
|
||||
} catch (err) {
|
||||
errors.push(`image: ${(err as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
const mediaFields: Record<string, string> = {};
|
||||
const timestamp = Date.now();
|
||||
const sentenceFieldName = ankiConfig.fields?.sentence ?? 'Sentence';
|
||||
|
||||
15
src/media-generator.test.ts
Normal file
15
src/media-generator.test.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
|
||||
import { buildAnimatedImageVideoFilter } from './media-generator';
|
||||
|
||||
// The tpad (frozen first frame) segment must come before the fps and scale
// filters so the clone happens at the source frame rate.
test('buildAnimatedImageVideoFilter prepends a cloned first frame when lead-in is provided', () => {
  assert.equal(
    buildAnimatedImageVideoFilter({
      fps: 10,
      maxWidth: 640,
      leadingStillDuration: 1.25,
    }),
    'tpad=start_duration=1.25:start_mode=clone,fps=10,scale=w=640:h=-2',
  );
});
|
||||
@@ -24,6 +24,33 @@ import { createLogger } from './logger';
|
||||
|
||||
const log = createLogger('media');
|
||||
|
||||
export function buildAnimatedImageVideoFilter(options: {
|
||||
fps?: number;
|
||||
maxWidth?: number;
|
||||
maxHeight?: number;
|
||||
leadingStillDuration?: number;
|
||||
}): string {
|
||||
const { fps = 10, maxWidth = 640, maxHeight, leadingStillDuration = 0 } = options;
|
||||
const clampedFps = Math.max(1, Math.min(60, fps));
|
||||
const vfParts: string[] = [];
|
||||
|
||||
if (leadingStillDuration > 0) {
|
||||
vfParts.push(`tpad=start_duration=${leadingStillDuration}:start_mode=clone`);
|
||||
}
|
||||
|
||||
vfParts.push(`fps=${clampedFps}`);
|
||||
|
||||
if (maxWidth && maxWidth > 0 && maxHeight && maxHeight > 0) {
|
||||
vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`);
|
||||
} else if (maxWidth && maxWidth > 0) {
|
||||
vfParts.push(`scale=w=${maxWidth}:h=-2`);
|
||||
} else if (maxHeight && maxHeight > 0) {
|
||||
vfParts.push(`scale=w=-2:h=${maxHeight}`);
|
||||
}
|
||||
|
||||
return vfParts.join(',');
|
||||
}
|
||||
|
||||
export class MediaGenerator {
|
||||
private tempDir: string;
|
||||
private notifyIconDir: string;
|
||||
@@ -289,25 +316,15 @@ export class MediaGenerator {
|
||||
maxWidth?: number;
|
||||
maxHeight?: number;
|
||||
crf?: number;
|
||||
leadingStillDuration?: number;
|
||||
} = {},
|
||||
): Promise<Buffer> {
|
||||
const start = Math.max(0, startTime - padding);
|
||||
const duration = endTime - startTime + 2 * padding;
|
||||
const { fps = 10, maxWidth = 640, maxHeight, crf = 35 } = options;
|
||||
const { fps = 10, maxWidth = 640, maxHeight, crf = 35, leadingStillDuration = 0 } = options;
|
||||
|
||||
const clampedFps = Math.max(1, Math.min(60, fps));
|
||||
const clampedCrf = Math.max(0, Math.min(63, crf));
|
||||
|
||||
const vfParts: string[] = [];
|
||||
vfParts.push(`fps=${clampedFps}`);
|
||||
if (maxWidth && maxWidth > 0 && maxHeight && maxHeight > 0) {
|
||||
vfParts.push(`scale=w=${maxWidth}:h=${maxHeight}:force_original_aspect_ratio=decrease`);
|
||||
} else if (maxWidth && maxWidth > 0) {
|
||||
vfParts.push(`scale=w=${maxWidth}:h=-2`);
|
||||
} else if (maxHeight && maxHeight > 0) {
|
||||
vfParts.push(`scale=w=-2:h=${maxHeight}`);
|
||||
}
|
||||
|
||||
const av1Encoder = await this.detectAv1Encoder();
|
||||
if (!av1Encoder) {
|
||||
throw new Error(
|
||||
@@ -338,7 +355,12 @@ export class MediaGenerator {
|
||||
'-i',
|
||||
videoPath,
|
||||
'-vf',
|
||||
vfParts.join(','),
|
||||
buildAnimatedImageVideoFilter({
|
||||
fps,
|
||||
maxWidth,
|
||||
maxHeight,
|
||||
leadingStillDuration,
|
||||
}),
|
||||
...encoderArgs,
|
||||
'-y',
|
||||
outputPath,
|
||||
|
||||
@@ -241,6 +241,7 @@ export interface AnkiConnectConfig {
|
||||
animatedMaxWidth?: number;
|
||||
animatedMaxHeight?: number;
|
||||
animatedCrf?: number;
|
||||
syncAnimatedImageToWordAudio?: boolean;
|
||||
audioPadding?: number;
|
||||
fallbackDuration?: number;
|
||||
maxMediaDuration?: number;
|
||||
@@ -745,6 +746,7 @@ export interface ResolvedConfig {
|
||||
animatedMaxWidth: number;
|
||||
animatedMaxHeight?: number;
|
||||
animatedCrf: number;
|
||||
syncAnimatedImageToWordAudio: boolean;
|
||||
audioPadding: number;
|
||||
fallbackDuration: number;
|
||||
maxMediaDuration: number;
|
||||
|
||||
Reference in New Issue
Block a user