mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-06-09 15:13:32 -07:00
feat: use cached annotations on subtitle change and skip pre-warmed cues (#97)
This commit is contained in:
@@ -242,3 +242,59 @@ test('prefetch service pause/resume halts and continues tokenization', async ()
|
||||
|
||||
assert.ok(tokenizeCalls > callsWhenPaused + 1, 'Should resume tokenizing after unpause');
|
||||
});
|
||||
|
||||
// Cues that the injected `hasCachedTokenization` hook reports as cached must
// never be handed to the tokenizer; cues it does not report are still processed.
test('prefetch service skips cues already present in tokenization cache', async () => {
  // The 'line-N' texts below are presumably what makeCues generates — confirm
  // against the helper if this test starts failing.
  const alreadyCached = new Set(['line-0', 'line-1']);
  const seenByTokenizer: string[] = [];

  const prefetcher = createSubtitlePrefetchService({
    cues: makeCues(5),
    tokenizeSubtitle: async (text) => {
      seenByTokenizer.push(text);
      return { text, tokens: [] };
    },
    preCacheTokenization: () => {},
    hasCachedTokenization: (text) => alreadyCached.has(text),
    isCacheFull: () => false,
    priorityWindowSize: 3,
  });

  prefetcher.start(0);
  // Give the async prefetch loop ten microtask turns to make progress.
  let remainingFlushes = 10;
  while (remainingFlushes > 0) {
    await flushMicrotasks();
    remainingFlushes -= 1;
  }
  prefetcher.stop();

  assert.ok(!seenByTokenizer.includes('line-0'));
  assert.ok(!seenByTokenizer.includes('line-1'));
  assert.ok(seenByTokenizer.includes('line-2'));
});
|
||||
|
||||
// Two cues sharing the same text must trigger only one tokenizer call within a
// single prefetch run; cues with different text are still tokenized.
test('prefetch service deduplicates repeated cue text within a run', async () => {
  const seenByTokenizer: string[] = [];
  const repeatedCues: SubtitleCue[] = [
    { startTime: 0, endTime: 1, text: 'same' },
    { startTime: 1, endTime: 2, text: 'same' },
    { startTime: 2, endTime: 3, text: 'other' },
  ];

  // No hasCachedTokenization hook is supplied here, so any de-duplication
  // observed comes from the service itself.
  const prefetcher = createSubtitlePrefetchService({
    cues: repeatedCues,
    tokenizeSubtitle: async (text) => {
      seenByTokenizer.push(text);
      return { text, tokens: [] };
    },
    preCacheTokenization: () => {},
    isCacheFull: () => false,
    priorityWindowSize: 3,
  });

  prefetcher.start(0);
  // Give the async prefetch loop ten microtask turns to make progress.
  let remainingFlushes = 10;
  while (remainingFlushes > 0) {
    await flushMicrotasks();
    remainingFlushes -= 1;
  }
  prefetcher.stop();

  const sameTextCalls = seenByTokenizer.filter((text) => text === 'same');
  assert.deepEqual(sameTextCalls, ['same']);
  assert.ok(seenByTokenizer.includes('other'));
});
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import type { SubtitleData } from '../../types';
|
||||
import type { SubtitleCue } from '../../types';
|
||||
import { normalizeSubtitleCacheKey } from './subtitle-processing-controller';
|
||||
|
||||
/**
 * Collaborators injected into `createSubtitlePrefetchService`.
 */
export interface SubtitlePrefetchServiceDeps {
  /** Subtitle cues the service walks when prefetching. */
  cues: SubtitleCue[];

  /** Tokenizes one cue's text; the type allows it to resolve to `null`. */
  tokenizeSubtitle: (text: string) => Promise<SubtitleData | null>;

  /**
   * Receives a cue's text together with its tokenized data.
   * NOTE(review): presumably stores the result in the controller's cache — confirm at the call site.
   */
  preCacheTokenization: (text: string, data: SubtitleData) => void;

  /**
   * Optional probe for an existing cache entry. When it returns `true` for a
   * cue's text, the prefetcher skips tokenizing that cue.
   */
  hasCachedTokenization?: (text: string) => boolean;

  /**
   * Reports whether the tokenization cache is full.
   * NOTE(review): presumably gates prefetching outside the priority window — confirm in the service body.
   */
  isCacheFull: () => boolean;

  /**
   * Optional size for the priority window.
   * NOTE(review): unit (cue count) and default are not visible here — confirm.
   */
  priorityWindowSize?: number;
}
|
||||
@@ -58,6 +60,7 @@ export function createSubtitlePrefetchService(
|
||||
async function tokenizeCueList(
|
||||
cuesToProcess: SubtitleCue[],
|
||||
runId: number,
|
||||
warmedKeys: Set<string>,
|
||||
options: { allowWhenCacheFull?: boolean } = {},
|
||||
): Promise<void> {
|
||||
for (const cue of cuesToProcess) {
|
||||
@@ -78,6 +81,15 @@ export function createSubtitlePrefetchService(
|
||||
return;
|
||||
}
|
||||
|
||||
const cacheKey = normalizeSubtitleCacheKey(cue.text);
|
||||
if (!cacheKey || warmedKeys.has(cacheKey) || deps.hasCachedTokenization?.(cue.text)) {
|
||||
if (cacheKey) {
|
||||
warmedKeys.add(cacheKey);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
warmedKeys.add(cacheKey);
|
||||
|
||||
try {
|
||||
const result = await deps.tokenizeSubtitle(cue.text);
|
||||
if (result && !stopped && runId === currentRunId) {
|
||||
@@ -94,10 +106,11 @@ export function createSubtitlePrefetchService(
|
||||
|
||||
async function startPrefetching(currentTimeSeconds: number, runId: number): Promise<void> {
|
||||
const cues = deps.cues;
|
||||
const warmedKeys = new Set<string>();
|
||||
|
||||
// Phase 1: Priority window
|
||||
const priorityCues = computePriorityWindow(cues, currentTimeSeconds, windowSize);
|
||||
await tokenizeCueList(priorityCues, runId, { allowWhenCacheFull: true });
|
||||
await tokenizeCueList(priorityCues, runId, warmedKeys, { allowWhenCacheFull: true });
|
||||
|
||||
if (stopped || runId !== currentRunId) {
|
||||
return;
|
||||
@@ -108,7 +121,7 @@ export function createSubtitlePrefetchService(
|
||||
const remainingCues = cues.filter(
|
||||
(cue) => cue.startTime > currentTimeSeconds && !priorityTexts.has(cue.text),
|
||||
);
|
||||
await tokenizeCueList(remainingCues, runId);
|
||||
await tokenizeCueList(remainingCues, runId, warmedKeys);
|
||||
|
||||
if (stopped || runId !== currentRunId) {
|
||||
return;
|
||||
@@ -118,7 +131,7 @@ export function createSubtitlePrefetchService(
|
||||
const earlierCues = cues.filter(
|
||||
(cue) => cue.startTime <= currentTimeSeconds && !priorityTexts.has(cue.text),
|
||||
);
|
||||
await tokenizeCueList(earlierCues, runId);
|
||||
await tokenizeCueList(earlierCues, runId, warmedKeys);
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
@@ -236,6 +236,31 @@ test('consumeCachedSubtitle returns prefetched payload and prevents reprocessing
|
||||
assert.deepEqual(emitted, []);
|
||||
});
|
||||
|
||||
// hasCachedSubtitle must report a pre-cached entry (matching on the normalized
// key) without consuming it: the following subtitle change is served from the
// cache, and the entry only disappears once the cache is invalidated.
test('hasCachedSubtitle checks prefetched entries without consuming them', async () => {
  const rawAssText = '猫\\Nです'; // literal backslash-N line break as stored in the cue
  const normalizedText = '猫\nです'; // same line with a real newline
  const buildPayload = (): SubtitleData => ({ text: normalizedText, tokens: [] });

  let tokenizerInvocations = 0;
  const emittedPayloads: SubtitleData[] = [];
  const controller = createSubtitleProcessingController({
    tokenizeSubtitle: async (text) => {
      tokenizerInvocations += 1;
      return { text, tokens: [] };
    },
    emitSubtitle: (payload) => emittedPayloads.push(payload),
  });

  // Cache under the raw form, then look it up under the normalized form.
  controller.preCacheTokenization(rawAssText, buildPayload());
  assert.equal(controller.hasCachedSubtitle(normalizedText), true);

  // The subtitle change must be answered from the cache, not the tokenizer.
  controller.onSubtitleChange(normalizedText);
  await flushMicrotasks();
  assert.equal(tokenizerInvocations, 0);
  assert.deepEqual(emittedPayloads, [buildPayload()]);

  controller.invalidateTokenizationCache();
  assert.equal(controller.hasCachedSubtitle(normalizedText), false);
});
|
||||
|
||||
test('isCacheFull returns false when cache is below limit', () => {
|
||||
const controller = createSubtitleProcessingController({
|
||||
tokenizeSubtitle: async (text) => ({ text, tokens: null }),
|
||||
|
||||
@@ -13,10 +13,11 @@ export interface SubtitleProcessingController {
|
||||
invalidateTokenizationCache: () => void;
|
||||
preCacheTokenization: (text: string, data: SubtitleData) => void;
|
||||
consumeCachedSubtitle: (text: string) => SubtitleData | null;
|
||||
hasCachedSubtitle: (text: string) => boolean;
|
||||
isCacheFull: () => boolean;
|
||||
}
|
||||
|
||||
function normalizeSubtitleCacheKey(text: string): string {
|
||||
/**
 * Builds the canonical cache key for a subtitle line.
 *
 * CRLF pairs and the literal two-character escapes `\N` and `\n` are each
 * turned into a single newline, then surrounding whitespace is trimmed.
 *
 * @param text - Raw subtitle text.
 * @returns The normalized key; an empty string when nothing but whitespace remains.
 */
export function normalizeSubtitleCacheKey(text: string): string {
  // One pass is enough: the three patterns share no characters that could
  // overlap, and replacing one can never produce another.
  const lineBreakVariants = /\r\n|\\[Nn]/g;
  const unified = text.replace(lineBreakVariants, '\n');
  return unified.trim();
}
|
||||
|
||||
@@ -152,6 +153,9 @@ export function createSubtitleProcessingController(
|
||||
refreshRequested = false;
|
||||
return cached;
|
||||
},
|
||||
// Checks for an entry under the normalized key; the lookup uses `has`, so the
// entry itself is left in place.
hasCachedSubtitle: (text: string) => tokenizationCache.has(normalizeSubtitleCacheKey(text)),
|
||||
// True once the number of cached entries has reached the configured limit.
isCacheFull: () => tokenizationCache.size >= SUBTITLE_TOKENIZATION_CACHE_LIMIT,
|
||||
|
||||
Reference in New Issue
Block a user