Fix managed playback exit and tokenizer grammar splits

- Ignore background stats daemons during regular app startup
- Split standalone grammar endings before applying annotations
- Clear helper-span annotations for auxiliary-only tokens
This commit is contained in:
2026-05-02 18:36:41 -07:00
parent 2a06bfc989
commit 6607b06437
12 changed files with 583 additions and 88 deletions
+177 -24
View File
@@ -79,7 +79,7 @@ function createDeferred<T>() {
};
}
test('tokenizeSubtitle assigns JLPT level to parsed Yomitan tokens', async () => {
test('tokenizeSubtitle splits same-line grammar endings before applying annotations', async () => {
const result = await tokenizeSubtitle(
'猫です',
makeDeps({
@@ -88,35 +88,51 @@ test('tokenizeSubtitle assigns JLPT level to parsed Yomitan tokens', async () =>
({
isDestroyed: () => false,
webContents: {
executeJavaScript: async () => [
{
source: 'scanning-parser',
index: 0,
content: [
[
{
text: '',
reading: 'ねこ',
headwords: [[{ term: '猫' }]],
},
{
text: 'です',
reading: 'です',
headwords: [[{ term: 'です' }]],
},
executeJavaScript: async (script: string) => {
if (script.includes('getTermFrequencies')) {
return [];
}
return [
{
source: 'scanning-parser',
index: 0,
content: [
[
{
text: '',
reading: 'ねこ',
headwords: [[{ term: '' }]],
},
{
text: 'です',
reading: 'です',
headwords: [[{ term: 'です' }]],
},
],
],
],
},
],
},
];
},
},
}) as unknown as Electron.BrowserWindow,
tokenizeWithMecab: async () => null,
getJlptLevel: (text) => (text === '猫' ? 'N5' : null),
getFrequencyDictionaryEnabled: () => true,
getFrequencyRank: (text) => (text === '猫' ? 40 : text === 'です' ? 50 : null),
getJlptLevel: (text) => (text === '猫' || text === 'です' ? 'N5' : null),
isKnownWord: (text) => text === 'です',
}),
);
assert.equal(result.tokens?.length, 1);
assert.equal(result.tokens?.length, 2);
assert.equal(result.tokens?.[0]?.surface, '猫');
assert.equal(result.tokens?.[0]?.jlptLevel, 'N5');
assert.equal(result.tokens?.[0]?.frequencyRank, 40);
assert.equal(result.tokens?.[1]?.surface, 'です');
assert.equal(result.tokens?.[1]?.isKnown, false);
assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
assert.equal(result.tokens?.[1]?.jlptLevel, undefined);
});
test('tokenizeSubtitle preserves Yomitan name-match metadata on tokens', async () => {
@@ -204,7 +220,7 @@ test('tokenizeSubtitle applies frequency dictionary ranks', async () => {
assert.equal(result.tokens?.length, 2);
assert.equal(result.tokens?.[0]?.frequencyRank, 23);
assert.equal(result.tokens?.[1]?.frequencyRank, 1200);
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
});
test('tokenizeSubtitle uses left-to-right yomitan scanning to keep full katakana name tokens', async () => {
@@ -2383,7 +2399,7 @@ test('tokenizeSubtitle applies N+1 target marking to Yomitan results', async ()
getYomitanParserWindow: () => parserWindow,
tokenizeWithMecab: async () => null,
isKnownWord: (text) => text === 'です',
getMinSentenceWordsForNPlusOne: () => 2,
getMinSentenceWordsForNPlusOne: () => 1,
}),
);
@@ -4759,6 +4775,143 @@ test('tokenizeSubtitle clears annotations for auxiliary inflection fragments whi
);
});
// Regression test: merged Yomitan spans てく/れた (te-kureru auxiliary helpers)
// must lose all annotations (known, N+1, frequency, JLPT) even when the word
// lists and dictionaries would otherwise annotate them.
test('tokenizeSubtitle clears annotations for te-kureru auxiliary helper spans', async () => {
const result = await tokenizeSubtitle(
'ベアトリスがいてくれたから',
makeDepsFromYomitanTokens(
[
{ surface: 'ベアトリス', reading: 'べあとりす', headword: 'ベアトリス' },
{ surface: 'が', reading: 'が', headword: 'が' },
{ surface: 'い', reading: 'い', headword: 'いる' },
{ surface: 'てく', reading: 'てく', headword: 'てく' },
{ surface: 'れた', reading: 'れた', headword: 'れる' },
{ surface: 'から', reading: 'から', headword: 'から' },
],
{
// Deliberately give the helper spans ranks/levels/known status so the
// test proves the exclusion logic overrides them.
getFrequencyDictionaryEnabled: () => true,
getFrequencyRank: (text) =>
text === 'ベアトリス' ? 1000 : text === 'てく' ? 140 : text === 'れる' ? 19 : null,
getJlptLevel: (text) =>
text === 'てく' || text === 'れる' || text === 'いる' ? 'N4' : null,
isKnownWord: (text) => text === 'てく' || text === 'れる',
getMinSentenceWordsForNPlusOne: () => 1,
// MeCab fallback supplies the POS tags (助詞|動詞 etc.) that mark the
// spans as auxiliary-only helpers.
tokenizeWithMecab: async () => [
{
headword: 'ベアトリス',
surface: 'ベアトリス',
reading: 'ベアトリス',
startPos: 0,
endPos: 5,
partOfSpeech: PartOfSpeech.noun,
pos1: '名詞',
pos2: '固有名詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
headword: 'が',
surface: 'が',
reading: 'ガ',
startPos: 5,
endPos: 6,
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
pos2: '格助詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
headword: 'いる',
surface: 'い',
reading: 'イ',
startPos: 6,
endPos: 7,
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞',
pos2: '自立',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
// Pipe-joined tags: this span covers both a conjunctive particle and a
// non-independent verb — the auxiliary-only helper shape under test.
headword: 'てく',
surface: 'てく',
reading: 'テク',
startPos: 7,
endPos: 9,
partOfSpeech: PartOfSpeech.verb,
pos1: '助詞|動詞',
pos2: '接続助詞|非自立',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
headword: 'れる',
surface: 'れた',
reading: 'レタ',
startPos: 9,
endPos: 11,
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞|助動詞',
pos2: '接尾|*',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
{
headword: 'から',
surface: 'から',
reading: 'カラ',
startPos: 11,
endPos: 13,
partOfSpeech: PartOfSpeech.particle,
pos1: '助詞',
pos2: '接続助詞',
isMerged: false,
isKnown: false,
isNPlusOneTarget: false,
},
],
},
),
);
// Project each token down to the annotation fields under test.
const tokenSummary = result.tokens?.map((token) => ({
surface: token.surface,
headword: token.headword,
isKnown: token.isKnown,
isNPlusOneTarget: token.isNPlusOneTarget,
frequencyRank: token.frequencyRank,
jlptLevel: token.jlptLevel,
}));
// Both helper spans must come back fully stripped of annotations.
assert.deepEqual(
tokenSummary?.find((token) => token.surface === 'てく'),
{
surface: 'てく',
headword: 'てく',
isKnown: false,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
},
);
assert.deepEqual(
tokenSummary?.find((token) => token.surface === 'れた'),
{
surface: 'れた',
headword: 'れる',
isKnown: false,
isNPlusOneTarget: false,
frequencyRank: undefined,
jlptLevel: undefined,
},
);
});
test('tokenizeSubtitle excludes default non-independent pos2 from N+1 when JLPT/frequency are disabled', async () => {
let mecabCalls = 0;
const result = await tokenizeSubtitle(
@@ -1371,6 +1371,49 @@ test('annotateTokens clears all annotations for standalone auxiliary inflection
}
});
// Unit-level counterpart of the tokenizer test: annotateTokens itself must
// strip every annotation from auxiliary-only te-kureru helper spans, even when
// the word list and JLPT lookup would mark them known/N4.
test('annotateTokens clears all annotations for auxiliary-only te-kureru helper spans', () => {
const tokens = [
makeToken({
surface: 'てく',
headword: 'てく',
reading: 'テク',
partOfSpeech: PartOfSpeech.verb,
// Pipe-joined POS tags identify a particle+non-independent-verb span.
pos1: '助詞|動詞',
pos2: '接続助詞|非自立',
startPos: 0,
endPos: 2,
frequencyRank: 140,
}),
makeToken({
surface: 'れた',
headword: 'れる',
reading: 'レタ',
partOfSpeech: PartOfSpeech.verb,
pos1: '動詞|助動詞',
pos2: '接尾|*',
startPos: 2,
endPos: 4,
frequencyRank: 19,
}),
];
const result = annotateTokens(
tokens,
makeDeps({
// These would normally annotate both tokens — exclusion must win.
isKnownWord: (text) => text === 'てく' || text === 'れる',
getJlptLevel: (text) => (text === 'てく' || text === 'れる' ? 'N4' : null),
}),
{ minSentenceWordsForNPlusOne: 1 },
);
// Every token must be fully cleared; the surface is passed as the assertion
// message to identify the offender on failure.
for (const token of result) {
assert.equal(token.isKnown, false, token.surface);
assert.equal(token.isNPlusOneTarget, false, token.surface);
assert.equal(token.frequencyRank, undefined, token.surface);
assert.equal(token.jlptLevel, undefined, token.surface);
}
});
test('annotateTokens keeps lexical くれる forms eligible for annotation', () => {
const tokens = [
makeToken({
@@ -155,7 +155,7 @@ test('prefers the longest dictionary headword across merged segments', () => {
);
});
test('keeps the first headword when later segments are standalone words', () => {
test('splits trailing grammar endings when later segments are standalone words', () => {
const parseResults = [
makeParseItem('scanning-parser', [
[
@@ -174,10 +174,47 @@ test('keeps the first headword when later segments are standalone words', () =>
})),
[
{
surface: '猫です',
reading: 'ねこです',
surface: '猫',
reading: 'ねこ',
headword: '猫',
},
{
surface: 'です',
reading: 'です',
headword: 'です',
},
],
);
});
// じゃない is a standalone grammar ending: the selector must emit it as its
// own token instead of merging it into the preceding content word.
test('splits trailing ja-nai grammar endings from preceding content', () => {
const parseResults = [
makeParseItem('scanning-parser', [
[
{ text: 'いる', reading: 'いる', headword: 'いる' },
{ text: 'じゃない', reading: 'じゃない', headword: 'じゃない' },
],
]),
];
// () => false: no word is "known"; 'headword' selects headword match mode.
const tokens = selectYomitanParseTokens(parseResults, () => false, 'headword');
assert.deepEqual(
tokens?.map((token) => ({
surface: token.surface,
reading: token.reading,
headword: token.headword,
})),
[
{
surface: 'いる',
reading: 'いる',
headword: 'いる',
},
{
surface: 'じゃない',
reading: 'じゃない',
headword: 'じゃない',
},
],
);
});
@@ -24,6 +24,24 @@ export interface YomitanParseCandidate {
tokens: MergedToken[];
}
// Surface forms / headwords that are always split off as standalone grammar
// endings instead of being merged into the preceding content word.
// The set is the cross product of each base copula/negation form with the
// common sentence-final particles (plus the bare form) — 15 entries total,
// e.g. です, ですか, じゃない, じゃないですよ.
const STANDALONE_GRAMMAR_ENDINGS = new Set(
['です', 'じゃない', 'じゃないです'].flatMap((base) =>
['', 'か', 'ね', 'よ', 'な'].map((particle) => `${base}${particle}`),
),
);
/** Type guard: true for any non-null object (arrays included, functions excluded). */
function isObject(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === 'object';
}
@@ -141,6 +159,15 @@ function isKanaOnlyText(text: string): boolean {
return text.length > 0 && Array.from(text).every((char) => isKanaChar(char));
}
/**
 * True when a Yomitan parse segment should be split off as a standalone
 * grammar ending: either its trimmed surface text or its trimmed extracted
 * headword is in STANDALONE_GRAMMAR_ENDINGS. Segments with no headword at
 * all are never treated as grammar endings.
 */
function isStandaloneGrammarEndingSegment(segment: YomitanParseSegment): boolean {
  const headword = extractYomitanHeadword(segment).trim();
  if (headword.length === 0) {
    return false;
  }
  const surface = (segment.text ?? '').trim();
  return STANDALONE_GRAMMAR_ENDINGS.has(surface) || STANDALONE_GRAMMAR_ENDINGS.has(headword);
}
function shouldMergeKanaContinuation(
previousToken: MergedToken | undefined,
continuationSurface: string,
@@ -186,20 +213,97 @@ export function mapYomitanParseResultItemToMergedTokens(
let combinedSurface = '';
let combinedReading = '';
let combinedStart = charOffset;
let firstHeadword = '';
const expandedHeadwords: string[] = [];
const pushToken = (
surface: string,
reading: string,
headword: string,
start: number,
end: number,
): void => {
tokens.push({
surface,
reading,
headword,
startPos: start,
endPos: end,
partOfSpeech: PartOfSpeech.other,
pos1: '',
isMerged: true,
isNPlusOneTarget: false,
isKnown: (() => {
const matchText = resolveKnownWordText(surface, headword, knownWordMatchMode);
return matchText ? isKnownWord(matchText) : false;
})(),
});
};
const flushCombinedToken = (end: number): void => {
if (!combinedSurface) {
combinedStart = end;
return;
}
const combinedHeadword = selectMergedHeadword(
firstHeadword,
expandedHeadwords,
combinedSurface,
);
if (!combinedHeadword) {
const previousToken = tokens[tokens.length - 1];
if (shouldMergeKanaContinuation(previousToken, combinedSurface)) {
previousToken.surface += combinedSurface;
previousToken.reading += combinedReading;
previousToken.endPos = end;
}
} else {
hasDictionaryMatch = true;
pushToken(combinedSurface, combinedReading, combinedHeadword, combinedStart, end);
}
combinedSurface = '';
combinedReading = '';
firstHeadword = '';
expandedHeadwords.length = 0;
combinedStart = end;
};
for (const segment of line) {
const segmentText = segment.text;
if (!segmentText || segmentText.length === 0) {
continue;
}
const segmentStart = charOffset;
const segmentEnd = segmentStart + segmentText.length;
charOffset = segmentEnd;
combinedSurface += segmentText;
if (typeof segment.reading === 'string') {
combinedReading += segment.reading;
}
const segmentHeadword = extractYomitanHeadword(segment);
if (isStandaloneGrammarEndingSegment(segment)) {
combinedSurface = combinedSurface.slice(0, -segmentText.length);
if (typeof segment.reading === 'string') {
combinedReading = combinedReading.slice(0, -segment.reading.length);
}
flushCombinedToken(segmentStart);
const grammarHeadword = segmentHeadword || segmentText;
hasDictionaryMatch = true;
pushToken(
segmentText,
typeof segment.reading === 'string' ? segment.reading : '',
grammarHeadword,
segmentStart,
segmentEnd,
);
combinedStart = segmentEnd;
continue;
}
if (segmentHeadword) {
if (!firstHeadword) {
firstHeadword = segmentHeadword;
@@ -210,49 +314,7 @@ export function mapYomitanParseResultItemToMergedTokens(
}
}
if (!combinedSurface) {
continue;
}
const start = charOffset;
const end = start + combinedSurface.length;
charOffset = end;
const combinedHeadword = selectMergedHeadword(
firstHeadword,
expandedHeadwords,
combinedSurface,
);
if (!combinedHeadword) {
const previousToken = tokens[tokens.length - 1];
if (shouldMergeKanaContinuation(previousToken, combinedSurface)) {
previousToken.surface += combinedSurface;
previousToken.reading += combinedReading;
previousToken.endPos = end;
continue;
}
// No dictionary-backed headword for this merged unit; skip it entirely so
// downstream keyboard/frequency/JLPT flows only operate on lookup-backed tokens.
continue;
}
hasDictionaryMatch = true;
const headword = combinedHeadword;
tokens.push({
surface: combinedSurface,
reading: combinedReading,
headword,
startPos: start,
endPos: end,
partOfSpeech: PartOfSpeech.other,
pos1: '',
isMerged: true,
isNPlusOneTarget: false,
isKnown: (() => {
const matchText = resolveKnownWordText(combinedSurface, headword, knownWordMatchMode);
return matchText ? isKnownWord(matchText) : false;
})(),
});
flushCombinedToken(charOffset);
}
if (validLineCount === 0 || tokens.length === 0 || !hasDictionaryMatch) {
@@ -84,12 +84,7 @@ const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_THOUGHT_SUFFIXES = [
'かな',
'かね',
] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_POLITE_COPULA_SUFFIXES = [
'か',
'ね',
'よ',
'な',
] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_POLITE_COPULA_SUFFIXES = ['', 'か', 'ね', 'よ', 'な'] as const;
const SUBTITLE_ANNOTATION_EXCLUDED_JA_NAI_SUFFIXES = [
'',
'か',
@@ -129,6 +124,8 @@ const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
const NON_INDEPENDENT_NOUN_HELPER_TAIL_POS1 = new Set(['助詞', '助動詞']);
const AUXILIARY_INFLECTION_TRAILING_POS1 = new Set(['助動詞']);
const AUXILIARY_HELPER_SPAN_POS1 = new Set(['助詞', '助動詞', '動詞']);
const LEXICAL_VERB_POS2 = new Set(['自立']);
const STANDALONE_GRAMMAR_PARTICLE_SURFACES = new Set([
'か',
'が',
@@ -396,6 +393,27 @@ function isStandaloneAuxiliaryInflectionFragment(token: MergedToken): boolean {
);
}
/**
 * Detects merged helper spans (e.g. てく / れた in te-kureru constructions)
 * that are purely auxiliary grammar and should be excluded from annotations.
 * Requires: kana-only surface and headword; pipe-joined pos1 tags that are a
 * genuine particle+verb mix drawn only from AUXILIARY_HELPER_SPAN_POS1; and
 * no lexical (自立) verb component among the pos2 tags.
 * NOTE(review): assumes pos tags are pipe-joined upstream — confirm against
 * splitNormalizedTagParts.
 */
function isAuxiliaryOnlyHelperSpan(token: MergedToken): boolean {
  const surface = normalizeKana(token.surface);
  const headword = normalizeKana(token.headword);
  if (!(isKanaOnlyText(surface) && isKanaOnlyText(headword))) {
    return false;
  }
  const pos1Parts = splitNormalizedTagParts(normalizePosTag(token.pos1));
  const isParticleVerbMix =
    pos1Parts.length > 0 &&
    pos1Parts.includes('助詞') &&
    pos1Parts.includes('動詞') &&
    pos1Parts.every((part) => AUXILIARY_HELPER_SPAN_POS1.has(part));
  if (!isParticleVerbMix) {
    return false;
  }
  const pos2Parts = splitNormalizedTagParts(normalizePosTag(token.pos2));
  return pos2Parts.every((part) => !LEXICAL_VERB_POS2.has(part));
}
function isStandaloneSuruTeGrammarHelper(token: MergedToken): boolean {
const normalizedSurface = normalizeKana(token.surface);
const normalizedHeadword = normalizeKana(token.headword);
@@ -404,7 +422,9 @@ function isStandaloneSuruTeGrammarHelper(token: MergedToken): boolean {
}
const pos1Parts = splitNormalizedTagParts(normalizePosTag(token.pos1));
return isKanaOnlyText(normalizedSurface) && (pos1Parts.length === 0 || pos1Parts.includes('動詞'));
return (
isKanaOnlyText(normalizedSurface) && (pos1Parts.length === 0 || pos1Parts.includes('動詞'))
);
}
function isStandaloneGrammarParticle(token: MergedToken): boolean {
@@ -518,6 +538,10 @@ export function shouldExcludeTokenFromSubtitleAnnotations(
return true;
}
if (isAuxiliaryOnlyHelperSpan(token)) {
return true;
}
if (isStandaloneSuruTeGrammarHelper(token)) {
return true;
}
@@ -162,6 +162,48 @@ test('mpv main event main deps wire subtitle callbacks without suppression gate'
assert.equal(typeof deps.setCurrentSubText, 'function');
});
// A managedPlayback initial arg must arm both quit-on-disconnect gates, the
// same way jellyfinPlay/youtubePlay already do.
test('mpv main event main deps treat managed playback as quit-on-disconnect', () => {
const deps = createBuildBindMpvMainEventHandlersMainDepsHandler({
appState: {
// Only managedPlayback is set — no jellyfin/youtube args.
initialArgs: { managedPlayback: true },
overlayRuntimeInitialized: false,
mpvClient: null,
immersionTracker: null,
subtitleTimingTracker: null,
currentSubText: '',
currentSubAssText: '',
playbackPaused: null,
previousSecondarySubVisibility: false,
},
getQuitOnDisconnectArmed: () => true,
// Remaining collaborators are inert no-op stubs; this test only inspects
// the two quit-on-disconnect predicates below.
scheduleQuitCheck: () => {},
quitApp: () => {},
reportJellyfinRemoteStopped: () => {},
syncOverlayMpvSubtitleSuppression: () => {},
maybeRunAnilistPostWatchUpdate: async () => {},
logSubtitleTimingError: () => {},
broadcastToOverlayWindows: () => {},
onSubtitleChange: () => {},
ensureImmersionTrackerInitialized: () => {},
updateCurrentMediaPath: () => {},
restoreMpvSubVisibility: () => {},
resetSubtitleSidebarEmbeddedLayout: () => {},
getCurrentAnilistMediaKey: () => null,
resetAnilistMediaTracking: () => {},
maybeProbeAnilistDuration: () => {},
ensureAnilistMediaGuess: () => {},
syncImmersionMediaState: () => {},
updateCurrentMediaTitle: () => {},
resetAnilistMediaGuessState: () => {},
reportJellyfinRemoteProgress: () => {},
updateSubtitleRenderMetrics: () => {},
refreshDiscordPresence: () => {},
})();
assert.equal(deps.hasInitialPlaybackQuitOnDisconnectArg(), true);
assert.equal(deps.shouldQuitOnDisconnectWhenOverlayRuntimeInitialized(), true);
});
test('flushPlaybackPositionOnMediaPathClear ignores disconnected mpv time-pos reads', async () => {
const recorded: number[] = [];
const deps = createBuildBindMpvMainEventHandlersMainDepsHandler({
+11 -3
View File
@@ -2,7 +2,11 @@ import type { MergedToken, SubtitleData } from '../../types';
export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: {
appState: {
initialArgs?: { jellyfinPlay?: unknown; youtubePlay?: unknown } | null;
initialArgs?: {
jellyfinPlay?: unknown;
managedPlayback?: unknown;
youtubePlay?: unknown;
} | null;
overlayRuntimeInitialized: boolean;
mpvClient: {
connected?: boolean;
@@ -79,10 +83,14 @@ export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: {
reportJellyfinRemoteStopped: () => deps.reportJellyfinRemoteStopped(),
syncOverlayMpvSubtitleSuppression: () => deps.syncOverlayMpvSubtitleSuppression(),
hasInitialPlaybackQuitOnDisconnectArg: () =>
Boolean(deps.appState.initialArgs?.jellyfinPlay || deps.appState.initialArgs?.youtubePlay),
Boolean(
deps.appState.initialArgs?.managedPlayback ||
deps.appState.initialArgs?.jellyfinPlay ||
deps.appState.initialArgs?.youtubePlay,
),
isOverlayRuntimeInitialized: () => deps.appState.overlayRuntimeInitialized,
shouldQuitOnDisconnectWhenOverlayRuntimeInitialized: () =>
Boolean(deps.appState.initialArgs?.youtubePlay),
Boolean(deps.appState.initialArgs?.managedPlayback || deps.appState.initialArgs?.youtubePlay),
isQuitOnDisconnectArmed: () => deps.getQuitOnDisconnectArmed(),
scheduleQuitCheck: (callback: () => void) => deps.scheduleQuitCheck(callback),
isMpvConnected: () => Boolean(deps.appState.mpvClient?.connected),
@@ -36,14 +36,14 @@ function createHarness(options?: {
};
}
test('stats server routing defers to a live background daemon from another process', () => {
test('stats server routing ignores a live background daemon from another process', () => {
const { calls, handler } = createHarness({
state: { pid: 200, port: 7979, startedAtMs: 1 },
processAlive: true,
});
assert.deepEqual(handler(), { url: 'http://127.0.0.1:7979', source: 'foreign' });
assert.deepEqual(calls, ['readBackgroundState', 'isProcessAlive']);
assert.deepEqual(handler(), { url: 'http://127.0.0.1:6969', source: 'local' });
assert.deepEqual(calls, ['readBackgroundState', 'isProcessAlive', 'startLocalStatsServer']);
});
test('stats server routing clears dead daemon state and starts local server', () => {
+1 -5
View File
@@ -14,9 +14,7 @@ function formatStatsServerUrl(port: number): string {
return `http://127.0.0.1:${port}`;
}
export type EnsureStatsServerUrlResult =
| { url: string; source: 'foreign' }
| { url: string; source: 'local' };
export type EnsureStatsServerUrlResult = { url: string; source: 'local' };
export function createEnsureStatsServerUrlHandler(
deps: EnsureStatsServerUrlDeps,
@@ -29,8 +27,6 @@ export function createEnsureStatsServerUrlHandler(
deps.removeBackgroundState();
} else if (!deps.isProcessAlive(state.pid)) {
deps.removeBackgroundState();
} else if (state.pid !== deps.currentPid) {
return { url: formatStatsServerUrl(state.port), source: 'foreign' };
}
if (!deps.hasLocalStatsServer()) {