Decouple stats daemon and preserve final mine OSD status

- Run `subminer stats -b` as a dedicated daemon process, independent from the overlay app
- Stop Anki progress spinner before showing final `✓`/`x` mine result so it is not overwritten
- Keep grammar/noise subtitle tokens hoverable while stripping annotation metadata
This commit is contained in:
2026-03-18 23:49:27 -07:00
parent 4d96ebf5c0
commit a954f62f55
32 changed files with 1879 additions and 78 deletions

View File

@@ -1,7 +1,12 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import { MergedToken, PartOfSpeech } from '../../../types';
import { annotateTokens, AnnotationStageDeps } from './annotation-stage';
import {
annotateTokens,
AnnotationStageDeps,
shouldExcludeTokenFromSubtitleAnnotations,
stripSubtitleAnnotationMetadata,
} from './annotation-stage';
function makeToken(overrides: Partial<MergedToken> = {}): MergedToken {
return {
@@ -150,6 +155,170 @@ test('annotateTokens handles JLPT disabled and eligibility exclusion paths', ()
assert.equal(excludedLookupCalls, 0);
});
// Every merged explanatory-ending surface below must be reported as excluded.
// Cases without a pos2 entry deliberately omit the key so makeToken's own
// default (if any) is left untouched.
test('shouldExcludeTokenFromSubtitleAnnotations excludes explanatory ending variants', () => {
  const explanatoryEndingOverrides: Array<Partial<MergedToken>> = [
    { surface: 'んです', headword: 'ん', reading: 'ンデス', pos1: '名詞|助動詞', pos2: '非自立' },
    { surface: 'のだ', headword: 'の', reading: 'ノダ', pos1: '名詞|助動詞', pos2: '非自立' },
    { surface: 'んだ', headword: 'ん', reading: 'ンダ', pos1: '名詞|助動詞', pos2: '非自立' },
    { surface: 'のです', headword: 'の', reading: 'ノデス', pos1: '名詞|助動詞', pos2: '非自立' },
    {
      surface: 'なんです',
      headword: 'だ',
      reading: 'ナンデス',
      pos1: '助動詞|名詞|助動詞',
      pos2: '|非自立',
    },
    { surface: 'んでした', headword: 'ん', reading: 'ンデシタ', pos1: '助動詞|助動詞|助動詞' },
    { surface: 'のでは', headword: 'の', reading: 'ノデハ', pos1: '助詞|接続詞' },
  ];

  explanatoryEndingOverrides
    .map((overrides) => makeToken(overrides))
    .forEach((candidate) => {
      // The surface is passed as the assertion message to identify a failing case.
      assert.equal(shouldExcludeTokenFromSubtitleAnnotations(candidate), true, candidate.surface);
    });
});
// A plain noun that is not an explanatory ending must remain annotatable.
test('shouldExcludeTokenFromSubtitleAnnotations keeps lexical tokens outside explanatory ending family', () => {
  const lexicalNounOverrides: Partial<MergedToken> = {
    surface: '問題',
    headword: '問題',
    reading: 'モンダイ',
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    pos2: '一般',
  };

  const isExcluded = shouldExcludeTokenFromSubtitleAnnotations(makeToken(lexicalNounOverrides));

  assert.equal(isExcluded, false);
});
// Tokens whose pos1 is solely a particle, auxiliary, or adnominal tag must be excluded.
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone particles auxiliaries and adnominals', () => {
  const grammarOnlyOverrides: Array<Partial<MergedToken>> = [
    {
      surface: 'は',
      headword: 'は',
      reading: 'ハ',
      partOfSpeech: PartOfSpeech.particle,
      pos1: '助詞',
    },
    {
      surface: 'です',
      headword: 'です',
      reading: 'デス',
      partOfSpeech: PartOfSpeech.bound_auxiliary,
      pos1: '助動詞',
    },
    {
      surface: 'この',
      headword: 'この',
      reading: 'コノ',
      partOfSpeech: PartOfSpeech.other,
      pos1: '連体詞',
    },
  ];

  grammarOnlyOverrides
    .map((overrides) => makeToken(overrides))
    .forEach((candidate) => {
      // The surface is passed as the assertion message to identify a failing case.
      assert.equal(shouldExcludeTokenFromSubtitleAnnotations(candidate), true, candidate.surface);
    });
});
// A verb merged with a trailing auxiliary still carries lexical content and must be kept.
test('shouldExcludeTokenFromSubtitleAnnotations keeps mixed content tokens with trailing helpers', () => {
  const verbWithHelperOverrides: Partial<MergedToken> = {
    surface: '行きます',
    headword: '行く',
    reading: 'イキマス',
    partOfSpeech: PartOfSpeech.verb,
    pos1: '動詞|助動詞',
    pos2: '自立',
  };

  const isExcluded = shouldExcludeTokenFromSubtitleAnnotations(makeToken(verbWithHelperOverrides));

  assert.equal(isExcluded, false);
});
// An adverb merged with a trailing "って" particle must be excluded.
test('shouldExcludeTokenFromSubtitleAnnotations excludes merged lexical tokens with trailing quote particles', () => {
  const quotedAdverbOverrides: Partial<MergedToken> = {
    surface: 'どうしてもって',
    headword: 'どうしても',
    reading: 'ドウシテモッテ',
    partOfSpeech: PartOfSpeech.other,
    pos1: '副詞|助詞',
    pos2: '一般|格助詞',
  };

  const isExcluded = shouldExcludeTokenFromSubtitleAnnotations(makeToken(quotedAdverbOverrides));

  assert.equal(isExcluded, true);
});
// For an excluded token (a bare particle) the annotation fields are reset while
// every other field of the token is carried over unchanged.
test('stripSubtitleAnnotationMetadata keeps token hover data while clearing annotation fields', () => {
  const annotatedParticle = makeToken({
    surface: 'は',
    headword: 'は',
    reading: 'ハ',
    partOfSpeech: PartOfSpeech.particle,
    pos1: '助詞',
    isKnown: true,
    isNPlusOneTarget: true,
    isNameMatch: true,
    jlptLevel: 'N5',
    frequencyRank: 12,
  });

  const stripped = stripSubtitleAnnotationMetadata(annotatedParticle);
  const expected = {
    ...annotatedParticle,
    isKnown: false,
    isNPlusOneTarget: false,
    isNameMatch: false,
    jlptLevel: undefined,
    frequencyRank: undefined,
  };

  assert.deepEqual(stripped, expected);
});
// A content noun is not excluded, so the very same object reference must come back.
test('stripSubtitleAnnotationMetadata leaves content tokens unchanged', () => {
  const contentNounOverrides: Partial<MergedToken> = {
    surface: '猫',
    headword: '猫',
    reading: 'ネコ',
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    isKnown: true,
    jlptLevel: 'N5',
    frequencyRank: 42,
  };
  const contentNoun = makeToken(contentNounOverrides);

  const result = stripSubtitleAnnotationMetadata(contentNoun);

  assert.strictEqual(result, contentNoun);
});
test('annotateTokens prioritizes name matches over n+1, frequency, and JLPT when enabled', () => {
let jlptLookupCalls = 0;
const tokens = [

View File

@@ -25,6 +25,45 @@ const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
'ふう',
'ほう',
]);
/** Leading pieces of an explanatory ending. */
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];

/** Middle pieces that may follow one of the prefixes. */
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
  'だ',
  'です',
  'でした',
  'だった',
  'では',
  'じゃ',
  'でしょう',
  'だろう',
] as const;

/** Optional closing pieces; the empty string stands for "no trailing particle". */
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES = [
  '',
  'か',
  'ね',
  'よ',
  'な',
  'よね',
  'かな',
  'かね',
] as const;

/**
 * Every string formed by concatenating one prefix, one core and one trailing
 * particle, in that order. Entries are inserted prefix-major, then by core,
 * then by trailing particle.
 */
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS = new Set<string>();
for (const endingPrefix of SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES) {
  for (const endingCore of SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES) {
    for (const closingParticle of SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_TRAILING_PARTICLES) {
      SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.add(
        `${endingPrefix}${endingCore}${closingParticle}`,
      );
    }
  }
}

/** Surface remainders (after the headword) recognised as a trailing "って" particle chain. */
const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
  'って',
  'ってよ',
  'ってね',
  'ってな',
  'ってさ',
  'ってか',
  'ってば',
]);
const jlptLevelLookupCaches = new WeakMap<
(text: string) => JlptLevel | null,
@@ -60,6 +99,7 @@ function normalizePos1Tag(pos1: string | undefined): string {
}
/** pos1 tags (interjection) consulted when excluding tokens from subtitle annotations. */
const SUBTITLE_ANNOTATION_EXCLUDED_POS1 = new Set<string>(['感動詞']);

/** pos1 tags treated as grammar-only: particle, auxiliary verb, adnominal. */
const SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1 = new Set<string>([
  '助詞',
  '助動詞',
  '連体詞',
]);
function splitNormalizedTagParts(normalizedTag: string): string[] {
if (!normalizedTag) {
@@ -84,7 +124,36 @@ function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<strin
/**
 * Decides from a normalized pos1 tag whether a token should be excluded from
 * subtitle annotations.
 *
 * @param normalizedPos1 - The already-normalized pos1 tag; it is split into
 *   parts with `splitNormalizedTagParts`.
 * @returns `true` when any part is in `SUBTITLE_ANNOTATION_EXCLUDED_POS1`, or
 *   when there is at least one part and every part is in
 *   `SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1`; `false` otherwise.
 */
function isExcludedFromSubtitleAnnotationsByPos1(normalizedPos1: string): boolean {
  const parts = splitNormalizedTagParts(normalizedPos1);
  // A single always-excluded part is enough to exclude the whole token.
  // (The superseded unconditional `return parts.some(...)` was removed here: it
  // made the grammar-only check below unreachable.)
  if (parts.some((part) => SUBTITLE_ANNOTATION_EXCLUDED_POS1.has(part))) {
    return true;
  }

  // Grammar-only tokens are excluded only when *all* parts are grammar tags, so
  // mixed tokens that also carry a non-grammar part are kept. The length guard
  // stops an empty part list from passing vacuously through `every`.
  return parts.length > 0 && parts.every((part) => SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(part));
}
/**
 * Detects a merged token whose surface is its headword followed by one of the
 * known trailing-particle suffixes, and whose pos1 parts are a non-grammar
 * leading tag followed solely by `助詞` tags.
 *
 * @param token - The merged token to inspect (`surface`, `headword`, `pos1` are read).
 * @returns `true` only when all of the conditions above hold.
 */
function isExcludedTrailingParticleMergedToken(token: MergedToken): boolean {
  const surfaceText = normalizeJlptTextForExclusion(token.surface);
  const headwordText = normalizeJlptTextForExclusion(token.headword);
  if (!surfaceText || !headwordText) {
    return false;
  }
  if (!surfaceText.startsWith(headwordText)) {
    return false;
  }

  // Whatever follows the headword in the surface must be a listed suffix.
  const remainder = surfaceText.slice(headwordText.length);
  if (!SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES.has(remainder)) {
    return false;
  }

  const tagParts = splitNormalizedTagParts(normalizePos1Tag(token.pos1));
  const headTag = tagParts[0];
  const tailTags = tagParts.slice(1);

  // Needs at least two parts, and the first must be a present, non-grammar tag.
  if (tailTags.length === 0 || !headTag || SUBTITLE_ANNOTATION_GRAMMAR_ONLY_POS1.has(headTag)) {
    return false;
  }

  return tailTags.every((tag) => tag === '助詞');
}
function resolvePos1Exclusions(options: AnnotationStageOptions): ReadonlySet<string> {
@@ -520,12 +589,7 @@ function isJlptEligibleToken(token: MergedToken): boolean {
}
function isExcludedFromSubtitleAnnotationsByTerm(token: MergedToken): boolean {
const candidates = [
resolveJlptLookupText(token),
token.surface,
token.headword,
token.reading,
].filter(
const candidates = [token.surface, token.reading, resolveJlptLookupText(token)].filter(
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
);
@@ -542,7 +606,9 @@ function isExcludedFromSubtitleAnnotationsByTerm(token: MergedToken): boolean {
if (
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(trimmedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalizedCandidate)
SUBTITLE_ANNOTATION_EXCLUDED_TERMS.has(normalizedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(trimmedCandidate) ||
SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDINGS.has(normalizedCandidate)
) {
return true;
}
@@ -565,9 +631,28 @@ export function shouldExcludeTokenFromSubtitleAnnotations(token: MergedToken): b
return true;
}
if (isExcludedTrailingParticleMergedToken(token)) {
return true;
}
return isExcludedFromSubtitleAnnotationsByTerm(token);
}
/**
 * Clears annotation metadata on tokens that are excluded from subtitle
 * annotations, leaving every other token field as it was.
 *
 * @param token - The merged token to process.
 * @returns The same `token` reference when it is not excluded; otherwise a
 *   shallow copy with `isKnown`, `isNPlusOneTarget` and `isNameMatch` set to
 *   `false` and `jlptLevel` and `frequencyRank` set to `undefined`.
 */
export function stripSubtitleAnnotationMetadata(token: MergedToken): MergedToken {
  const isExcluded = shouldExcludeTokenFromSubtitleAnnotations(token);

  return isExcluded
    ? {
        ...token,
        isKnown: false,
        isNPlusOneTarget: false,
        isNameMatch: false,
        jlptLevel: undefined,
        frequencyRank: undefined,
      }
    : token;
}
function computeTokenKnownStatus(
token: MergedToken,
isKnownWord: (text: string) => boolean,