mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-05-25 12:55:18 -07:00
fix(tokenizer): preserve known-word highlight when POS filters suppress
- Known-word cache matches now set isKnown=true even for tokens excluded by POS filters
- POS exclusion gate suppresses N+1, frequency, and JLPT only; known status is computed before the gate
- Jellyfin subtitle preload continues after cleanup failures instead of aborting
- Update config docs and option description to document the known-word bypass behavior
This commit is contained in:
@@ -523,7 +523,7 @@
|
||||
"highlightEnabled": false, // Enable fast local highlighting for words already known in Anki. Values: true | false
|
||||
"refreshMinutes": 1440, // Minutes between known-word cache refreshes.
|
||||
"addMinedWordsImmediately": true, // Immediately append newly mined card words into the known-word cache. Values: true | false
|
||||
"matchMode": "headword", // Known-word matching strategy for subtitle annotations. Values: headword | surface
|
||||
"matchMode": "headword", // Known-word matching strategy for subtitle annotations. Cache matches always receive known-word highlighting even when POS filters suppress other annotation types. Values: headword | surface
|
||||
"decks": {} // Decks and fields for known-word cache. Object mapping deck names to arrays of field names to extract, e.g. { "Kaishi 1.5k": ["Word", "Word Reading"] }.
|
||||
}, // Known words setting.
|
||||
"behavior": {
|
||||
|
||||
@@ -1045,6 +1045,7 @@ Known-word cache policy:
|
||||
- Cache state is persisted to `known-words-cache.json` under the app `userData` directory.
|
||||
- The cache is automatically invalidated when the configured scope changes (for example, when deck changes).
|
||||
- Cache lookups are in-memory. By default, token headwords are matched against cached `Expression` / `Word` values; set `ankiConnect.knownWords.matchMode` to `"surface"` for raw subtitle text matching.
|
||||
- A known-word cache match always receives known-word highlighting, even when part-of-speech filters suppress N+1, frequency, or JLPT annotations for that token.
|
||||
- Legacy moved keys under `ankiConnect.nPlusOne` (`highlightEnabled`, `refreshMinutes`, `matchMode`, `decks`, `knownWord`) and older `ankiConnect.behavior.nPlusOne*` keys are deprecated and only kept for backward compatibility.
|
||||
- Legacy top-level `ankiConnect` migration keys (for example `audioField`, `generateAudio`, `imageType`) are compatibility-only, validated before mapping, and ignored with a warning when invalid.
|
||||
- If AnkiConnect is unreachable, the cache remains in its previous state and an on-screen/system status message is shown.
|
||||
|
||||
@@ -523,7 +523,7 @@
|
||||
"highlightEnabled": false, // Enable fast local highlighting for words already known in Anki. Values: true | false
|
||||
"refreshMinutes": 1440, // Minutes between known-word cache refreshes.
|
||||
"addMinedWordsImmediately": true, // Immediately append newly mined card words into the known-word cache. Values: true | false
|
||||
"matchMode": "headword", // Known-word matching strategy for subtitle annotations. Values: headword | surface
|
||||
"matchMode": "headword", // Known-word matching strategy for subtitle annotations. Cache matches always receive known-word highlighting even when POS filters suppress other annotation types. Values: headword | surface
|
||||
"decks": {} // Decks and fields for known-word cache. Object mapping deck names to arrays of field names to extract, e.g. { "Kaishi 1.5k": ["Word", "Word Reading"] }.
|
||||
}, // Known words setting.
|
||||
"behavior": {
|
||||
|
||||
@@ -265,7 +265,8 @@ export function buildIntegrationConfigOptionRegistry(
|
||||
kind: 'enum',
|
||||
enumValues: ['headword', 'surface'],
|
||||
defaultValue: defaultConfig.ankiConnect.knownWords.matchMode,
|
||||
description: 'Known-word matching strategy for subtitle annotations.',
|
||||
description:
|
||||
'Known-word matching strategy for subtitle annotations. Cache matches always receive known-word highlighting even when POS filters suppress other annotation types.',
|
||||
},
|
||||
{
|
||||
path: 'ankiConnect.knownWords.highlightEnabled',
|
||||
|
||||
@@ -129,7 +129,7 @@ test('tokenizeSubtitle splits same-line grammar endings before applying annotati
|
||||
assert.equal(result.tokens?.[0]?.jlptLevel, 'N5');
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, 40);
|
||||
assert.equal(result.tokens?.[1]?.surface, 'です');
|
||||
assert.equal(result.tokens?.[1]?.isKnown, false);
|
||||
assert.equal(result.tokens?.[1]?.isKnown, true);
|
||||
assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
|
||||
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
|
||||
assert.equal(result.tokens?.[1]?.jlptLevel, undefined);
|
||||
@@ -3365,7 +3365,7 @@ test('tokenizeSubtitle excludes default non-independent pos2 from N+1 and freque
|
||||
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle clears known-word highlight for exact non-independent kanji noun tokens', async () => {
|
||||
test('tokenizeSubtitle keeps known-word highlight for exact non-independent kanji noun tokens', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'その点',
|
||||
makeDepsFromYomitanTokens(
|
||||
@@ -3413,7 +3413,7 @@ test('tokenizeSubtitle clears known-word highlight for exact non-independent kan
|
||||
assert.equal(result.tokens?.length, 2);
|
||||
assert.equal(result.tokens?.[0]?.isKnown, false);
|
||||
assert.equal(result.tokens?.[1]?.surface, '点');
|
||||
assert.equal(result.tokens?.[1]?.isKnown, false);
|
||||
assert.equal(result.tokens?.[1]?.isKnown, true);
|
||||
assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
|
||||
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
|
||||
assert.equal(result.tokens?.[1]?.jlptLevel, undefined);
|
||||
@@ -4028,7 +4028,7 @@ test('tokenizeSubtitle clears all annotations for kana-only demonstrative helper
|
||||
{
|
||||
surface: 'これで',
|
||||
headword: 'これ',
|
||||
isKnown: false,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
@@ -4143,7 +4143,7 @@ test('tokenizeSubtitle clears all annotations for explanatory pondering endings'
|
||||
{
|
||||
surface: 'のかな',
|
||||
headword: 'の',
|
||||
isKnown: false,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
@@ -4672,7 +4672,7 @@ test('tokenizeSubtitle clears annotations for ja-nai explanatory endings and aru
|
||||
{
|
||||
surface: 'ある',
|
||||
headword: 'ある',
|
||||
isKnown: false,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
@@ -4717,7 +4717,7 @@ test('tokenizeSubtitle clears annotations for standalone polite copula endings w
|
||||
{
|
||||
surface: 'ですよ',
|
||||
headword: 'です',
|
||||
isKnown: false,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
@@ -5044,7 +5044,7 @@ test('tokenizeSubtitle clears annotations for auxiliary inflection fragments whi
|
||||
{
|
||||
surface: 'れた',
|
||||
headword: 'れる',
|
||||
isKnown: false,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
@@ -5181,7 +5181,7 @@ test('tokenizeSubtitle clears annotations for te-kureru auxiliary helper spans',
|
||||
{
|
||||
surface: 'てく',
|
||||
headword: 'てく',
|
||||
isKnown: false,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
@@ -5192,7 +5192,7 @@ test('tokenizeSubtitle clears annotations for te-kureru auxiliary helper spans',
|
||||
{
|
||||
surface: 'れた',
|
||||
headword: 'れる',
|
||||
isKnown: false,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: undefined,
|
||||
jlptLevel: undefined,
|
||||
|
||||
@@ -425,6 +425,21 @@ test('shouldExcludeTokenFromSubtitleAnnotations keeps lexical tokens outside exp
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), false);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations still excludes lexical non-independent kanji nouns from non-known annotations', () => {
|
||||
const token = makeToken({
|
||||
surface: '以外',
|
||||
headword: '以外',
|
||||
reading: 'イガイ',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
pos2: '非自立',
|
||||
pos3: '副詞可能',
|
||||
});
|
||||
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
|
||||
assert.equal(shouldExcludeTokenFromVocabularyPersistence(token), true);
|
||||
});
|
||||
|
||||
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone particles auxiliaries and adnominals', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
@@ -971,8 +986,8 @@ test('annotateTokens N+1 minimum sentence words counts only eligible word tokens
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[1]?.isKnown, false);
|
||||
assert.equal(result[2]?.isKnown, false);
|
||||
assert.equal(result[1]?.isKnown, true);
|
||||
assert.equal(result[2]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
@@ -1186,7 +1201,7 @@ test('annotateTokens excludes default non-independent pos2 from frequency and N+
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known-word status for non-independent kanji noun tokens', () => {
|
||||
test('annotateTokens keeps known-word status for non-independent kanji noun tokens', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '点',
|
||||
@@ -1211,12 +1226,41 @@ test('annotateTokens clears known-word status for non-independent kanji noun tok
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens keeps known-word status for lexical non-independent kanji nouns', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '以外',
|
||||
reading: 'イガイ',
|
||||
headword: '以外',
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
pos2: '非自立',
|
||||
pos3: '副詞可能',
|
||||
startPos: 2,
|
||||
endPos: 4,
|
||||
frequencyRank: 437,
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === '以外',
|
||||
}),
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens clears all annotations for non-independent kanji noun tokens under unified gate', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
@@ -1401,7 +1445,7 @@ test('annotateTokens excludes composite tokens when all component pos tags are e
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens applies one shared exclusion gate across known N+1 frequency and JLPT', () => {
|
||||
test('annotateTokens lets known words bypass the shared exclusion gate for known status only', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'これで',
|
||||
@@ -1425,13 +1469,13 @@ test('annotateTokens applies one shared exclusion gate across known N+1 frequenc
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for kana-only non-independent noun helper merges', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for kana-only non-independent noun helper merges', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'ことに',
|
||||
@@ -1455,13 +1499,13 @@ test('annotateTokens clears known status and other annotations for kana-only non
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for standalone auxiliary inflection fragments', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for standalone auxiliary inflection fragments', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'れる',
|
||||
@@ -1497,14 +1541,14 @@ test('annotateTokens clears known status and other annotations for standalone au
|
||||
);
|
||||
|
||||
for (const token of result) {
|
||||
assert.equal(token.isKnown, false, token.surface);
|
||||
assert.equal(token.isKnown, true, token.surface);
|
||||
assert.equal(token.isNPlusOneTarget, false, token.surface);
|
||||
assert.equal(token.frequencyRank, undefined, token.surface);
|
||||
assert.equal(token.jlptLevel, undefined, token.surface);
|
||||
}
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for auxiliary-only te-kureru helper spans', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for auxiliary-only te-kureru helper spans', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'てく',
|
||||
@@ -1540,7 +1584,7 @@ test('annotateTokens clears known status and other annotations for auxiliary-onl
|
||||
);
|
||||
|
||||
for (const token of result) {
|
||||
assert.equal(token.isKnown, false, token.surface);
|
||||
assert.equal(token.isKnown, true, token.surface);
|
||||
assert.equal(token.isNPlusOneTarget, false, token.surface);
|
||||
assert.equal(token.frequencyRank, undefined, token.surface);
|
||||
assert.equal(token.jlptLevel, undefined, token.surface);
|
||||
@@ -1576,7 +1620,7 @@ test('annotateTokens keeps lexical くれる forms eligible for annotation', ()
|
||||
assert.equal(result[0]?.jlptLevel, 'N4');
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for standalone して helper fragments', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for standalone して helper fragments', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'してる',
|
||||
@@ -1600,13 +1644,13 @@ test('annotateTokens clears known status and other annotations for standalone
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for standalone particle fragments without POS tags', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for standalone particle fragments without POS tags', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'と',
|
||||
@@ -1630,13 +1674,13 @@ test('annotateTokens clears known status and other annotations for standalone pa
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status from standalone particles even when the known-word cache contains them', () => {
|
||||
test('annotateTokens keeps known status on standalone particles when the known-word cache contains them', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'に',
|
||||
@@ -1671,7 +1715,7 @@ test('annotateTokens clears known status from standalone particles even when the
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
@@ -1728,7 +1772,7 @@ test('annotateTokens does not mark standalone connective particles as N+1', () =
|
||||
assert.equal(result[1]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for rhetorical もんか grammar particle phrases', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for rhetorical もんか grammar particle phrases', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'もんか',
|
||||
@@ -1752,13 +1796,13 @@ test('annotateTokens clears known status and other annotations for rhetorical
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for bare くれ auxiliary fragments', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for bare くれ auxiliary fragments', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'くれ',
|
||||
@@ -1782,13 +1826,13 @@ test('annotateTokens clears known status and other annotations for bare くれ a
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for aru existence verbs', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for aru existence verbs', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: '有る',
|
||||
@@ -1818,14 +1862,14 @@ test('annotateTokens clears known status and other annotations for aru existence
|
||||
|
||||
assert.equal(result[0]?.surface, '有る');
|
||||
assert.equal(result[0]?.headword, '有る');
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.isNameMatch, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations for standalone quote particle and auxiliary grammar terms', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations for standalone quote particle and auxiliary grammar terms', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'って',
|
||||
@@ -1861,14 +1905,14 @@ test('annotateTokens clears known status and other annotations for standalone qu
|
||||
);
|
||||
|
||||
for (const token of result) {
|
||||
assert.equal(token.isKnown, false, token.surface);
|
||||
assert.equal(token.isKnown, true, token.surface);
|
||||
assert.equal(token.isNPlusOneTarget, false, token.surface);
|
||||
assert.equal(token.frequencyRank, undefined, token.surface);
|
||||
assert.equal(token.jlptLevel, undefined, token.surface);
|
||||
}
|
||||
});
|
||||
|
||||
test('annotateTokens clears known status and other annotations from standalone あ interjections without POS tags', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations from standalone あ interjections without POS tags', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'あ',
|
||||
@@ -1898,13 +1942,13 @@ test('annotateTokens clears known status and other annotations from standalone
|
||||
assert.equal(result[0]?.surface, 'あ');
|
||||
assert.equal(result[0]?.headword, 'あ');
|
||||
assert.equal(result[0]?.reading, 'あ');
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isKnown, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears all annotations from expressive subtitle interjections without POS tags', () => {
|
||||
test('annotateTokens keeps known status while clearing other annotations from expressive subtitle interjections without POS tags', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'ハァ',
|
||||
@@ -1960,7 +2004,7 @@ test('annotateTokens clears all annotations from expressive subtitle interjectio
|
||||
);
|
||||
|
||||
for (const token of result.slice(0, 2)) {
|
||||
assert.equal(token.isKnown, false, token.surface);
|
||||
assert.equal(token.isKnown, true, token.surface);
|
||||
assert.equal(token.isNPlusOneTarget, false, token.surface);
|
||||
assert.equal(token.frequencyRank, undefined, token.surface);
|
||||
assert.equal(token.jlptLevel, undefined, token.surface);
|
||||
|
||||
@@ -680,6 +680,11 @@ export function annotateTokens(
|
||||
|
||||
// Single pass: compute known word status, frequency filtering, and JLPT level together
|
||||
const annotated = tokens.map((token, index) => {
|
||||
const isKnownForMatching = shouldComputeKnownStatus
|
||||
? computeTokenKnownStatus(token, deps.isKnownWord, deps.knownWordMatchMode)
|
||||
: false;
|
||||
nPlusOneKnownStatuses[index] = isKnownForMatching;
|
||||
|
||||
if (
|
||||
sharedShouldExcludeTokenFromSubtitleAnnotations(token, {
|
||||
pos1Exclusions,
|
||||
@@ -690,18 +695,13 @@ export function annotateTokens(
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
});
|
||||
nPlusOneKnownStatuses[index] = false;
|
||||
return {
|
||||
...strippedToken,
|
||||
isKnown: false,
|
||||
isKnown: knownWordsEnabled ? isKnownForMatching : false,
|
||||
};
|
||||
}
|
||||
|
||||
const prioritizedNameMatch = nameMatchEnabled && token.isNameMatch === true;
|
||||
const isKnownForMatching = shouldComputeKnownStatus
|
||||
? computeTokenKnownStatus(token, deps.isKnownWord, deps.knownWordMatchMode)
|
||||
: false;
|
||||
nPlusOneKnownStatuses[index] = isKnownForMatching;
|
||||
|
||||
const frequencyRank =
|
||||
frequencyEnabled && !prioritizedNameMatch
|
||||
|
||||
@@ -331,7 +331,8 @@ test('preload jellyfin subtitles cleans previous cached subtitles before a new p
|
||||
assert.deepEqual(cleanupCalls, [['/tmp/subminer-jellyfin-subtitles-0']]);
|
||||
});
|
||||
|
||||
test('preload jellyfin subtitles logs cleanup failures without rejecting', async () => {
|
||||
test('preload jellyfin subtitles continues after cleanup failures', async () => {
|
||||
const commands: Array<Array<string | number>> = [];
|
||||
const logs: string[] = [];
|
||||
let cleanupShouldFail = false;
|
||||
const preload = createPreloadJellyfinExternalSubtitlesHandler(
|
||||
@@ -344,6 +345,7 @@ test('preload jellyfin subtitles logs cleanup failures without rejecting', async
|
||||
path: `/tmp/subminer-jellyfin-subtitles-${track.index}/track.srt`,
|
||||
cleanupDir: `/tmp/subminer-jellyfin-subtitles-${track.index}`,
|
||||
}),
|
||||
sendMpvCommand: (command) => commands.push(command),
|
||||
cleanupCachedSubtitles: () => {
|
||||
if (cleanupShouldFail) {
|
||||
throw new Error('cleanup failed');
|
||||
@@ -357,7 +359,14 @@ test('preload jellyfin subtitles logs cleanup failures without rejecting', async
|
||||
cleanupShouldFail = true;
|
||||
await assert.doesNotReject(() => preload({ session, clientInfo, itemId: 'item-2' }));
|
||||
|
||||
assert.deepEqual(logs, ['Failed to preload Jellyfin external subtitles']);
|
||||
assert.deepEqual(logs, ['Failed to cleanup Jellyfin cached subtitles']);
|
||||
assert.deepEqual(
|
||||
commands.filter((command) => command[0] === 'sub-add'),
|
||||
[
|
||||
['sub-add', '/tmp/subminer-jellyfin-subtitles-0/track.srt', 'auto', 'English', 'eng'],
|
||||
['sub-add', '/tmp/subminer-jellyfin-subtitles-0/track.srt', 'auto', 'English', 'eng'],
|
||||
],
|
||||
);
|
||||
});
|
||||
|
||||
test('preload jellyfin subtitles serializes overlapping preload runs', async () => {
|
||||
|
||||
@@ -246,7 +246,11 @@ export function createPreloadJellyfinExternalSubtitlesHandler(deps: {
|
||||
itemId: string;
|
||||
}): Promise<void> => {
|
||||
try {
|
||||
cleanupActiveCache();
|
||||
try {
|
||||
cleanupActiveCache();
|
||||
} catch (error) {
|
||||
deps.logDebug('Failed to cleanup Jellyfin cached subtitles', error);
|
||||
}
|
||||
const tracks = await deps.listJellyfinSubtitleTracks(
|
||||
params.session,
|
||||
params.clientInfo,
|
||||
|
||||
Reference in New Issue
Block a user