mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-04-26 16:19:26 -07:00
fix: exclude standalone interjection annotations
This commit is contained in:
@@ -812,3 +812,39 @@ test('annotateTokens applies one shared exclusion gate across known N+1 frequenc
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens clears all annotations from standalone あ interjections without POS tags', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'あ',
|
||||
headword: 'あ',
|
||||
reading: 'あ',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '',
|
||||
pos2: '',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
isKnown: true,
|
||||
isNPlusOneTarget: true,
|
||||
frequencyRank: 522,
|
||||
jlptLevel: 'N5',
|
||||
}),
|
||||
];
|
||||
|
||||
const result = annotateTokens(
|
||||
tokens,
|
||||
makeDeps({
|
||||
isKnownWord: (text) => text === 'あ',
|
||||
getJlptLevel: (text) => (text === 'あ' ? 'N5' : null),
|
||||
}),
|
||||
{ minSentenceWordsForNPlusOne: 1 },
|
||||
);
|
||||
|
||||
assert.equal(result[0]?.surface, 'あ');
|
||||
assert.equal(result[0]?.headword, 'あ');
|
||||
assert.equal(result[0]?.reading, 'あ');
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
@@ -14,6 +14,7 @@ const KATAKANA_CODEPOINT_START = 0x30a1;
|
||||
const KATAKANA_CODEPOINT_END = 0x30f6;
|
||||
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set([
|
||||
'あ',
|
||||
'ああ',
|
||||
'ええ',
|
||||
'うう',
|
||||
|
||||
Reference in New Issue
Block a user