fix: address CodeRabbit review comments

This commit is contained in:
2026-04-27 20:10:33 -07:00
parent c150fce782
commit 2fbc90cf3a
13 changed files with 226 additions and 54 deletions

View File

@@ -3086,6 +3086,27 @@ test('tokenizeSubtitle uses Yomitan word classes to classify standalone particle
assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
});
test('tokenizeSubtitle uses Yomitan word classes to classify auxiliary subclasses', async () => {
  // An 'aux-v' word class (auxiliary subclass, not the bare 'aux' tag) should
  // still resolve to the bound-auxiliary part of speech, and the token should
  // not pick up frequency/JLPT enrichment.
  const deps = makeDepsFromYomitanTokens(
    [{ surface: 'です', reading: 'です', headword: 'です', wordClasses: ['aux-v'] }],
    {
      getFrequencyDictionaryEnabled: () => true,
      getFrequencyRank: () => 10,
      getJlptLevel: () => 'N5',
      tokenizeWithMecab: async () => null,
    },
  );
  const result = await tokenizeSubtitle('です', deps);
  assert.equal(result.tokens?.length, 1);
  const token = result.tokens?.[0];
  assert.equal(token?.partOfSpeech, PartOfSpeech.bound_auxiliary);
  assert.equal(token?.pos1, '助動詞');
  assert.equal(token?.frequencyRank, undefined);
  assert.equal(token?.jlptLevel, undefined);
});
test('tokenizeSubtitle fills detailed MeCab POS when Yomitan word class supplies coarse POS', async () => {
const result = await tokenizeSubtitle(
'は',

View File

@@ -359,7 +359,7 @@ function resolvePartOfSpeechFromYomitanWordClasses(wordClasses: string[]): {
if (wordClasses.includes('prt')) {
return { partOfSpeech: PartOfSpeech.particle, pos1: '助詞' };
}
if (wordClasses.includes('aux')) {
if (wordClasses.some((wordClass) => wordClass === 'aux' || wordClass.startsWith('aux-'))) {
return { partOfSpeech: PartOfSpeech.bound_auxiliary, pos1: '助動詞' };
}
if (wordClasses.some((wordClass) => wordClass.startsWith('v'))) {

View File

@@ -713,6 +713,57 @@ test('annotateTokens N+1 sentence word count respects source punctuation gaps om
assert.equal(result[3]?.isNPlusOneTarget, false);
});
test('annotateTokens N+1 sentence word count normalizes line breaks before gap detection', () => {
  // Three known nouns followed by one unknown verb, with a CRLF line break in
  // the source text. The break must be normalized so the punctuation-gap
  // detection does not split the sentence; with the gap after ふざけん the
  // sentence never reaches the 3-word minimum, so no token is an N+1 target.
  const nounSpecs: Array<[string, number, number]> = [
    ['私', 0, 1],
    ['猫', 2, 3],
    ['犬', 3, 4],
  ];
  const tokens = nounSpecs.map(([word, startPos, endPos]) =>
    makeToken({ surface: word, headword: word, pos1: '名詞', startPos, endPos }),
  );
  tokens.push(
    makeToken({
      surface: 'ふざけん',
      headword: 'ふざける',
      partOfSpeech: PartOfSpeech.verb,
      pos1: '動詞',
      pos2: '自立',
      startPos: 5,
      endPos: 9,
    }),
  );
  const knownWords = new Set(['私', '猫', '犬']);
  const result = annotateTokens(
    tokens,
    makeDeps({
      isKnownWord: (text) => knownWords.has(text),
    }),
    {
      minSentenceWordsForNPlusOne: 3,
      sourceText: '私\r\n猫犬ふざけんなよ',
    },
  );
  for (let i = 0; i < 4; i += 1) {
    assert.equal(result[i]?.isNPlusOneTarget, false);
  }
});
test('annotateTokens applies configured pos1 exclusions to both frequency and N+1', () => {
const tokens = [
makeToken({