fix: exclude kana-only n+1 targets

This commit is contained in:
2026-04-26 19:21:59 -07:00
parent 077c852a08
commit 0855a7dfcc
5 changed files with 154 additions and 44 deletions
+23
View File
@@ -282,6 +282,26 @@ function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<strin
return parts.every((part) => exclusions.has(part));
}
function isKanaChar(char: string): boolean {
const code = char.codePointAt(0);
if (code === undefined) {
return false;
}
return (
(code >= 0x3041 && code <= 0x3096) ||
(code >= 0x309b && code <= 0x309f) ||
code === 0x30fc ||
(code >= 0x30a0 && code <= 0x30fa) ||
(code >= 0x30fd && code <= 0x30ff)
);
}
function isKanaOnlyText(text: string): boolean {
const normalized = text.trim();
return normalized.length > 0 && Array.from(normalized).every((char) => isKanaChar(char));
}
export function isNPlusOneCandidateToken(
token: MergedToken,
pos1Exclusions: ReadonlySet<string> = N_PLUS_ONE_IGNORED_POS1,
@@ -290,6 +310,9 @@ export function isNPlusOneCandidateToken(
if (token.isKnown) {
return false;
}
if (isKanaOnlyText(token.surface)) {
return false;
}
return isNPlusOneWordCountToken(token, pos1Exclusions, pos2Exclusions);
}