// Mirror of https://github.com/ksyasuda/SubMiner.git (synced 2026-05-26 00:55:16 -07:00).
// TypeScript, 74 lines, 2.2 KiB.
/**
 * Test whether a string starts with a character from the Unicode Hiragana
 * block (U+3040–U+309F).
 *
 * Only the first UTF-16 code unit of `ch` is inspected. An empty string yields
 * `false`, because `charCodeAt(0)` is `NaN` and fails both comparisons.
 *
 * @param ch - Character to classify (normally a single character).
 * @returns `true` when the first code unit lies in U+3040–U+309F.
 */
function isHiragana(ch: string): boolean {
  const code = ch.charCodeAt(0);
  return code >= 0x3040 && code <= 0x309f;
}

/**
 * Test whether a string starts with a character from the Unicode Katakana
 * block (U+30A0–U+30FF).
 *
 * The range includes the middle dot (U+30FB) and the prolonged sound mark
 * (U+30FC). Only the first UTF-16 code unit of `ch` is inspected. An empty
 * string yields `false`, because `charCodeAt(0)` is `NaN`.
 *
 * @param ch - Character to classify (normally a single character).
 * @returns `true` when the first code unit lies in U+30A0–U+30FF.
 */
function isKatakana(ch: string): boolean {
  const code = ch.charCodeAt(0);
  return code >= 0x30a0 && code <= 0x30ff;
}

/**
 * Convert katakana in `text` to the corresponding hiragana.
 *
 * Code units in U+30A1 (ァ) through U+30F6 (ヶ) are shifted down by 0x60 to
 * U+3041 (ぁ) through U+3096 (ゖ). Everything else is copied unchanged,
 * including the prolonged sound mark ー, the middle dot ・, ヷ–ヺ, kanji,
 * Latin text and characters outside the BMP.
 *
 * @param text - Input string; may be empty or contain mixed scripts.
 * @returns A new string with the convertible katakana replaced by hiragana.
 */
function katakanaToHiragana(text: string): string {
  // No `u` flag is needed: the range is entirely in the BMP, and surrogate
  // halves fall outside it, so astral characters pass through untouched.
  return text.replace(/[\u30a1-\u30f6]/g, (kana) =>
    String.fromCharCode(kana.charCodeAt(0) - 0x60),
  );
}

/**
 * Reconstruct the full word reading from the surface form and the stored
 * (possibly partial) reading.
 *
 * MeCab/Yomitan sometimes stores only the kanji portion's reading. For example,
 * お前 (surface) with reading まえ — the stored reading covers only 前, missing
 * the leading お. This function takes the run of kana at the start of the
 * surface form and the run of kana at its end (both converted to hiragana) and
 * wraps them around the stored reading, unless the stored reading already
 * begins with the leading run. Kana in the middle of the surface form is not
 * inspected.
 *
 * Matching is purely textual, so a partial reading that happens to start with
 * the same kana as the surface prefix is treated as already complete.
 *
 * @param headword - Surface form of the word; may mix kanji and kana.
 * @param storedReading - Reading as stored; may omit the surface form's
 *   leading and/or trailing kana.
 * @returns The reading with katakana converted to hiragana and any missing
 *   leading/trailing kana added. If either argument is empty, returns
 *   `storedReading` unconverted (or `''` when it is empty too).
 */
export function fullReading(headword: string, storedReading: string): string {
  if (!storedReading || !headword) return storedReading || '';

  const reading = katakanaToHiragana(storedReading);
  const isKana = (ch: string): boolean => isHiragana(ch) || isKatakana(ch);

  // Spread iterates by code point, so astral characters stay intact.
  const chars = [...headword];

  // End of the leading kana run (index of the first non-kana character).
  const firstNonKana = chars.findIndex((ch) => !isKana(ch));

  // Surface form is entirely kana: there is no kanji reading to wrap.
  if (firstNonKana === -1) return reading;

  // Start of the trailing kana run. The scan never crosses `firstNonKana`,
  // which is known to be a non-kana character.
  let trailStart = chars.length;
  while (trailStart - 1 > firstNonKana && isKana(chars[trailStart - 1])) {
    trailStart--;
  }

  const leadStr = katakanaToHiragana(chars.slice(0, firstNonKana).join(''));
  const trailStr = katakanaToHiragana(chars.slice(trailStart).join(''));

  // Strip matching trailing kana from the stored reading to get the core
  // kanji reading.
  let coreReading = reading;
  if (trailStr && coreReading.endsWith(trailStr)) {
    coreReading = coreReading.slice(0, -trailStr.length);
  }

  // The stored reading already includes the leading kana, so it is complete.
  if (leadStr && coreReading.startsWith(leadStr)) {
    return reading;
  }

  return leadStr + coreReading + trailStr;
}