mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-20 12:11:28 -07:00
feat(stats): fix truncated readings and improve word detail UX
- fullReading() reconstructs full word reading from headword + partial stored reading
- FrequencyRankTable always shows reading for every row
- Word highlighted in example sentences with underline style
- Bar chart clicks open word detail panel
This commit is contained in:
51
stats/src/lib/reading-utils.test.ts
Normal file
51
stats/src/lib/reading-utils.test.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { fullReading } from './reading-utils';
|
||||
|
||||
// Behavioural spec for fullReading(headword, storedReading): the result is the
// hiragana reading of the whole headword, rebuilt from a possibly partial
// stored reading.
describe('fullReading', () => {
  it('prefixes leading hiragana from headword', () => {
    // お前 with reading まえ → おまえ
    expect(fullReading('お前', 'まえ')).toBe('おまえ');
  });

  it('handles katakana stored readings', () => {
    // お前 with katakana reading マエ → おまえ
    expect(fullReading('お前', 'マエ')).toBe('おまえ');
  });

  it('returns stored reading when it already includes leading kana', () => {
    // Reading already correct
    expect(fullReading('お前', 'おまえ')).toBe('おまえ');
  });

  it('handles trailing hiragana', () => {
    // 隠す with reading かくす — す is trailing hiragana
    expect(fullReading('隠す', 'かくす')).toBe('かくす');
  });

  it('handles pure kanji headwords', () => {
    expect(fullReading('様', 'さま')).toBe('さま');
  });

  it('returns empty for empty reading', () => {
    expect(fullReading('前', '')).toBe('');
  });

  // With no headword there is nothing to reconstruct from, so the stored
  // reading is what comes back (not an empty string).
  it('returns the stored reading for empty headword', () => {
    expect(fullReading('', 'まえ')).toBe('まえ');
  });

  it('handles all-kana headword', () => {
    // Headword is already all hiragana
    expect(fullReading('いますぐ', 'いますぐ')).toBe('いますぐ');
  });

  it('handles mixed leading and trailing kana', () => {
    // お気に入り: お is leading kana and り is trailing kana; the stored
    // reading きにいり covers 気に入り and only lacks the leading お.
    expect(fullReading('お気に入り', 'きにいり')).toBe('おきにいり');
  });

  it('handles katakana in headword', () => {
    // カズマ様 — leading katakana + kanji
    expect(fullReading('カズマ様', 'さま')).toBe('かずまさま');
  });
});
|
||||
73
stats/src/lib/reading-utils.ts
Normal file
73
stats/src/lib/reading-utils.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
/**
 * Reports whether the first UTF-16 code unit of `ch` falls inside
 * U+3040–U+309F (the Unicode Hiragana block). Only the first code unit is
 * inspected; an empty string yields `false`.
 */
function isHiragana(ch: string): boolean {
  return /^[\u3040-\u309f]/.test(ch);
}
|
||||
|
||||
/**
 * Reports whether the first UTF-16 code unit of `ch` falls inside
 * U+30A0–U+30FF (the Unicode Katakana block, which also holds the middle dot
 * ・ and the prolonged sound mark ー). Only the first code unit is inspected;
 * an empty string yields `false`.
 */
function isKatakana(ch: string): boolean {
  return /^[\u30a0-\u30ff]/.test(ch);
}
|
||||
|
||||
/**
 * Returns `text` with every katakana character in U+30A1–U+30F6 (ァ through ヶ)
 * replaced by the character 0x60 code points below it, i.e. its hiragana
 * counterpart. All other characters, including ー and ・, are left unchanged.
 */
function katakanaToHiragana(text: string): string {
  return text.replace(/[\u30a1-\u30f6]/g, (kana) =>
    String.fromCharCode(kana.charCodeAt(0) - 0x60),
  );
}
|
||||
|
||||
/**
 * Reconstruct the full word reading from the surface form and the stored
 * (possibly partial) reading.
 *
 * MeCab/Yomitan sometimes stores only the kanji portion's reading. For example,
 * お前 (surface) with reading まえ — the stored reading covers only 前, missing
 * the leading お. This function takes the run of kana at the start and at the
 * end of the surface form (converted to hiragana) and wraps it around the
 * stored reading wherever the stored reading does not already contain it.
 * Kana that sit between non-kana characters of the surface form are not
 * inspected.
 *
 * @param headword - Surface form of the word (kanji and/or kana).
 * @param storedReading - Stored reading in hiragana or katakana; may be partial.
 * @returns The reading in hiragana. An empty `storedReading` yields `''`; an
 *   empty or all-kana `headword` yields the stored reading converted to
 *   hiragana.
 */
export function fullReading(headword: string, storedReading: string): string {
  if (!storedReading) return '';

  const reading = katakanaToHiragana(storedReading);

  // Nothing to reconstruct from, but still normalise so that every return
  // path hands back hiragana rather than the raw (possibly katakana) input.
  if (!headword) return reading;

  const isKana = (ch: string): boolean => isHiragana(ch) || isKatakana(ch);
  const chars = [...headword];

  const firstNonKana = chars.findIndex((ch) => !isKana(ch));
  if (firstNonKana === -1) {
    // Headword is entirely kana: the stored reading is used as-is.
    return reading;
  }

  // At least one non-kana character exists, so scanning from the end is
  // guaranteed to stop at or after `firstNonKana`.
  const trailingCount = [...chars].reverse().findIndex((ch) => !isKana(ch));

  const leadStr = katakanaToHiragana(chars.slice(0, firstNonKana).join(''));
  const trailStr = katakanaToHiragana(
    chars.slice(chars.length - trailingCount).join(''),
  );

  // Strip matching trailing kana from the stored reading to get the core reading.
  const coreReading =
    trailStr && reading.endsWith(trailStr)
      ? reading.slice(0, -trailStr.length)
      : reading;

  // The stored reading already begins with the headword's leading kana, so it
  // is treated as complete and returned unchanged.
  if (leadStr && coreReading.startsWith(leadStr)) {
    return reading;
  }

  return leadStr + coreReading + trailStr;
}
|
||||
Reference in New Issue
Block a user