diff --git a/stats/src/components/vocabulary/FrequencyRankTable.tsx b/stats/src/components/vocabulary/FrequencyRankTable.tsx index 519aaec..3b3ff93 100644 --- a/stats/src/components/vocabulary/FrequencyRankTable.tsx +++ b/stats/src/components/vocabulary/FrequencyRankTable.tsx @@ -1,5 +1,6 @@ import { useMemo, useState } from 'react'; import { PosBadge } from './pos-helpers'; +import { fullReading } from '../../lib/reading-utils'; import type { VocabularyEntry } from '../../types/stats'; interface FrequencyRankTableProps { @@ -13,11 +14,12 @@ const PAGE_SIZE = 25; export function FrequencyRankTable({ words, knownWords, onSelectWord }: FrequencyRankTableProps) { const [page, setPage] = useState(0); const [hideKnown, setHideKnown] = useState(true); + const [collapsed, setCollapsed] = useState(false); const hasKnownData = knownWords.size > 0; const isWordKnown = (w: VocabularyEntry): boolean => { - return knownWords.has(w.headword) || knownWords.has(w.word) || knownWords.has(w.reading); + return knownWords.has(w.headword) || knownWords.has(w.word); }; const ranked = useMemo(() => { @@ -25,7 +27,28 @@ export function FrequencyRankTable({ words, knownWords, onSelectWord }: Frequenc if (hideKnown && hasKnownData) { filtered = filtered.filter((w) => !isWordKnown(w)); } - return filtered.sort((a, b) => a.frequencyRank! - b.frequencyRank!); + + const byHeadword = new Map(); + for (const w of filtered) { + const existing = byHeadword.get(w.headword); + if (!existing) { + byHeadword.set(w.headword, { ...w }); + } else { + existing.frequency += w.frequency; + existing.animeCount = Math.max(existing.animeCount, w.animeCount); + if (w.frequencyRank! < existing.frequencyRank!) { + existing.frequencyRank = w.frequencyRank; + } + if (!existing.reading && w.reading) { + existing.reading = w.reading; + } + if (!existing.partOfSpeech && w.partOfSpeech) { + existing.partOfSpeech = w.partOfSpeech; + } + } + } + + return [...byHeadword.values()].sort((a, b) => a.frequencyRank! 
- b.frequencyRank!); }, [words, knownWords, hideKnown, hasKnownData]); if (words.every((w) => w.frequencyRank == null)) { @@ -44,10 +67,15 @@ export function FrequencyRankTable({ words, knownWords, onSelectWord }: Frequenc return (
-
-

+
+

+
{hasKnownData && (
- {ranked.length === 0 ? ( -
+ {collapsed ? null : ranked.length === 0 ? ( +
{hideKnown ? 'All ranked words are already in Anki!' : 'No words with frequency data.'}
) : ( <> -
+
@@ -98,7 +126,7 @@ export function FrequencyRankTable({ words, knownWords, onSelectWord }: Frequenc {w.headword}
- {w.reading !== w.headword ? w.reading : ''} + {fullReading(w.headword, w.reading) || w.headword} {w.partOfSpeech && } diff --git a/stats/src/components/vocabulary/WordDetailPanel.tsx b/stats/src/components/vocabulary/WordDetailPanel.tsx index 9536a3f..af3017b 100644 --- a/stats/src/components/vocabulary/WordDetailPanel.tsx +++ b/stats/src/components/vocabulary/WordDetailPanel.tsx @@ -1,17 +1,39 @@ -import { useRef, useState } from 'react'; +import { useRef, useState, useEffect } from 'react'; import { useWordDetail } from '../../hooks/useWordDetail'; import { apiClient } from '../../lib/api-client'; import { formatNumber, formatRelativeDate } from '../../lib/formatters'; +import { fullReading } from '../../lib/reading-utils'; import type { VocabularyOccurrenceEntry } from '../../types/stats'; import { PosBadge } from './pos-helpers'; -const OCCURRENCES_PAGE_SIZE = 50; +const INITIAL_PAGE_SIZE = 5; +const LOAD_MORE_SIZE = 10; + +type MineStatus = { loading?: boolean; success?: boolean; error?: string }; interface WordDetailPanelProps { wordId: number | null; onClose: () => void; onSelectWord?: (wordId: number) => void; onNavigateToAnime?: (animeId: number) => void; + isExcluded?: (w: { headword: string; word: string; reading: string }) => boolean; + onToggleExclusion?: (w: { headword: string; word: string; reading: string }) => void; +} + +function highlightWord(text: string, words: string[]): React.ReactNode { + const needles = words.filter(Boolean); + if (needles.length === 0) return text; + + const escaped = needles.map(w => w.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')); + const pattern = new RegExp(`(${escaped.join('|')})`, 'g'); + const parts = text.split(pattern); + const needleSet = new Set(needles); + + return parts.map((part, i) => + needleSet.has(part) + ? 
{part} + : part + ); } function formatSegment(ms: number | null): string { @@ -22,7 +44,7 @@ function formatSegment(ms: number | null): string { return `${minutes}:${String(seconds).padStart(2, '0')}`; } -export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAnime }: WordDetailPanelProps) { +export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAnime, isExcluded, onToggleExclusion }: WordDetailPanelProps) { const { data, loading, error } = useWordDetail(wordId); const [occurrences, setOccurrences] = useState([]); const [occLoading, setOccLoading] = useState(false); @@ -30,11 +52,23 @@ export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAni const [occError, setOccError] = useState(null); const [hasMore, setHasMore] = useState(false); const [occLoaded, setOccLoaded] = useState(false); + const [mineStatus, setMineStatus] = useState>({}); const requestIdRef = useRef(0); + useEffect(() => { + setOccurrences([]); + setOccLoaded(false); + setOccLoading(false); + setOccLoadingMore(false); + setOccError(null); + setHasMore(false); + setMineStatus({}); + requestIdRef.current++; + }, [wordId]); + if (wordId === null) return null; - const loadOccurrences = async (detail: NonNullable['detail'], offset: number, append: boolean) => { + const loadOccurrences = async (detail: NonNullable['detail'], offset: number, limit: number, append: boolean) => { const reqId = ++requestIdRef.current; if (append) { setOccLoadingMore(true); @@ -45,11 +79,11 @@ export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAni try { const rows = await apiClient.getWordOccurrences( detail.headword, detail.word, detail.reading, - OCCURRENCES_PAGE_SIZE, offset, + limit, offset, ); if (reqId !== requestIdRef.current) return; setOccurrences(prev => append ? 
[...prev, ...rows] : rows); - setHasMore(rows.length === OCCURRENCES_PAGE_SIZE); + setHasMore(rows.length === limit); } catch (err) { if (reqId !== requestIdRef.current) return; setOccError(err instanceof Error ? err.message : String(err)); @@ -67,12 +101,44 @@ export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAni const handleShowOccurrences = () => { if (!data) return; - void loadOccurrences(data.detail, 0, false); + void loadOccurrences(data.detail, 0, INITIAL_PAGE_SIZE, false); }; const handleLoadMore = () => { if (!data || occLoadingMore || !hasMore) return; - void loadOccurrences(data.detail, occurrences.length, true); + void loadOccurrences(data.detail, occurrences.length, LOAD_MORE_SIZE, true); + }; + + const handleMine = async (occ: VocabularyOccurrenceEntry, mode: 'word' | 'sentence' | 'audio') => { + const key = `${occ.sessionId}-${occ.lineIndex}-${occ.segmentStartMs}-${mode}`; + setMineStatus(prev => ({ ...prev, [key]: { loading: true } })); + try { + const result = await apiClient.mineCard({ + sourcePath: occ.sourcePath!, + startMs: occ.segmentStartMs!, + endMs: occ.segmentEndMs!, + sentence: occ.text, + word: data!.detail.headword, + secondaryText: occ.secondaryText, + videoTitle: occ.videoTitle, + mode, + }); + if (result.error) { + setMineStatus(prev => ({ ...prev, [key]: { error: result.error } })); + } else { + setMineStatus(prev => ({ ...prev, [key]: { success: true } })); + const label = mode === 'audio' ? 'Audio card' : mode === 'word' ? 
data!.detail.headword : occ.text.slice(0, 30); + if (typeof Notification !== 'undefined' && Notification.permission === 'granted') { + new Notification('Anki Card Created', { body: `Mined: ${label}`, icon: '/favicon.png' }); + } else if (typeof Notification !== 'undefined' && Notification.permission !== 'denied') { + Notification.requestPermission().then(p => { + if (p === 'granted') new Notification('Anki Card Created', { body: `Mined: ${label}` }); + }); + } + } + } catch (err) { + setMineStatus(prev => ({ ...prev, [key]: { error: err instanceof Error ? err.message : String(err) } })); + } }; return ( @@ -93,7 +159,7 @@ export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAni {data && ( <>

{data.detail.headword}

-
{data.detail.reading || data.detail.word}
+
{fullReading(data.detail.headword, data.detail.reading) || data.detail.word}
{data.detail.partOfSpeech && } {data.detail.pos1 && data.detail.pos1 !== data.detail.partOfSpeech && ( @@ -109,13 +175,28 @@ export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAni )}
- +
+ {data && onToggleExclusion && ( + + )} + +
@@ -190,7 +271,7 @@ export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAni {occLoading &&
Loading occurrences...
} {occError &&
Error: {occError}
} {occLoaded && !occLoading && occurrences.length === 0 && ( -
No occurrences tracked yet.
+
No example lines tracked yet. Lines are stored for sessions recorded after the subtitle tracking update.
)} {occurrences.length > 0 && (
@@ -212,33 +293,71 @@ export function WordDetailPanel({ wordId, onClose, onSelectWord, onNavigateToAni {formatNumber(occ.occurrenceCount)} in line
-
- {formatSegment(occ.segmentStartMs)}-{formatSegment(occ.segmentEndMs)} · session {occ.sessionId} +
+ {formatSegment(occ.segmentStartMs)}-{formatSegment(occ.segmentEndMs)} · session {occ.sessionId} + {occ.sourcePath && occ.segmentStartMs != null && occ.segmentEndMs != null && (() => { + const baseKey = `${occ.sessionId}-${occ.lineIndex}-${occ.segmentStartMs}`; + const wordStatus = mineStatus[`${baseKey}-word`]; + const sentenceStatus = mineStatus[`${baseKey}-sentence`]; + const audioStatus = mineStatus[`${baseKey}-audio`]; + return ( + <> + + + + + ); + })()}
+ {(() => { + const baseKey = `${occ.sessionId}-${occ.lineIndex}-${occ.segmentStartMs}`; + const errors = ['word', 'sentence', 'audio'] + .map(m => mineStatus[`${baseKey}-${m}`]?.error) + .filter(Boolean); + return errors.length > 0 ?
{errors[0]}
: null; + })()}

- {occ.text} + {highlightWord(occ.text, [data!.detail.headword, data!.detail.word])}

))} + {hasMore && ( + + )}
)} )} - - {occLoaded && !occLoading && !occError && hasMore && ( -
- -
- )} diff --git a/stats/src/lib/reading-utils.test.ts b/stats/src/lib/reading-utils.test.ts new file mode 100644 index 0000000..80f6b78 --- /dev/null +++ b/stats/src/lib/reading-utils.test.ts @@ -0,0 +1,51 @@ +import { describe, it, expect } from 'vitest'; +import { fullReading } from './reading-utils'; + +describe('fullReading', () => { + it('prefixes leading hiragana from headword', () => { + // お前 with reading まえ → おまえ + expect(fullReading('お前', 'まえ')).toBe('おまえ'); + }); + + it('handles katakana stored readings', () => { + // お前 with katakana reading マエ → おまえ + expect(fullReading('お前', 'マエ')).toBe('おまえ'); + }); + + it('returns stored reading when it already includes leading kana', () => { + // Reading already correct + expect(fullReading('お前', 'おまえ')).toBe('おまえ'); + }); + + it('handles trailing hiragana', () => { + // 隠す with reading かくす — す is trailing hiragana + expect(fullReading('隠す', 'かくす')).toBe('かくす'); + }); + + it('handles pure kanji headwords', () => { + expect(fullReading('様', 'さま')).toBe('さま'); + }); + + it('returns empty for empty reading', () => { + expect(fullReading('前', '')).toBe(''); + }); + + it('returns empty for empty headword', () => { + expect(fullReading('', 'まえ')).toBe('まえ'); + }); + + it('handles all-kana headword', () => { + // Headword is already all hiragana + expect(fullReading('いますぐ', 'いますぐ')).toBe('いますぐ'); + }); + + it('handles mixed leading and trailing kana', () => { + // お気に入り: お=leading, に入り=trailing around 気 + expect(fullReading('お気に入り', 'きにいり')).toBe('おきにいり'); + }); + + it('handles katakana in headword', () => { + // カズマ様 — leading katakana + kanji + expect(fullReading('カズマ様', 'さま')).toBe('かずまさま'); + }); +}); diff --git a/stats/src/lib/reading-utils.ts b/stats/src/lib/reading-utils.ts new file mode 100644 index 0000000..6edcee4 --- /dev/null +++ b/stats/src/lib/reading-utils.ts @@ -0,0 +1,73 @@ +function isHiragana(ch: string): boolean { + const code = ch.charCodeAt(0); + return code >= 0x3040 && code <= 0x309f; +} + 
/** True when the first UTF-16 code unit of `ch` lies in the Katakana block (U+30A0–U+30FF). */
function isKatakana(ch: string): boolean {
  const code = ch.charCodeAt(0);
  return code >= 0x30a0 && code <= 0x30ff;
}

/**
 * Convert katakana letters (U+30A1–U+30F6) to hiragana by shifting each code
 * unit down by 0x60. Every other character — including the prolonged sound
 * mark ー (U+30FC), which is outside that range — is copied through unchanged.
 */
function katakanaToHiragana(text: string): string {
  let result = '';
  for (const ch of text) {
    const code = ch.charCodeAt(0);
    if (code >= 0x30a1 && code <= 0x30f6) {
      result += String.fromCharCode(code - 0x60);
    } else {
      result += ch;
    }
  }
  return result;
}

/**
 * Reconstruct the full word reading from the surface form and the stored
 * (possibly partial) reading.
 *
 * MeCab/Yomitan sometimes stores only the kanji portion's reading. For example,
 * お前 (surface) with reading まえ — the stored reading covers only 前, missing
 * the leading お. This function takes the run of kana at the start and at the
 * end of the headword (converted to hiragana) and re-attaches whichever of them
 * the stored reading does not already contain.
 *
 * The match is a plain prefix/suffix comparison, so a kanji reading that
 * happens to begin or end with the same kana as the headword's own kana is
 * treated as already containing it.
 *
 * @param headword - Surface form of the word; may mix kana and kanji.
 * @param storedReading - Reading as stored, in hiragana or katakana.
 * @returns The reading in hiragana, or `''` when `storedReading` is empty.
 */
export function fullReading(headword: string, storedReading: string): string {
  if (!storedReading) return '';

  // Normalise first so every return path yields hiragana, including the
  // empty-headword one.
  const reading = katakanaToHiragana(storedReading);
  if (!headword) return reading;

  const leadingKana: string[] = [];
  const trailingKana: string[] = [];
  // Spread iterates by code point, so surrogate pairs stay intact.
  const chars = [...headword];

  let i = 0;
  while (i < chars.length && (isHiragana(chars[i]) || isKatakana(chars[i]))) {
    leadingKana.push(katakanaToHiragana(chars[i]));
    i++;
  }

  // All-kana headword: nothing to reconstruct.
  if (i === chars.length) {
    return reading;
  }

  // chars[i] is the first non-kana character; never walk back past it.
  let j = chars.length - 1;
  while (j > i && (isHiragana(chars[j]) || isKatakana(chars[j]))) {
    trailingKana.unshift(katakanaToHiragana(chars[j]));
    j--;
  }

  // Strip matching trailing kana from the stored reading to get the core reading.
  let coreReading = reading;
  const trailStr = trailingKana.join('');
  if (trailStr && coreReading.endsWith(trailStr)) {
    coreReading = coreReading.slice(0, -trailStr.length);
  }

  const leadStr = leadingKana.join('');
  if (leadStr && coreReading.startsWith(leadStr)) {
    // Leading kana already present: keep it, but still restore the trailing
    // kana. If it was stripped above this reproduces `reading`; if it was
    // missing from the stored reading it is appended, exactly as in the
    // branch below.
    return coreReading + trailStr;
  }

  return leadStr + coreReading + trailStr;
}