mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-05-28 00:55:16 -07:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
c150fce782
|
|||
|
ab41837d3d
|
|||
|
9e4ad907fe
|
|||
|
af86ce2341
|
|||
|
b10a7b3e98
|
|||
|
96894ff85c
|
+27
@@ -0,0 +1,27 @@
|
||||
---
|
||||
id: TASK-304
|
||||
title: Fix N+1 sentence boundary counting across Yomitan punctuation gaps
|
||||
status: In Progress
|
||||
assignee: []
|
||||
created_date: '2026-04-26 05:33'
|
||||
labels:
|
||||
- bug
|
||||
- tokenizer
|
||||
- annotations
|
||||
dependencies: []
|
||||
priority: medium
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
N+1 target selection should respect sentence-ending punctuation from the original subtitle text even when Yomitan token output omits punctuation tokens. Current behavior can treat multiple subtitle sentences as one token span and incorrectly satisfy the minimum content-token threshold.
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
<!-- AC:BEGIN -->
|
||||
- [ ] #1 For a subtitle like `てんめ!ふざけんなよ!`, the second sentence contains only a single content token (`ふざけん` or similar), so it is not marked as an N+1 target when the minimum sentence word count is 3.
|
||||
- [ ] #2 N+1 sentence segmentation uses original subtitle text offsets or equivalent source-boundary data, not only punctuation tokens returned by Yomitan.
|
||||
- [ ] #3 Existing annotation exclusion behavior for particles/grammar tokens remains unchanged.
|
||||
- [ ] #4 Regression tests cover Yomitan-style token streams where punctuation is absent from the token list.
|
||||
<!-- AC:END -->
|
||||
@@ -0,0 +1,55 @@
|
||||
---
|
||||
id: TASK-305
|
||||
title: Use Yomitan word classes for subtitle token POS filtering
|
||||
status: Done
|
||||
assignee: []
|
||||
created_date: '2026-04-26 05:56'
|
||||
updated_date: '2026-04-26 05:59'
|
||||
labels:
|
||||
- tokenizer
|
||||
- yomitan
|
||||
dependencies: []
|
||||
priority: medium
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
Subtitle annotation filtering currently uses Yomitan token spans, then enriches those spans by running MeCab over the full normalized subtitle line. Add support for carrying Yomitan headword wordClasses from termsFind into SubMiner tokens so dictionary-backed tokens can provide coarse POS/tag metadata without vendored Yomitan changes. MeCab whole-line enrichment should remain a fallback/source of detailed POS data when Yomitan classes are absent.
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
<!-- AC:BEGIN -->
|
||||
- [x] #1 Yomitan scanner tokens preserve matched headword wordClasses when termsFind returns them.
|
||||
- [x] #2 Subtitle tokenization maps recognized Yomitan wordClasses to coarse PartOfSpeech/POS metadata before annotation filtering.
|
||||
- [x] #3 Whole-line MeCab enrichment remains available for missing or more detailed POS metadata and does not break existing subtitle annotation behavior.
|
||||
- [x] #4 Focused tokenizer tests cover wordClasses extraction and POS mapping.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
<!-- SECTION:PLAN:BEGIN -->
|
||||
1. Add focused regression coverage for Yomitan scanner wordClasses payload and subtitle POS mapping.
|
||||
2. Extend the app-owned Yomitan scanner payload to carry matched headword wordClasses when present.
|
||||
3. Map recognized Yomitan wordClasses to SubMiner coarse PartOfSpeech/POS metadata before annotation filtering.
|
||||
4. Keep MeCab whole-line enrichment as fallback/detail-fill for missing POS fields.
|
||||
5. Run focused tokenizer tests and typecheck.
|
||||
<!-- SECTION:PLAN:END -->
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
<!-- SECTION:NOTES:BEGIN -->
|
||||
Implemented app-only wordClasses extraction from termsFind results; no vendored Yomitan changes required. The recognized classes (`prt`, `aux`, `v*`, `adj-i`/`adj-ix`, `adj-na`, and noun-like classes) are currently mapped to SubMiner POS metadata. MeCab enrichment now skips only tokens that already have complete pos1/pos2/pos3; otherwise it fills the missing fields while preserving any existing coarse pos1. Verification: bun test src/core/services/tokenizer/yomitan-parser-runtime.test.ts src/core/services/tokenizer.test.ts; bun run typecheck.
|
||||
<!-- SECTION:NOTES:END -->
|
||||
|
||||
## Final Summary
|
||||
|
||||
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||
Implemented app-only Yomitan wordClasses support for subtitle token annotation filtering. The scanner now carries matched headword wordClasses from termsFind results, tokenizer maps recognized classes into SubMiner coarse POS metadata before annotation, and MeCab whole-line enrichment continues to fill missing detailed POS fields without requiring vendored Yomitan changes.
|
||||
|
||||
Tests run:
|
||||
- bun test src/core/services/tokenizer/yomitan-parser-runtime.test.ts src/core/services/tokenizer.test.ts
|
||||
- bun run typecheck
|
||||
|
||||
Note: the working tree already contained unrelated tokenizer/annotation edits and the TASK-304 changes before this work; those were left intact.
|
||||
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||
@@ -0,0 +1,33 @@
|
||||
---
|
||||
id: TASK-306
|
||||
title: Fix Hyprland fullscreen overlay geometry and hover pause
|
||||
status: Done
|
||||
assignee: []
|
||||
created_date: '2026-04-27 01:44'
|
||||
labels:
|
||||
- linux
|
||||
- hyprland
|
||||
- overlay
|
||||
- bug
|
||||
dependencies: []
|
||||
priority: high
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
|
||||
Overlay should track mpv geometry through Hyprland fullscreen transitions, stay above fullscreen video, and keep primary subtitle hover pause working after fullscreen/toggle cycles.
|
||||
|
||||
Implemented by observing mpv fullscreen property changes in addition to Hyprland geometry events, then refreshing visible overlay bounds/layering on Linux.
|
||||
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
<!-- AC:BEGIN -->
|
||||
|
||||
- [x] #1 Hyprland tracker reacts to fullscreen/window state changes with updated geometry.
|
||||
- [x] #2 Visible overlay is re-layered above mpv after Hyprland fullscreen geometry updates.
|
||||
- [x] #3 Primary subtitle hover pause remains active after overlay geometry changes or visible overlay toggle cycles.
|
||||
<!-- AC:END -->
|
||||
@@ -0,0 +1,58 @@
|
||||
---
|
||||
id: TASK-307
|
||||
title: Exclude kana-only words from N+1 subtitle targets
|
||||
status: Done
|
||||
assignee:
|
||||
- codex
|
||||
created_date: '2026-04-27 01:52'
|
||||
updated_date: '2026-04-27 01:57'
|
||||
labels:
|
||||
- tokenizer
|
||||
- annotations
|
||||
dependencies: []
|
||||
priority: medium
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
Subtitle N+1 annotation is over-targeting kana-only (hiragana/katakana) tokens that collapse to dictionary words. Adjust targeting so kana-only tokens are not selected as N+1 candidates, while preserving tokenization/hover behavior and other annotation metadata where existing filters allow it.
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
<!-- AC:BEGIN -->
|
||||
- [x] #1 Kana-only subtitle tokens are not marked as N+1 targets.
|
||||
- [x] #2 Kanji or mixed lexical tokens can still be marked as N+1 targets when they are the single unknown candidate in a sentence.
|
||||
- [x] #3 Regression coverage demonstrates the kana-only N+1 exclusion.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
<!-- SECTION:PLAN:BEGIN -->
|
||||
1. Add a failing regression in `src/core/services/tokenizer.test.ts` showing a kana-only Yomitan token is not selected as the single N+1 target, while a mixed lexical token in the same style still can be targeted.
|
||||
2. Implement the smallest filter in `src/token-merger.ts`: N+1 candidate selection rejects tokens whose surface is entirely kana; word-count behavior remains governed by existing annotation/POS filters.
|
||||
3. Run the focused tokenizer tests, then update task acceptance criteria/final summary.
|
||||
<!-- SECTION:PLAN:END -->
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
<!-- SECTION:NOTES:BEGIN -->
|
||||
Implemented a surface-level kana-only guard in N+1 candidate selection. Kept existing word-count/POS filtering behavior intact; updated tokenizer and annotation-stage expectations where old tests intentionally allowed kana-only N+1 targets.
|
||||
<!-- SECTION:NOTES:END -->
|
||||
|
||||
## Final Summary
|
||||
|
||||
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||
Summary:
|
||||
- Added kana-only surface detection to `isNPlusOneCandidateToken` so hiragana/katakana-only subtitle tokens are not selected as N+1 targets.
|
||||
- Added/updated tokenizer and annotation-stage regressions for kana-only targets while preserving non-kana N+1 behavior.
|
||||
- Added changelog fragment `changes/307-kana-nplusone-targets.md`.
|
||||
|
||||
Verification:
|
||||
- `bun test src/core/services/tokenizer.test.ts --test-name-pattern "kana-only N\+1"` failed before the fix with `true !== false`.
|
||||
- `bun test src/core/services/tokenizer/annotation-stage.test.ts src/core/services/tokenizer.test.ts` passed.
|
||||
- `bun run typecheck` passed.
|
||||
- `bun run test:fast` passed.
|
||||
- `bun run changelog:lint` passed.
|
||||
- `bunx prettier --check src/core/services/tokenizer.test.ts src/core/services/tokenizer/annotation-stage.test.ts src/token-merger.ts changes/307-kana-nplusone-targets.md` passed.
|
||||
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||
@@ -0,0 +1,54 @@
|
||||
---
|
||||
id: TASK-308
|
||||
title: Restore persistent JLPT subtitle underlines
|
||||
status: Done
|
||||
assignee:
|
||||
- Codex
|
||||
created_date: '2026-04-27 02:03'
|
||||
updated_date: '2026-04-27 02:07'
|
||||
labels:
|
||||
- overlay
|
||||
- jlpt
|
||||
- renderer
|
||||
dependencies: []
|
||||
priority: medium
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
JLPT tagging currently exposes the JLPT level on hover, but the persistent subtitle underline is missing. When JLPT annotation is enabled and a rendered subtitle token has a JLPT level, users should see the configured JLPT color underline without needing to hover.
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
<!-- AC:BEGIN -->
|
||||
- [x] #1 JLPT-tagged subtitle tokens render a persistent underline for N1-N5 levels when JLPT tagging is enabled.
|
||||
- [x] #2 Hover and keyboard-selected JLPT labels continue to appear for tagged tokens.
|
||||
- [x] #3 Higher-priority annotation colors such as known words, N+1, names, and frequency styling are not overridden by JLPT text color.
|
||||
- [x] #4 Regression coverage verifies the CSS contract for persistent JLPT underlines.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
<!-- SECTION:PLAN:BEGIN -->
|
||||
1. Add a focused renderer CSS regression asserting each `word-jlpt-n*` class provides persistent underline decoration while preserving existing typography constraints.
|
||||
2. Run the focused renderer test to confirm the regression fails before production changes.
|
||||
3. Restore underline CSS for JLPT classes without broadening JLPT text-color precedence over known/N+1/name/frequency tokens.
|
||||
4. Re-run the focused renderer test and update acceptance criteria/task notes.
|
||||
<!-- SECTION:PLAN:END -->
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
<!-- SECTION:NOTES:BEGIN -->
|
||||
Verified red/green regression: the tightened `src/renderer/subtitle-render.test.ts` first failed because the base `word-jlpt-n*` selectors had no underline decoration, then passed after the JLPT underline decoration was moved to unconditional base selectors while the JLPT text color stayed priority-scoped.
|
||||
|
||||
Checks: `bun test src/renderer/subtitle-render.test.ts`; `bun run changelog:lint`; `bun run typecheck`.
|
||||
<!-- SECTION:NOTES:END -->
|
||||
|
||||
## Final Summary
|
||||
|
||||
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||
Restored persistent JLPT subtitle underlines by adding underline decoration to each base `word-jlpt-n*` renderer CSS class. JLPT text color remains in the existing priority-scoped selectors, so known/N+1/name/frequency coloring is not overridden while the underline still appears on any JLPT-tagged token.
|
||||
|
||||
Updated renderer CSS regression coverage to assert underline decoration for N1-N5 and added a changelog fragment of type `fixed`. Verified with `bun test src/renderer/subtitle-render.test.ts`, `bun run changelog:lint`, and `bun run typecheck`.
|
||||
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||
@@ -0,0 +1,6 @@
|
||||
type: fixed
|
||||
area: tokenizer
|
||||
|
||||
- Use Yomitan `wordClasses` metadata for subtitle POS filtering.
|
||||
- Backfill blank MeCab POS detail fields during parser enrichment.
|
||||
- Keep subtitle annotation metadata stripped from token results.
|
||||
@@ -0,0 +1,4 @@
|
||||
type: fixed
|
||||
area: overlay
|
||||
|
||||
- Fixed Hyprland fullscreen transitions so mpv fullscreen changes refresh visible overlay geometry, reassert topmost stacking, and keep primary subtitle hover pause working after resize/toggle cycles.
|
||||
@@ -0,0 +1,4 @@
|
||||
type: fixed
|
||||
area: tokenizer
|
||||
|
||||
- Stopped kana-only subtitle tokens from being selected as N+1 targets.
|
||||
@@ -0,0 +1,4 @@
|
||||
type: fixed
|
||||
area: overlay
|
||||
|
||||
- Restored persistent JLPT subtitle underlines while keeping hover JLPT labels and annotation color priority intact.
|
||||
@@ -324,6 +324,10 @@ Add a `pass` rule for each global shortcut you configure. The defaults are `Alt+
|
||||
|
||||
Without these rules, Hyprland intercepts the keypresses before they reach SubMiner, and the shortcuts silently do nothing.
|
||||
|
||||
**Overlay stays behind mpv after fullscreen**
|
||||
|
||||
SubMiner watches mpv's `fullscreen` property and refreshes the overlay geometry when it changes. If the overlay still does not move or rise above fullscreen mpv, confirm that the mpv IPC socket is connected and that `hyprctl -j clients` and `hyprctl -j monitors` work from the same environment that launched SubMiner.
|
||||
|
||||
For more details, see the Hyprland docs on [global keybinds](https://wiki.hypr.land/Configuring/Binds/#global-keybinds) and [window rules](https://wiki.hypr.land/Configuring/Window-Rules/).
|
||||
|
||||
### macOS
|
||||
|
||||
@@ -59,6 +59,7 @@ const MPV_SUBTITLE_PROPERTY_OBSERVATIONS: string[] = [
|
||||
'sub-ass-override',
|
||||
'sub-use-margins',
|
||||
'pause',
|
||||
'fullscreen',
|
||||
'duration',
|
||||
'media-title',
|
||||
'secondary-sub-visibility',
|
||||
|
||||
@@ -93,6 +93,7 @@ function createDeps(overrides: Partial<MpvProtocolHandleMessageDeps> = {}): {
|
||||
emitTimePosChange: () => {},
|
||||
emitDurationChange: () => {},
|
||||
emitPauseChange: () => {},
|
||||
emitFullscreenChange: (payload) => state.events.push(payload),
|
||||
autoLoadSecondarySubTrack: () => {},
|
||||
setCurrentVideoPath: () => {},
|
||||
emitSecondarySubtitleVisibility: (payload) => state.events.push(payload),
|
||||
@@ -160,6 +161,17 @@ test('dispatchMpvProtocolMessage enforces sub-visibility hidden when overlay sup
|
||||
]);
|
||||
});
|
||||
|
||||
test('dispatchMpvProtocolMessage emits fullscreen changes', async () => {
|
||||
const { deps, state } = createDeps();
|
||||
|
||||
await dispatchMpvProtocolMessage(
|
||||
{ event: 'property-change', name: 'fullscreen', data: true },
|
||||
deps,
|
||||
);
|
||||
|
||||
assert.deepEqual(state.events, [{ fullscreen: true }]);
|
||||
});
|
||||
|
||||
test('dispatchMpvProtocolMessage skips sub-visibility suppression when overlay is hidden', async () => {
|
||||
const { deps, state } = createDeps({
|
||||
isVisibleOverlayVisible: () => false,
|
||||
|
||||
@@ -65,6 +65,7 @@ export interface MpvProtocolHandleMessageDeps {
|
||||
emitTimePosChange: (payload: { time: number }) => void;
|
||||
emitDurationChange: (payload: { duration: number }) => void;
|
||||
emitPauseChange: (payload: { paused: boolean }) => void;
|
||||
emitFullscreenChange: (payload: { fullscreen: boolean }) => void;
|
||||
emitSubtitleMetricsChange: (payload: Partial<MpvSubtitleRenderMetrics>) => void;
|
||||
setCurrentSecondarySubText: (text: string) => void;
|
||||
resolvePendingRequest: (requestId: number, message: MpvMessage) => boolean;
|
||||
@@ -291,6 +292,8 @@ export async function dispatchMpvProtocolMessage(
|
||||
}
|
||||
} else if (msg.name === 'pause') {
|
||||
deps.emitPauseChange({ paused: asBoolean(msg.data, false) });
|
||||
} else if (msg.name === 'fullscreen') {
|
||||
deps.emitFullscreenChange({ fullscreen: asBoolean(msg.data, false) });
|
||||
} else if (msg.name === 'media-title') {
|
||||
deps.emitMediaTitleChange({
|
||||
title: typeof msg.data === 'string' ? msg.data.trim() : null,
|
||||
|
||||
@@ -57,6 +57,22 @@ test('MpvIpcClient handles sub-text property change and broadcasts tokenized sub
|
||||
assert.equal(events[0]!.isOverlayVisible, false);
|
||||
});
|
||||
|
||||
test('MpvIpcClient emits fullscreen property changes', async () => {
|
||||
const events: Array<{ fullscreen: boolean }> = [];
|
||||
const client = new MpvIpcClient('/tmp/mpv.sock', makeDeps());
|
||||
client.on('fullscreen-change', (payload) => {
|
||||
events.push(payload);
|
||||
});
|
||||
|
||||
await invokeHandleMessage(client, {
|
||||
event: 'property-change',
|
||||
name: 'fullscreen',
|
||||
data: true,
|
||||
});
|
||||
|
||||
assert.deepEqual(events, [{ fullscreen: true }]);
|
||||
});
|
||||
|
||||
test('MpvIpcClient clears cached media title when media path changes', async () => {
|
||||
const client = new MpvIpcClient('/tmp/mpv.sock', makeDeps());
|
||||
|
||||
|
||||
@@ -119,6 +119,7 @@ export interface MpvIpcClientEventMap {
|
||||
'time-pos-change': { time: number };
|
||||
'duration-change': { duration: number };
|
||||
'pause-change': { paused: boolean };
|
||||
'fullscreen-change': { fullscreen: boolean };
|
||||
'secondary-subtitle-change': { text: string };
|
||||
'subtitle-track-change': { sid: number | null };
|
||||
'subtitle-track-list-change': { trackList: unknown[] | null };
|
||||
@@ -330,6 +331,9 @@ export class MpvIpcClient implements MpvClient {
|
||||
this.playbackPaused = payload.paused;
|
||||
this.emit('pause-change', payload);
|
||||
},
|
||||
emitFullscreenChange: (payload) => {
|
||||
this.emit('fullscreen-change', payload);
|
||||
},
|
||||
emitSecondarySubtitleChange: (payload) => {
|
||||
this.emit('secondary-subtitle-change', payload);
|
||||
},
|
||||
|
||||
@@ -67,6 +67,8 @@ export function ensureOverlayWindowLevel(window: BrowserWindow): void {
|
||||
return;
|
||||
}
|
||||
window.setAlwaysOnTop(true);
|
||||
window.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true });
|
||||
window.moveTop();
|
||||
}
|
||||
|
||||
export function enforceOverlayLayerOrder(options: {
|
||||
|
||||
@@ -25,6 +25,7 @@ interface YomitanTokenInput {
|
||||
reading?: string;
|
||||
headword?: string;
|
||||
isNameMatch?: boolean;
|
||||
wordClasses?: string[];
|
||||
}
|
||||
|
||||
function makeDepsFromYomitanTokens(
|
||||
@@ -55,6 +56,7 @@ function makeDepsFromYomitanTokens(
|
||||
startPos,
|
||||
endPos,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
wordClasses: token.wordClasses,
|
||||
};
|
||||
});
|
||||
},
|
||||
@@ -1552,7 +1554,7 @@ test('tokenizeSubtitle assigns JLPT level to Yomitan tokens', async () => {
|
||||
assert.equal(result.tokens?.[0]?.jlptLevel, 'N4');
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle can assign JLPT level to Yomitan particle token', async () => {
|
||||
test('tokenizeSubtitle clears JLPT level from standalone Yomitan particle token', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'は',
|
||||
makeDepsFromYomitanTokens([{ surface: 'は', reading: 'は', headword: 'は' }], {
|
||||
@@ -1561,7 +1563,7 @@ test('tokenizeSubtitle can assign JLPT level to Yomitan particle token', async (
|
||||
);
|
||||
|
||||
assert.equal(result.tokens?.length, 1);
|
||||
assert.equal(result.tokens?.[0]?.jlptLevel, 'N5');
|
||||
assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle returns null tokens for empty normalized text', async () => {
|
||||
@@ -2304,6 +2306,29 @@ test('tokenizeSubtitle selects one N+1 target token', async () => {
|
||||
assert.equal(targets[0]?.surface, '犬');
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle does not select kana-only N+1 target tokens', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'私のばあい',
|
||||
makeDepsFromYomitanTokens(
|
||||
[
|
||||
{ surface: '私', reading: 'わたし', headword: '私' },
|
||||
{ surface: 'の', reading: 'の', headword: 'の' },
|
||||
{ surface: 'ばあい', reading: 'ばあい', headword: '場合' },
|
||||
],
|
||||
{
|
||||
getMinSentenceWordsForNPlusOne: () => 2,
|
||||
isKnownWord: (text) => text === '私',
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
assert.equal(result.tokens?.length, 3);
|
||||
assert.equal(
|
||||
result.tokens?.some((token) => token.isNPlusOneTarget),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle does not mark target when sentence has multiple candidates', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'猫犬',
|
||||
@@ -3034,6 +3059,64 @@ test('tokenizeSubtitle skips all enrichment stages when disabled', async () => {
|
||||
assert.equal(frequencyCalls, 0);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle uses Yomitan word classes to classify standalone particles', async () => {
|
||||
let mecabCalls = 0;
|
||||
const result = await tokenizeSubtitle(
|
||||
'は',
|
||||
makeDepsFromYomitanTokens(
|
||||
[{ surface: 'は', reading: 'は', headword: 'は', wordClasses: ['prt'] }],
|
||||
{
|
||||
getFrequencyDictionaryEnabled: () => true,
|
||||
getFrequencyRank: (text) => (text === 'は' ? 10 : null),
|
||||
getJlptLevel: (text) => (text === 'は' ? 'N5' : null),
|
||||
tokenizeWithMecab: async () => {
|
||||
mecabCalls += 1;
|
||||
return null;
|
||||
},
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
assert.equal(mecabCalls, 1);
|
||||
assert.equal(result.tokens?.length, 1);
|
||||
assert.equal(result.tokens?.[0]?.partOfSpeech, PartOfSpeech.particle);
|
||||
assert.equal(result.tokens?.[0]?.pos1, '助詞');
|
||||
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
|
||||
assert.equal(result.tokens?.[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle fills detailed MeCab POS when Yomitan word class supplies coarse POS', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'は',
|
||||
makeDepsFromYomitanTokens(
|
||||
[{ surface: 'は', reading: 'は', headword: 'は', wordClasses: ['prt'] }],
|
||||
{
|
||||
tokenizeWithMecab: async () => [
|
||||
{
|
||||
headword: 'は',
|
||||
surface: 'は',
|
||||
reading: 'ハ',
|
||||
startPos: 0,
|
||||
endPos: 1,
|
||||
partOfSpeech: PartOfSpeech.particle,
|
||||
pos1: '助詞',
|
||||
pos2: '係助詞',
|
||||
pos3: '*',
|
||||
isMerged: false,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
assert.equal(result.tokens?.[0]?.partOfSpeech, PartOfSpeech.particle);
|
||||
assert.equal(result.tokens?.[0]?.pos1, '助詞');
|
||||
assert.equal(result.tokens?.[0]?.pos2, '係助詞');
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle keeps frequency enrichment while n+1 is disabled', async () => {
|
||||
let knownCalls = 0;
|
||||
let mecabCalls = 0;
|
||||
@@ -3110,6 +3193,60 @@ test('tokenizeSubtitle excludes default non-independent pos2 from N+1 and freque
|
||||
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle preserves known-word highlight for exact non-independent kanji noun tokens', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'その点',
|
||||
makeDepsFromYomitanTokens(
|
||||
[
|
||||
{ surface: 'その', reading: 'その', headword: 'その' },
|
||||
{ surface: '点', reading: 'てん', headword: '点' },
|
||||
],
|
||||
{
|
||||
isKnownWord: (text) => text === '点' || text === 'てん',
|
||||
getFrequencyDictionaryEnabled: () => true,
|
||||
getFrequencyRank: (text) => (text === '点' ? 1384 : null),
|
||||
getJlptLevel: (text) => (text === '点' ? 'N3' : null),
|
||||
tokenizeWithMecab: async () => [
|
||||
{
|
||||
headword: 'その',
|
||||
surface: 'その',
|
||||
reading: 'ソノ',
|
||||
startPos: 0,
|
||||
endPos: 2,
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
pos1: '連体詞',
|
||||
isMerged: false,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
},
|
||||
{
|
||||
headword: '点',
|
||||
surface: '点',
|
||||
reading: 'テン',
|
||||
startPos: 2,
|
||||
endPos: 3,
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
pos1: '名詞',
|
||||
pos2: '非自立',
|
||||
pos3: '一般',
|
||||
isMerged: false,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
assert.equal(result.tokens?.length, 2);
|
||||
assert.equal(result.tokens?.[0]?.isKnown, false);
|
||||
assert.equal(result.tokens?.[1]?.surface, '点');
|
||||
assert.equal(result.tokens?.[1]?.isKnown, true);
|
||||
assert.equal(result.tokens?.[1]?.isNPlusOneTarget, false);
|
||||
assert.equal(result.tokens?.[1]?.frequencyRank, undefined);
|
||||
assert.equal(result.tokens?.[1]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle keeps mecab-tagged interjections tokenized while clearing annotation metadata', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'ぐはっ',
|
||||
@@ -3574,7 +3711,7 @@ test('tokenizeSubtitle excludes single-kana merged tokens from frequency highlig
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle excludes merged function/content token from frequency highlighting but keeps N+1', async () => {
|
||||
test('tokenizeSubtitle excludes merged kana-only function/content token from frequency and N+1', async () => {
|
||||
const result = await tokenizeSubtitle(
|
||||
'になれば',
|
||||
makeDepsFromYomitanTokens([{ surface: 'になれば', reading: 'になれば', headword: 'なる' }], {
|
||||
@@ -3628,7 +3765,7 @@ test('tokenizeSubtitle excludes merged function/content token from frequency hig
|
||||
assert.equal(result.tokens?.length, 1);
|
||||
assert.equal(result.tokens?.[0]?.pos1, '助詞|動詞');
|
||||
assert.equal(result.tokens?.[0]?.frequencyRank, undefined);
|
||||
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, true);
|
||||
assert.equal(result.tokens?.[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('tokenizeSubtitle clears all annotations for kana-only demonstrative helper merges', async () => {
|
||||
@@ -3827,7 +3964,7 @@ test('tokenizeSubtitle clears all annotations for explanatory pondering endings'
|
||||
surface: 'どうかしちゃった',
|
||||
headword: 'どうかしちゃう',
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: true,
|
||||
isNPlusOneTarget: false,
|
||||
frequencyRank: 3200,
|
||||
jlptLevel: 'N3',
|
||||
},
|
||||
|
||||
@@ -96,6 +96,7 @@ interface TokenizerAnnotationOptions {
|
||||
minSentenceWordsForNPlusOne: number | undefined;
|
||||
pos1Exclusions: ReadonlySet<string>;
|
||||
pos2Exclusions: ReadonlySet<string>;
|
||||
sourceText?: string;
|
||||
}
|
||||
|
||||
let parserEnrichmentWorkerRuntimeModulePromise: Promise<
|
||||
@@ -159,7 +160,7 @@ async function applyAnnotationStage(
|
||||
options: TokenizerAnnotationOptions,
|
||||
): Promise<MergedToken[]> {
|
||||
if (!hasAnyAnnotationEnabled(options)) {
|
||||
return tokens;
|
||||
return stripSubtitleAnnotationMetadata(tokens);
|
||||
}
|
||||
|
||||
if (!annotationStageModulePromise) {
|
||||
@@ -333,6 +334,66 @@ function normalizeSelectedYomitanTokens(tokens: MergedToken[]): MergedToken[] {
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Sanitizes the raw `wordClasses` value carried on a Yomitan scanner token.
 *
 * Any value that is not an array yields an empty list. Within an array,
 * non-string entries are dropped, the remaining entries are trimmed, entries
 * that are empty after trimming are discarded, and duplicates are removed
 * while keeping first-seen order.
 *
 * @param wordClasses - Untrusted payload value; a string array when present.
 * @returns A new array of unique, trimmed, non-empty class identifiers.
 */
function normalizeYomitanWordClasses(wordClasses: unknown): string[] {
  if (!Array.isArray(wordClasses)) {
    return [];
  }

  // A Set iterates in insertion order, so it deduplicates without rescanning
  // the output for every entry.
  const unique = new Set<string>();
  for (const wordClass of wordClasses) {
    if (typeof wordClass !== 'string') {
      continue;
    }
    const trimmed = wordClass.trim();
    if (trimmed) {
      unique.add(trimmed);
    }
  }
  return [...unique];
}
|
||||
|
||||
/**
 * Maps a list of Yomitan word-class identifiers to coarse POS metadata.
 *
 * The checks run in a fixed priority order and the first match wins:
 * `prt`, then `aux`, then any class starting with `v`, then `adj-i`/`adj-ix`,
 * then `adj-na`, then noun-like classes (`n`, `num`, `ctr`, `pn`, or any
 * `n-` prefixed class). When nothing matches, only `PartOfSpeech.other` is
 * returned and `pos1` is left unset.
 *
 * @param wordClasses - Class identifiers to inspect; matched exactly as given.
 * @returns The coarse part of speech plus, when recognized, its `pos1` label.
 */
function resolvePartOfSpeechFromYomitanWordClasses(wordClasses: string[]): {
  partOfSpeech: PartOfSpeech;
  pos1?: string;
} {
  // True when at least one of the given identifiers appears verbatim.
  const hasAny = (...candidates: string[]): boolean =>
    candidates.some((candidate) => wordClasses.includes(candidate));
  // True when at least one class begins with the given prefix.
  const hasPrefixed = (prefix: string): boolean =>
    wordClasses.some((entry) => entry.startsWith(prefix));

  if (hasAny('prt')) {
    return { partOfSpeech: PartOfSpeech.particle, pos1: '助詞' };
  }

  if (hasAny('aux')) {
    return { partOfSpeech: PartOfSpeech.bound_auxiliary, pos1: '助動詞' };
  }

  // Any identifier beginning with "v" counts as a verb class.
  if (hasPrefixed('v')) {
    return { partOfSpeech: PartOfSpeech.verb, pos1: '動詞' };
  }

  if (hasAny('adj-i', 'adj-ix')) {
    return { partOfSpeech: PartOfSpeech.i_adjective, pos1: '形容詞' };
  }

  // NOTE(review): na-adjectives get the noun pos1 label here, presumably to
  // mirror how the MeCab side tags them; confirm against the annotation filters.
  if (hasAny('adj-na')) {
    return { partOfSpeech: PartOfSpeech.na_adjective, pos1: '名詞' };
  }

  if (hasAny('n', 'num', 'ctr', 'pn') || hasPrefixed('n-')) {
    return { partOfSpeech: PartOfSpeech.noun, pos1: '名詞' };
  }

  return { partOfSpeech: PartOfSpeech.other };
}
|
||||
|
||||
/**
 * Derives coarse POS metadata from a token's raw `wordClasses` value.
 *
 * The value is first cleaned with `normalizeYomitanWordClasses`, then the
 * cleaned list is classified by `resolvePartOfSpeechFromYomitanWordClasses`.
 *
 * @param wordClasses - Unvalidated `wordClasses` value from a Yomitan token.
 * @returns The resolved part of speech and, when one was recognized, `pos1`.
 */
function getYomitanWordClassPosMetadata(wordClasses: unknown): {
  partOfSpeech: PartOfSpeech;
  pos1?: string;
} {
  const cleanedClasses = normalizeYomitanWordClasses(wordClasses);
  return resolvePartOfSpeechFromYomitanWordClasses(cleanedClasses);
}
|
||||
|
||||
function resolveFrequencyLookupText(
|
||||
token: MergedToken,
|
||||
matchMode: FrequencyDictionaryMatchMode,
|
||||
@@ -623,19 +684,23 @@ async function parseWithYomitanInternalParser(
|
||||
}
|
||||
const normalizedSelectedTokens = normalizeSelectedYomitanTokens(
|
||||
selectedTokens.map(
|
||||
(token): MergedToken => ({
|
||||
surface: token.surface,
|
||||
reading: token.reading,
|
||||
headword: token.headword,
|
||||
startPos: token.startPos,
|
||||
endPos: token.endPos,
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
isMerged: true,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
frequencyRank: token.frequencyRank,
|
||||
}),
|
||||
(token): MergedToken => {
|
||||
const posMetadata = getYomitanWordClassPosMetadata(token.wordClasses);
|
||||
return {
|
||||
surface: token.surface,
|
||||
reading: token.reading,
|
||||
headword: token.headword,
|
||||
startPos: token.startPos,
|
||||
endPos: token.endPos,
|
||||
partOfSpeech: posMetadata.partOfSpeech,
|
||||
pos1: posMetadata.pos1,
|
||||
isMerged: true,
|
||||
isKnown: false,
|
||||
isNPlusOneTarget: false,
|
||||
isNameMatch: token.isNameMatch ?? false,
|
||||
frequencyRank: token.frequencyRank,
|
||||
};
|
||||
},
|
||||
),
|
||||
);
|
||||
|
||||
@@ -716,12 +781,11 @@ export async function tokenizeSubtitle(
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
const annotationOptions = getAnnotationOptions(deps);
|
||||
annotationOptions.sourceText = tokenizeText;
|
||||
|
||||
const yomitanTokens = await parseWithYomitanInternalParser(tokenizeText, deps, annotationOptions);
|
||||
if (yomitanTokens && yomitanTokens.length > 0) {
|
||||
const annotatedTokens = await stripSubtitleAnnotationMetadata(
|
||||
await applyAnnotationStage(yomitanTokens, deps, annotationOptions),
|
||||
);
|
||||
const annotatedTokens = await applyAnnotationStage(yomitanTokens, deps, annotationOptions);
|
||||
return {
|
||||
text: displayText,
|
||||
tokens: annotatedTokens.length > 0 ? annotatedTokens : null,
|
||||
|
||||
@@ -366,6 +366,132 @@ test('shouldExcludeTokenFromSubtitleAnnotations excludes kana-only non-independe
|
||||
assert.equal(shouldExcludeTokenFromSubtitleAnnotations(token), true);
|
||||
});
|
||||
|
||||
// A bare して whose headword is する is expected to be excluded from subtitle annotations.
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone して grammar helper fragments', () => {
  const helperFragment = makeToken({
    surface: 'して',
    reading: 'シテ',
    headword: 'する',
    partOfSpeech: PartOfSpeech.verb,
    pos1: '動詞|助詞',
    pos2: '自立|接続助詞',
  });

  const excluded = shouldExcludeTokenFromSubtitleAnnotations(helperFragment);

  assert.equal(excluded, true);
});
|
||||
|
||||
// Inflected continuations of して (here してる) with headword する are expected to be excluded as well.
test('shouldExcludeTokenFromSubtitleAnnotations excludes inflected standalone して grammar helper fragments', () => {
  const inflectedHelper = makeToken({
    surface: 'してる',
    reading: 'シテル',
    headword: 'する',
    partOfSpeech: PartOfSpeech.verb,
    pos1: '動詞|助動詞',
    pos2: '自立|非自立',
  });

  const excluded = shouldExcludeTokenFromSubtitleAnnotations(inflectedHelper);

  assert.equal(excluded, true);
});
|
||||
|
||||
// The particle と must be excluded even when the token carries no POS tags at all.
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone particle fragments without POS tags', () => {
  const untaggedParticle = makeToken({
    surface: 'と',
    reading: 'ト',
    headword: 'と',
    partOfSpeech: PartOfSpeech.other,
    pos1: '',
    pos2: '',
  });

  const excluded = shouldExcludeTokenFromSubtitleAnnotations(untaggedParticle);

  assert.equal(excluded, true);
});
|
||||
|
||||
// The multi-kana connective たって must be excluded even when the token carries no POS tags.
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone connective particle fragments without POS tags', () => {
  const untaggedConnective = makeToken({
    surface: 'たって',
    reading: 'タッテ',
    headword: 'たって',
    partOfSpeech: PartOfSpeech.other,
    pos1: '',
    pos2: '',
  });

  const excluded = shouldExcludeTokenFromSubtitleAnnotations(untaggedConnective);

  assert.equal(excluded, true);
});
|
||||
|
||||
// Both spellings of the rhetorical ending (もんか / ものか) are checked; the surface doubles as the
// assertion message so a failure names the offending variant.
test('shouldExcludeTokenFromSubtitleAnnotations excludes rhetorical もんか grammar particle phrases', () => {
  const variants: Array<[surface: string, reading: string]> = [
    ['もんか', 'モンカ'],
    ['ものか', 'モノカ'],
  ];

  for (const [surface, reading] of variants) {
    const phraseToken = makeToken({
      surface,
      headword: surface,
      reading,
      partOfSpeech: PartOfSpeech.noun,
      pos1: '名詞|助詞',
      pos2: '非自立|副助詞/並立助詞/終助詞',
    });

    assert.equal(shouldExcludeTokenFromSubtitleAnnotations(phraseToken), true, surface);
  }
});
|
||||
|
||||
// A bare くれ is expected to be excluded even though this token resolves it to the noun headword 暮れ.
test('shouldExcludeTokenFromSubtitleAnnotations excludes bare くれ auxiliary fragments', () => {
  const bareKure = makeToken({
    surface: 'くれ',
    reading: 'クレ',
    headword: '暮れ',
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    pos2: '一般',
  });

  const excluded = shouldExcludeTokenFromSubtitleAnnotations(bareKure);

  assert.equal(excluded, true);
});
|
||||
|
||||
// って and べき arrive without POS tags; each must still be excluded. The surface is used as the
// assertion message so a failure identifies the term.
test('shouldExcludeTokenFromSubtitleAnnotations excludes standalone quote particle and auxiliary grammar terms', () => {
  const grammarTerms = [
    { surface: 'って', reading: 'ッテ' },
    { surface: 'べき', reading: 'ベキ' },
  ].map(({ surface, reading }) =>
    makeToken({
      surface,
      headword: surface,
      reading,
      partOfSpeech: PartOfSpeech.other,
      pos1: '',
      pos2: '',
    }),
  );

  grammarTerms.forEach((grammarTerm) => {
    assert.equal(
      shouldExcludeTokenFromSubtitleAnnotations(grammarTerm),
      true,
      grammarTerm.surface,
    );
  });
});
|
||||
|
||||
// One hiragana and one katakana single-character surface, each with a kanji headword and POS tags,
// are both expected to be excluded.
test('shouldExcludeTokenFromSubtitleAnnotations excludes single-kana surface fragments', () => {
  const hiraganaFragment = makeToken({
    surface: 'ふ',
    reading: 'フ',
    headword: '不',
    partOfSpeech: PartOfSpeech.other,
    pos1: '接頭詞',
    pos2: '',
  });
  const katakanaFragment = makeToken({
    surface: 'フ',
    reading: 'フ',
    headword: '負',
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    pos2: '一般',
  });

  [hiraganaFragment, katakanaFragment].forEach((fragment) => {
    assert.equal(shouldExcludeTokenFromSubtitleAnnotations(fragment), true, fragment.surface);
  });
});
|
||||
|
||||
test('stripSubtitleAnnotationMetadata keeps token hover data while clearing annotation fields', () => {
|
||||
const token = makeToken({
|
||||
surface: 'は',
|
||||
@@ -444,13 +570,13 @@ test('annotateTokens keeps other annotations for name matches when name highligh
|
||||
let jlptLookupCalls = 0;
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'オリヴィア',
|
||||
reading: 'オリヴィア',
|
||||
headword: 'オリヴィア',
|
||||
surface: '山田',
|
||||
reading: 'ヤマダ',
|
||||
headword: '山田',
|
||||
isNameMatch: true,
|
||||
frequencyRank: 42,
|
||||
startPos: 0,
|
||||
endPos: 5,
|
||||
endPos: 2,
|
||||
}),
|
||||
];
|
||||
|
||||
@@ -536,6 +662,57 @@ test('annotateTokens N+1 minimum sentence words counts only eligible word tokens
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
// The token stream has no punctuation tokens: the three known nouns cover offsets 0-3 and the
// unknown verb starts at offset 4, leaving a gap where the source text has "!". With a minimum of
// three sentence words, no token may be flagged as an N+1 target.
test('annotateTokens N+1 sentence word count respects source punctuation gaps omitted by Yomitan', () => {
  const knownNouns = ['私', '猫', '犬'];
  const nounTokens = knownNouns.map((noun, offset) =>
    makeToken({
      surface: noun,
      headword: noun,
      pos1: '名詞',
      startPos: offset,
      endPos: offset + 1,
    }),
  );
  const unknownVerb = makeToken({
    surface: 'ふざけん',
    headword: 'ふざける',
    partOfSpeech: PartOfSpeech.verb,
    pos1: '動詞',
    pos2: '自立',
    startPos: 4,
    endPos: 8,
  });

  const annotated = annotateTokens(
    [...nounTokens, unknownVerb],
    makeDeps({
      isKnownWord: (text) => knownNouns.includes(text),
    }),
    {
      minSentenceWordsForNPlusOne: 3,
      sourceText: '私猫犬!ふざけんなよ!',
    },
  );

  for (const index of [0, 1, 2, 3]) {
    assert.equal(annotated[index]?.isNPlusOneTarget, false);
  }
});
|
||||
|
||||
test('annotateTokens applies configured pos1 exclusions to both frequency and N+1', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
@@ -593,7 +770,7 @@ test('annotateTokens allows previously default-excluded pos1 when removed from e
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, 8);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes default non-independent pos2 from frequency and N+1', () => {
|
||||
@@ -618,6 +795,37 @@ test('annotateTokens excludes default non-independent pos2 from frequency and N+
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
// A non-independent kanji noun loses frequency/JLPT/N+1 annotations but is still reported as known
// when the known-word lookup matches it.
test('annotateTokens preserves exact known-word status for non-independent kanji noun tokens', () => {
  const knownTexts = new Set(['点', 'てん']);
  const nonIndependentNoun = makeToken({
    surface: '点',
    reading: 'てん',
    headword: '点',
    partOfSpeech: PartOfSpeech.other,
    pos1: '名詞',
    pos2: '非自立',
    pos3: '一般',
    startPos: 2,
    endPos: 3,
    frequencyRank: 1384,
  });

  const [annotated] = annotateTokens(
    [nonIndependentNoun],
    makeDeps({
      isKnownWord: (text) => knownTexts.has(text),
      getJlptLevel: (text) => (text === '点' ? 'N3' : null),
    }),
    { minSentenceWordsForNPlusOne: 1 },
  );

  assert.equal(annotated?.isKnown, true);
  assert.equal(annotated?.isNPlusOneTarget, false);
  assert.equal(annotated?.frequencyRank, undefined);
  assert.equal(annotated?.jlptLevel, undefined);
});
|
||||
|
||||
test('annotateTokens clears all annotations for non-independent kanji noun tokens under unified gate', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
@@ -665,7 +873,7 @@ test('annotateTokens excludes likely kana SFX tokens from frequency when POS tag
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes single hiragana and katakana tokens from frequency when POS tags are missing', () => {
|
||||
test('annotateTokens clears all annotations from single hiragana and katakana surface fragments', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'た',
|
||||
@@ -679,12 +887,12 @@ test('annotateTokens excludes single hiragana and katakana tokens from frequency
|
||||
endPos: 1,
|
||||
}),
|
||||
makeToken({
|
||||
surface: 'ア',
|
||||
reading: 'ア',
|
||||
headword: 'ア',
|
||||
pos1: '',
|
||||
surface: 'フ',
|
||||
reading: 'フ',
|
||||
headword: '負',
|
||||
pos1: '名詞',
|
||||
pos2: '',
|
||||
partOfSpeech: PartOfSpeech.other,
|
||||
partOfSpeech: PartOfSpeech.noun,
|
||||
frequencyRank: 22,
|
||||
startPos: 1,
|
||||
endPos: 2,
|
||||
@@ -706,8 +914,14 @@ test('annotateTokens excludes single hiragana and katakana tokens from frequency
|
||||
minSentenceWordsForNPlusOne: 1,
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.isKnown, false);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
assert.equal(result[1]?.isKnown, false);
|
||||
assert.equal(result[1]?.isNPlusOneTarget, false);
|
||||
assert.equal(result[1]?.frequencyRank, undefined);
|
||||
assert.equal(result[1]?.jlptLevel, undefined);
|
||||
assert.equal(result[2]?.frequencyRank, 23);
|
||||
});
|
||||
|
||||
@@ -751,10 +965,10 @@ test('annotateTokens allows previously default-excluded pos2 when removed from e
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, 9);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes composite function/content tokens from frequency but keeps N+1 eligible', () => {
|
||||
test('annotateTokens excludes kana-only composite function/content tokens from frequency and N+1', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
surface: 'になれば',
|
||||
@@ -772,7 +986,7 @@ test('annotateTokens excludes composite function/content tokens from frequency b
|
||||
});
|
||||
|
||||
assert.equal(result[0]?.frequencyRank, undefined);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, true);
|
||||
assert.equal(result[0]?.isNPlusOneTarget, false);
|
||||
});
|
||||
|
||||
test('annotateTokens excludes composite tokens when all component pos tags are excluded', () => {
|
||||
@@ -856,6 +1070,219 @@ test('annotateTokens clears all annotations for kana-only non-independent noun h
|
||||
assert.equal(result[0]?.jlptLevel, undefined);
|
||||
});
|
||||
|
||||
// Even though する is known and has a JLPT level, a standalone してる fragment must come back with
// every annotation field cleared.
test('annotateTokens clears all annotations for standalone して helper fragments', () => {
  const helperFragment = makeToken({
    surface: 'してる',
    reading: 'シテル',
    headword: 'する',
    partOfSpeech: PartOfSpeech.verb,
    pos1: '動詞|助動詞',
    pos2: '自立|非自立',
    startPos: 0,
    endPos: 3,
    frequencyRank: 22,
  });

  const [annotated] = annotateTokens(
    [helperFragment],
    makeDeps({
      isKnownWord: (text) => text === 'する',
      getJlptLevel: (text) => (text === 'する' ? 'N5' : null),
    }),
    { minSentenceWordsForNPlusOne: 1 },
  );

  assert.equal(annotated?.isKnown, false);
  assert.equal(annotated?.isNPlusOneTarget, false);
  assert.equal(annotated?.frequencyRank, undefined);
  assert.equal(annotated?.jlptLevel, undefined);
});
|
||||
|
||||
// An untagged と that is known, ranked and JLPT-listed must still come back with every annotation
// field cleared.
test('annotateTokens clears all annotations for standalone particle fragments without POS tags', () => {
  const untaggedParticle = makeToken({
    surface: 'と',
    reading: 'ト',
    headword: 'と',
    partOfSpeech: PartOfSpeech.other,
    pos1: '',
    pos2: '',
    startPos: 0,
    endPos: 1,
    frequencyRank: 4,
  });

  const [annotated] = annotateTokens(
    [untaggedParticle],
    makeDeps({
      isKnownWord: (text) => text === 'と',
      getJlptLevel: (text) => (text === 'と' ? 'N5' : null),
    }),
    { minSentenceWordsForNPlusOne: 1 },
  );

  assert.equal(annotated?.isKnown, false);
  assert.equal(annotated?.isNPlusOneTarget, false);
  assert.equal(annotated?.frequencyRank, undefined);
  assert.equal(annotated?.jlptLevel, undefined);
});
|
||||
|
||||
// たって sits between two known words, which would make it the only unknown token in the sentence.
// It must nevertheless not be selected as the N+1 target, and its other annotations are cleared.
test('annotateTokens does not mark standalone connective particles as N+1', () => {
  const knownWords = new Set(['逃げる', '無駄']);
  const sentenceTokens = [
    makeToken({
      surface: '逃げる',
      reading: 'ニゲル',
      headword: '逃げる',
      partOfSpeech: PartOfSpeech.verb,
      pos1: '動詞',
      pos2: '自立',
      startPos: 0,
      endPos: 3,
    }),
    makeToken({
      surface: 'たって',
      reading: 'タッテ',
      headword: 'たって',
      partOfSpeech: PartOfSpeech.other,
      pos1: '',
      pos2: '',
      startPos: 3,
      endPos: 6,
      frequencyRank: 28,
    }),
    makeToken({
      surface: '無駄',
      reading: 'ムダ',
      headword: '無駄',
      partOfSpeech: PartOfSpeech.noun,
      pos1: '名詞',
      pos2: '形容動詞語幹',
      startPos: 6,
      endPos: 8,
    }),
  ];

  const [, connective] = annotateTokens(
    sentenceTokens,
    makeDeps({
      isKnownWord: (text) => knownWords.has(text),
      getJlptLevel: (text) => (text === 'たって' ? 'N3' : null),
    }),
    { minSentenceWordsForNPlusOne: 1 },
  );

  assert.equal(connective?.isKnown, false);
  assert.equal(connective?.isNPlusOneTarget, false);
  assert.equal(connective?.frequencyRank, undefined);
  assert.equal(connective?.jlptLevel, undefined);
});
|
||||
|
||||
// もんか is known, ranked and JLPT-listed here; as a grammar particle phrase it must still come
// back with every annotation field cleared.
test('annotateTokens clears all annotations for rhetorical もんか grammar particle phrases', () => {
  const rhetoricalPhrase = makeToken({
    surface: 'もんか',
    reading: 'モンカ',
    headword: 'もんか',
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞|助詞',
    pos2: '非自立|副助詞/並立助詞/終助詞',
    startPos: 0,
    endPos: 3,
    frequencyRank: 69629,
  });

  const [annotated] = annotateTokens(
    [rhetoricalPhrase],
    makeDeps({
      isKnownWord: (text) => text === 'もんか',
      getJlptLevel: (text) => (text === 'もんか' ? 'N2' : null),
    }),
    { minSentenceWordsForNPlusOne: 1 },
  );

  assert.equal(annotated?.isKnown, false);
  assert.equal(annotated?.isNPlusOneTarget, false);
  assert.equal(annotated?.frequencyRank, undefined);
  assert.equal(annotated?.jlptLevel, undefined);
});
|
||||
|
||||
// The headword 暮れ is known and JLPT-listed, but the bare くれ surface must still come back with
// every annotation field cleared.
test('annotateTokens clears all annotations for bare くれ auxiliary fragments', () => {
  const bareKure = makeToken({
    surface: 'くれ',
    reading: 'クレ',
    headword: '暮れ',
    partOfSpeech: PartOfSpeech.noun,
    pos1: '名詞',
    pos2: '一般',
    startPos: 0,
    endPos: 2,
    frequencyRank: 12877,
  });

  const [annotated] = annotateTokens(
    [bareKure],
    makeDeps({
      isKnownWord: (text) => text === '暮れ',
      getJlptLevel: (text) => (text === '暮れ' ? 'N3' : null),
    }),
    { minSentenceWordsForNPlusOne: 1 },
  );

  assert.equal(annotated?.isKnown, false);
  assert.equal(annotated?.isNPlusOneTarget, false);
  assert.equal(annotated?.frequencyRank, undefined);
  assert.equal(annotated?.jlptLevel, undefined);
});
|
||||
|
||||
// って and べき are both known, ranked and JLPT-listed; every returned token must still have all
// annotation fields cleared. The surface is the assertion message so failures name the term.
test('annotateTokens clears all annotations for standalone quote particle and auxiliary grammar terms', () => {
  const isGrammarTerm = (text: string): boolean => text === 'って' || text === 'べき';
  const grammarTokens = [
    makeToken({
      surface: 'って',
      reading: 'ッテ',
      headword: 'って',
      partOfSpeech: PartOfSpeech.other,
      pos1: '',
      pos2: '',
      startPos: 0,
      endPos: 2,
      frequencyRank: 28,
    }),
    makeToken({
      surface: 'べき',
      reading: 'ベキ',
      headword: 'べき',
      partOfSpeech: PartOfSpeech.other,
      pos1: '',
      pos2: '',
      startPos: 2,
      endPos: 4,
      frequencyRank: 268,
    }),
  ];

  const annotated = annotateTokens(
    grammarTokens,
    makeDeps({
      isKnownWord: (text) => isGrammarTerm(text),
      getJlptLevel: (text) => (isGrammarTerm(text) ? 'N3' : null),
    }),
    { minSentenceWordsForNPlusOne: 1 },
  );

  annotated.forEach((entry) => {
    assert.equal(entry.isKnown, false, entry.surface);
    assert.equal(entry.isNPlusOneTarget, false, entry.surface);
    assert.equal(entry.frequencyRank, undefined, entry.surface);
    assert.equal(entry.jlptLevel, undefined, entry.surface);
  });
});
|
||||
|
||||
test('annotateTokens clears all annotations from standalone あ interjections without POS tags', () => {
|
||||
const tokens = [
|
||||
makeToken({
|
||||
|
||||
@@ -89,6 +89,7 @@ export interface AnnotationStageOptions {
|
||||
minSentenceWordsForNPlusOne?: number;
|
||||
pos1Exclusions?: ReadonlySet<string>;
|
||||
pos2Exclusions?: ReadonlySet<string>;
|
||||
sourceText?: string;
|
||||
}
|
||||
|
||||
function resolveKnownWordText(
|
||||
@@ -670,6 +671,36 @@ function computeTokenKnownStatus(
|
||||
return normalizedReading !== matchText.trim() && isKnownWord(normalizedReading);
|
||||
}
|
||||
|
||||
/**
 * Known-word check for a token using only exact text matches.
 *
 * Surfaces without any kanji are never reported as known. Otherwise the token is known when the
 * trimmed surface is known, when a trimmed reading that differs from the surface is known, or
 * when the trimmed headword equals the surface and is known.
 *
 * @param token - Token whose surface, reading and headword are consulted.
 * @param isKnownWord - Lookup that reports whether a piece of text is a known word.
 * @returns `true` when one of the exact-match lookups succeeds.
 */
function computeExcludedTokenKnownStatus(
  token: MergedToken,
  isKnownWord: (text: string) => boolean,
): boolean {
  const surface = token.surface.trim();
  if (!hasKanjiChar(surface)) {
    return false;
  }

  if (surface.length > 0 && isKnownWord(surface)) {
    return true;
  }

  // Only consult the reading when it adds information beyond the surface itself.
  const reading = token.reading.trim();
  const readingIsKnown = reading.length > 0 && reading !== surface && isKnownWord(reading);
  if (readingIsKnown) {
    return true;
  }

  // A headword is accepted only when it is identical to the surface (no deinflected matches).
  const headword = token.headword.trim();
  return headword.length > 0 && headword === surface && isKnownWord(headword);
}
|
||||
|
||||
function filterTokenFrequencyRank(
|
||||
token: MergedToken,
|
||||
pos1Exclusions: ReadonlySet<string>,
|
||||
@@ -732,10 +763,16 @@ export function annotateTokens(
|
||||
pos2Exclusions,
|
||||
})
|
||||
) {
|
||||
return sharedStripSubtitleAnnotationMetadata(token, {
|
||||
const strippedToken = sharedStripSubtitleAnnotationMetadata(token, {
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
});
|
||||
return {
|
||||
...strippedToken,
|
||||
isKnown:
|
||||
nPlusOneEnabled &&
|
||||
computeExcludedTokenKnownStatus(token, deps.isKnownWord),
|
||||
};
|
||||
}
|
||||
|
||||
const prioritizedNameMatch = nameMatchEnabled && token.isNameMatch === true;
|
||||
@@ -779,6 +816,7 @@ export function annotateTokens(
|
||||
sanitizedMinSentenceWordsForNPlusOne,
|
||||
pos1Exclusions,
|
||||
pos2Exclusions,
|
||||
options.sourceText,
|
||||
);
|
||||
|
||||
if (!nameMatchEnabled) {
|
||||
|
||||
@@ -39,6 +39,33 @@ test('enrichTokensWithMecabPos1 fills missing pos1 using surface-sequence fallba
|
||||
assert.equal(enriched[0]?.pos1, '助詞');
|
||||
});
|
||||
|
||||
// The input token already has pos1 but an empty pos2 and a whitespace-only pos3; both are expected
// to be filled from the MeCab token covering the same span.
test('enrichTokensWithMecabPos1 backfills blank pos2 and pos3 fields', () => {
  const sharedSpan = { surface: 'は', startPos: 0, endPos: 1, pos1: '助詞' };
  const tokens = [makeToken({ ...sharedSpan, pos2: '', pos3: ' ' })];
  const mecabTokens = [makeToken({ ...sharedSpan, pos2: '係助詞', pos3: '一般' })];

  const [firstEnriched] = enrichTokensWithMecabPos1(tokens, mecabTokens);

  assert.equal(firstEnriched?.pos2, '係助詞');
  assert.equal(firstEnriched?.pos3, '一般');
});
|
||||
|
||||
test('enrichTokensWithMecabPos1 keeps partOfSpeech unchanged and only enriches POS tags', () => {
|
||||
const tokens = [makeToken({ surface: 'これは', startPos: 0, endPos: 3 })];
|
||||
const mecabTokens = [
|
||||
|
||||
@@ -120,6 +120,13 @@ function lowerBoundByIndex(candidates: IndexedMecabToken[], targetIndex: number)
|
||||
return low;
|
||||
}
|
||||
|
||||
/**
 * Picks the POS value to keep for a single field.
 *
 * @param current - Value already present on the token, possibly missing or blank.
 * @param fallback - Replacement value to use when `current` carries no content.
 * @returns `current` unchanged when it contains non-whitespace text, otherwise `fallback`.
 */
function coalesceMissingPosField(
  current: string | undefined,
  fallback: string | undefined,
): string | undefined {
  if (typeof current !== 'string') {
    return fallback;
  }
  // Whitespace-only values count as missing.
  if (current.trim().length === 0) {
    return fallback;
  }
  return current;
}
|
||||
|
||||
function joinUniqueTags(values: Array<string | undefined>): string | undefined {
|
||||
const unique: string[] = [];
|
||||
for (const value of values) {
|
||||
@@ -303,7 +310,9 @@ function fillMissingPos1BySurfaceSequence(
|
||||
|
||||
let cursor = 0;
|
||||
return tokens.map((token) => {
|
||||
if (token.pos1 && token.pos1.trim().length > 0) {
|
||||
const hasCompletePosMetadata =
|
||||
token.pos1?.trim() && token.pos2?.trim() && token.pos3?.trim();
|
||||
if (hasCompletePosMetadata) {
|
||||
return token;
|
||||
}
|
||||
|
||||
@@ -327,9 +336,9 @@ function fillMissingPos1BySurfaceSequence(
|
||||
cursor = best.index + 1;
|
||||
return {
|
||||
...token,
|
||||
pos1: best.pos1,
|
||||
pos2: best.pos2,
|
||||
pos3: best.pos3,
|
||||
pos1: coalesceMissingPosField(token.pos1, best.pos1),
|
||||
pos2: coalesceMissingPosField(token.pos2, best.pos2),
|
||||
pos3: coalesceMissingPosField(token.pos3, best.pos3),
|
||||
};
|
||||
});
|
||||
}
|
||||
@@ -382,7 +391,7 @@ export function enrichTokensWithMecabPos1(
|
||||
const metadataByTokenIndex = new Map<number, MecabPosMetadata>();
|
||||
|
||||
for (const [index, token] of tokens.entries()) {
|
||||
if (token.pos1) {
|
||||
if (token.pos1?.trim() && token.pos2?.trim() && token.pos3?.trim()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -410,9 +419,9 @@ export function enrichTokensWithMecabPos1(
|
||||
|
||||
return {
|
||||
...token,
|
||||
pos1: metadata.pos1,
|
||||
pos2: metadata.pos2,
|
||||
pos3: metadata.pos3,
|
||||
pos1: coalesceMissingPosField(token.pos1, metadata.pos1),
|
||||
pos2: coalesceMissingPosField(token.pos2, metadata.pos2),
|
||||
pos3: coalesceMissingPosField(token.pos3, metadata.pos3),
|
||||
};
|
||||
});
|
||||
|
||||
|
||||
@@ -13,17 +13,28 @@ const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
|
||||
const KATAKANA_CODEPOINT_START = 0x30a1;
|
||||
const KATAKANA_CODEPOINT_END = 0x30f6;
|
||||
|
||||
// Multi-kana particle phrases, kept as a tuple so the same list can feed both sets below.
const STANDALONE_GRAMMAR_PARTICLE_PHRASES = ['たって', 'だって'] as const;

// Set view of the phrases above for membership checks.
const STANDALONE_GRAMMAR_PARTICLE_PHRASES_SET: ReadonlySet<string> = new Set<string>(
  STANDALONE_GRAMMAR_PARTICLE_PHRASES,
);

// Terms excluded from subtitle annotations; the particle phrases are appended at the end.
const SUBTITLE_ANNOTATION_EXCLUDED_TERMS = new Set<string>([
  'あ', 'ああ', 'ええ', 'うう', 'おお',
  'くれ', 'って',
  'はあ', 'はは',
  'べき',
  'へえ', 'ふう', 'ほう',
  'もんか', 'ものか',
  ...STANDALONE_GRAMMAR_PARTICLE_PHRASES,
]);
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_PREFIXES = ['ん', 'の', 'なん', 'なの'];
|
||||
const SUBTITLE_ANNOTATION_EXCLUDED_EXPLANATORY_ENDING_CORES = [
|
||||
@@ -72,7 +83,25 @@ const SUBTITLE_ANNOTATION_EXCLUDED_TRAILING_PARTICLE_SUFFIXES = new Set([
|
||||
]);
|
||||
const AUXILIARY_STEM_GRAMMAR_TAIL_POS1 = new Set(['名詞', '助動詞', '助詞']);
|
||||
const NON_INDEPENDENT_NOUN_HELPER_TAIL_POS1 = new Set(['助詞', '助動詞']);
|
||||
|
||||
// Single-kana surfaces that are matched as standalone grammar particles.
const STANDALONE_GRAMMAR_PARTICLE_SURFACES = new Set<string>([
  'か', 'が', 'さ', 'し', 'ぞ', 'ぜ',
  'と', 'な', 'に', 'ね', 'の', 'は',
  'へ', 'も', 'や', 'よ', 'を',
]);
|
||||
export interface SubtitleAnnotationFilterOptions {
|
||||
pos1Exclusions?: ReadonlySet<string>;
|
||||
pos2Exclusions?: ReadonlySet<string>;
|
||||
@@ -278,6 +307,38 @@ function isKanaOnlyNonIndependentNounHelperMerge(token: MergedToken): boolean {
|
||||
return pos1Parts.slice(1).every((part) => NON_INDEPENDENT_NOUN_HELPER_TAIL_POS1.has(part));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `text`, after `normalizeKana`, is non-empty and made up solely of characters
 * accepted by `isKanaChar`.
 */
function isKanaOnlyText(text: string): boolean {
  const characters = [...normalizeKana(text)];
  if (characters.length === 0) {
    return false;
  }
  return characters.every(isKanaChar);
}
|
||||
|
||||
/**
 * Detects kana-only tokens that begin with して and whose headword is する.
 *
 * The token qualifies when its pos1 tag list is empty or contains 動詞.
 */
function isStandaloneSuruTeGrammarHelper(token: MergedToken): boolean {
  const surface = normalizeKana(token.surface);
  const headword = normalizeKana(token.headword);

  const isSuruTeForm = surface.startsWith('して') && headword === 'する';
  if (!isSuruTeForm) {
    return false;
  }

  const pos1Parts = splitNormalizedTagParts(normalizePosTag(token.pos1));
  if (!isKanaOnlyText(surface)) {
    return false;
  }
  // Untagged tokens are accepted; tagged ones must include the verb tag.
  return pos1Parts.length === 0 || pos1Parts.includes('動詞');
}
|
||||
|
||||
/**
 * Detects tokens whose normalized surface equals their normalized headword and is listed either
 * as a single-kana particle surface or as a multi-kana particle phrase.
 */
function isStandaloneGrammarParticle(token: MergedToken): boolean {
  const surface = normalizeKana(token.surface);
  const headword = normalizeKana(token.headword);
  if (surface !== headword) {
    return false;
  }

  if (STANDALONE_GRAMMAR_PARTICLE_SURFACES.has(surface)) {
    return true;
  }
  return STANDALONE_GRAMMAR_PARTICLE_PHRASES_SET.has(surface);
}
|
||||
|
||||
/**
 * Reports whether the token's normalized surface is exactly one character and that character is
 * accepted by `isKanaChar`.
 */
function isSingleKanaSurfaceFragment(token: MergedToken): boolean {
  // Array.from iterates by code point, so the length counts characters rather than UTF-16 units.
  const characters = Array.from(normalizeKana(token.surface));
  if (characters.length !== 1) {
    return false;
  }
  return characters.every(isKanaChar);
}
|
||||
|
||||
function isExcludedByTerm(token: MergedToken): boolean {
|
||||
const candidates = [token.surface, token.reading, token.headword].filter(
|
||||
(candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
|
||||
@@ -365,6 +426,18 @@ export function shouldExcludeTokenFromSubtitleAnnotations(
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isStandaloneSuruTeGrammarHelper(token)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isStandaloneGrammarParticle(token)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isSingleKanaSurfaceFragment(token)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isExcludedTrailingParticleMergedToken(token)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1049,6 +1049,60 @@ test('requestYomitanScanTokens marks grouped entries when SubMiner dictionary al
|
||||
assert.equal((result as Array<{ isNameMatch?: boolean }>)[0]?.isNameMatch, true);
|
||||
});
|
||||
|
||||
// Two phases: first capture the scanner script that requestYomitanScanTokens sends (the one that
// mentions termsFind), then run that script against a stubbed termsFind backend and check that the
// matched headword's wordClasses end up on the emitted token.
test('requestYomitanScanTokens preserves matched headword word classes', async () => {
  const buildProfileOptions = () => ({
    profileCurrent: 0,
    profiles: [
      {
        options: {
          scanning: { length: 40 },
        },
      },
    ],
  });

  let scannerScript = '';
  const deps = createDeps(async (script) => {
    if (script.includes('termsFind')) {
      scannerScript = script;
      return [];
    }
    return script.includes('optionsGetFull') ? buildProfileOptions() : null;
  });

  await requestYomitanScanTokens('は', deps, { error: () => undefined });

  const result = await runInjectedYomitanScript(scannerScript, (action, params) => {
    if (action !== 'termsFind') {
      throw new Error(`unexpected action: ${action}`);
    }

    const requestedText = (params as { text?: string } | undefined)?.text;
    if (requestedText !== 'は') {
      return { originalTextLength: 0, dictionaryEntries: [] };
    }

    const particleHeadword = {
      term: 'は',
      reading: 'は',
      wordClasses: ['prt'],
      sources: [{ originalText: 'は', isPrimary: true, matchType: 'exact' }],
    };
    return {
      originalTextLength: 1,
      dictionaryEntries: [{ headwords: [particleHeadword] }],
    };
  });

  const scannedTokens = result as Array<{ wordClasses?: string[] }>;
  assert.deepEqual(scannedTokens[0]?.wordClasses, ['prt']);
});
|
||||
|
||||
test('requestYomitanScanTokens skips fallback fragments without exact primary source matches', async () => {
|
||||
const deps = createDeps(async (script) => {
|
||||
if (script.includes('optionsGetFull')) {
|
||||
|
||||
@@ -53,6 +53,7 @@ export interface YomitanScanToken {
|
||||
endPos: number;
|
||||
isNameMatch?: boolean;
|
||||
frequencyRank?: number;
|
||||
wordClasses?: string[];
|
||||
}
|
||||
|
||||
interface YomitanProfileMetadata {
|
||||
@@ -91,7 +92,10 @@ function isScanTokenArray(value: unknown): value is YomitanScanToken[] {
|
||||
typeof entry.startPos === 'number' &&
|
||||
typeof entry.endPos === 'number' &&
|
||||
(entry.isNameMatch === undefined || typeof entry.isNameMatch === 'boolean') &&
|
||||
(entry.frequencyRank === undefined || typeof entry.frequencyRank === 'number'),
|
||||
(entry.frequencyRank === undefined || typeof entry.frequencyRank === 'number') &&
|
||||
(entry.wordClasses === undefined ||
|
||||
(Array.isArray(entry.wordClasses) &&
|
||||
entry.wordClasses.every((wordClass) => typeof wordClass === 'string'))),
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -975,6 +979,11 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
|
||||
return best;
|
||||
}
|
||||
function getPreferredHeadword(dictionaryEntries, token, dictionaryPriorityByName, dictionaryFrequencyModeByName) {
|
||||
function normalizeWordClasses(headword) {
|
||||
if (!Array.isArray(headword?.wordClasses)) { return undefined; }
|
||||
const classes = headword.wordClasses.filter((wordClass) => typeof wordClass === "string" && wordClass.trim().length > 0);
|
||||
return classes.length > 0 ? classes : undefined;
|
||||
}
|
||||
function appendDictionaryNames(target, value) {
|
||||
if (!value || typeof value !== 'object') {
|
||||
return;
|
||||
@@ -1033,6 +1042,7 @@ const YOMITAN_SCANNING_HELPERS = String.raw`
|
||||
return {
|
||||
term: preferredMatch.headword.term,
|
||||
reading: preferredMatch.headword.reading,
|
||||
wordClasses: normalizeWordClasses(preferredMatch.headword),
|
||||
isNameMatch: matchedNameDictionary || isNameDictionaryEntry(preferredMatch.dictionaryEntry),
|
||||
frequencyRank: getBestFrequencyRankForMatches(
|
||||
exactFrequencyMatches.length > 0 ? exactFrequencyMatches : exactPrimaryMatches,
|
||||
@@ -1099,7 +1109,7 @@ ${YOMITAN_SCANNING_HELPERS}
|
||||
if (preferredHeadword && typeof preferredHeadword.term === "string") {
|
||||
const reading = typeof preferredHeadword.reading === "string" ? preferredHeadword.reading : "";
|
||||
const segments = distributeFuriganaInflected(preferredHeadword.term, reading, source);
|
||||
tokens.push({
|
||||
const tokenPayload = {
|
||||
surface: segments.map((segment) => segment.text).join("") || source,
|
||||
reading: segments.map((segment) => typeof segment.reading === "string" ? segment.reading : "").join(""),
|
||||
headword: preferredHeadword.term,
|
||||
@@ -1110,7 +1120,11 @@ ${YOMITAN_SCANNING_HELPERS}
|
||||
typeof preferredHeadword.frequencyRank === "number" && Number.isFinite(preferredHeadword.frequencyRank)
|
||||
? Math.max(1, Math.floor(preferredHeadword.frequencyRank))
|
||||
: undefined,
|
||||
});
|
||||
};
|
||||
if (Array.isArray(preferredHeadword.wordClasses) && preferredHeadword.wordClasses.length > 0) {
|
||||
tokenPayload.wordClasses = preferredHeadword.wordClasses;
|
||||
}
|
||||
tokens.push(tokenPayload);
|
||||
i += originalTextLength;
|
||||
continue;
|
||||
}
|
||||
|
||||
+57
-2
@@ -1911,6 +1911,7 @@ const WINDOWS_VISIBLE_OVERLAY_BLUR_REFRESH_DELAYS_MS = [0, 25, 100, 250] as cons
|
||||
const WINDOWS_VISIBLE_OVERLAY_Z_ORDER_RETRY_DELAYS_MS = [0, 48, 120, 240, 480] as const;
|
||||
const WINDOWS_VISIBLE_OVERLAY_FOREGROUND_POLL_INTERVAL_MS = 75;
|
||||
const WINDOWS_VISIBLE_OVERLAY_FOCUS_HANDOFF_GRACE_MS = 200;
|
||||
const LINUX_MPV_FULLSCREEN_OVERLAY_REFRESH_DELAYS_MS = [0, 50, 150, 300, 600] as const;
|
||||
let windowsVisibleOverlayBlurRefreshTimeouts: Array<ReturnType<typeof setTimeout>> = [];
|
||||
let windowsVisibleOverlayZOrderRetryTimeouts: Array<ReturnType<typeof setTimeout>> = [];
|
||||
let windowsVisibleOverlayZOrderSyncInFlight = false;
|
||||
@@ -1918,6 +1919,7 @@ let windowsVisibleOverlayZOrderSyncQueued = false;
|
||||
let windowsVisibleOverlayForegroundPollInterval: ReturnType<typeof setInterval> | null = null;
|
||||
let lastWindowsVisibleOverlayForegroundProcessName: string | null = null;
|
||||
let lastWindowsVisibleOverlayBlurredAtMs = 0;
|
||||
let linuxMpvFullscreenOverlayRefreshTimeouts: Array<ReturnType<typeof setTimeout>> = [];
|
||||
|
||||
function clearWindowsVisibleOverlayBlurRefreshTimeouts(): void {
|
||||
for (const timeout of windowsVisibleOverlayBlurRefreshTimeouts) {
|
||||
@@ -1933,6 +1935,48 @@ function clearWindowsVisibleOverlayZOrderRetryTimeouts(): void {
|
||||
windowsVisibleOverlayZOrderRetryTimeouts = [];
|
||||
}
|
||||
|
||||
function clearLinuxMpvFullscreenOverlayRefreshTimeouts(): void {
|
||||
for (const timeout of linuxMpvFullscreenOverlayRefreshTimeouts) {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
linuxMpvFullscreenOverlayRefreshTimeouts = [];
|
||||
}
|
||||
|
||||
function refreshLinuxVisibleOverlayAfterMpvFullscreenChange(): void {
|
||||
if (process.platform !== 'linux' || !overlayManager.getVisibleOverlayVisible()) {
|
||||
return;
|
||||
}
|
||||
|
||||
overlayVisibilityRuntime.updateVisibleOverlayVisibility();
|
||||
|
||||
const mainWindow = overlayManager.getMainWindow();
|
||||
if (!mainWindow || mainWindow.isDestroyed() || !mainWindow.isVisible()) {
|
||||
return;
|
||||
}
|
||||
|
||||
mainWindow.hide();
|
||||
mainWindow.showInactive();
|
||||
ensureOverlayWindowLevel(mainWindow);
|
||||
}
|
||||
|
||||
function scheduleLinuxVisibleOverlayFullscreenRefreshBurst(): void {
|
||||
if (process.platform !== 'linux') {
|
||||
return;
|
||||
}
|
||||
|
||||
clearLinuxMpvFullscreenOverlayRefreshTimeouts();
|
||||
for (const delayMs of LINUX_MPV_FULLSCREEN_OVERLAY_REFRESH_DELAYS_MS) {
|
||||
const refreshTimeout = setTimeout(() => {
|
||||
linuxMpvFullscreenOverlayRefreshTimeouts = linuxMpvFullscreenOverlayRefreshTimeouts.filter(
|
||||
(timeout) => timeout !== refreshTimeout,
|
||||
);
|
||||
refreshLinuxVisibleOverlayAfterMpvFullscreenChange();
|
||||
}, delayMs);
|
||||
refreshTimeout.unref?.();
|
||||
linuxMpvFullscreenOverlayRefreshTimeouts.push(refreshTimeout);
|
||||
}
|
||||
}
|
||||
|
||||
function getWindowsNativeWindowHandle(window: BrowserWindow): string {
|
||||
const handle = window.getNativeWindowHandle();
|
||||
return handle.length >= 8
|
||||
@@ -3806,6 +3850,9 @@ const {
|
||||
}
|
||||
lastObservedTimePos = time;
|
||||
},
|
||||
onFullscreenChange: () => {
|
||||
scheduleLinuxVisibleOverlayFullscreenRefreshBurst();
|
||||
},
|
||||
onSubtitleTrackChange: (sid) => {
|
||||
scheduleSubtitlePrefetchRefresh();
|
||||
youtubePrimarySubtitleNotificationRuntime.handleSubtitleTrackChange(sid);
|
||||
@@ -4046,10 +4093,18 @@ const buildUpdateVisibleOverlayBoundsMainDepsHandler =
|
||||
createBuildUpdateVisibleOverlayBoundsMainDepsHandler({
|
||||
setOverlayWindowBounds: (geometry) => applyOverlayRegions(geometry),
|
||||
afterSetOverlayWindowBounds: () => {
|
||||
if (process.platform !== 'win32' || !overlayManager.getVisibleOverlayVisible()) {
|
||||
if (!overlayManager.getVisibleOverlayVisible()) {
|
||||
return;
|
||||
}
|
||||
scheduleWindowsVisibleOverlayZOrderSyncBurst();
|
||||
if (process.platform === 'win32') {
|
||||
scheduleWindowsVisibleOverlayZOrderSyncBurst();
|
||||
return;
|
||||
}
|
||||
const mainWindow = overlayManager.getMainWindow();
|
||||
if (!mainWindow || mainWindow.isDestroyed()) {
|
||||
return;
|
||||
}
|
||||
ensureOverlayWindowLevel(mainWindow);
|
||||
},
|
||||
});
|
||||
const updateVisibleOverlayBoundsMainDeps = buildUpdateVisibleOverlayBoundsMainDepsHandler();
|
||||
|
||||
@@ -128,6 +128,7 @@ test('mpv event bindings register all expected events', () => {
|
||||
onTimePosChange: () => {},
|
||||
onDurationChange: () => {},
|
||||
onPauseChange: () => {},
|
||||
onFullscreenChange: () => {},
|
||||
onSubtitleMetricsChange: () => {},
|
||||
onSecondarySubtitleVisibility: () => {},
|
||||
});
|
||||
@@ -151,6 +152,7 @@ test('mpv event bindings register all expected events', () => {
|
||||
'time-pos-change',
|
||||
'duration-change',
|
||||
'pause-change',
|
||||
'fullscreen-change',
|
||||
'subtitle-metrics-change',
|
||||
'secondary-subtitle-visibility',
|
||||
]);
|
||||
|
||||
@@ -11,6 +11,7 @@ type MpvBindingEventName =
|
||||
| 'time-pos-change'
|
||||
| 'duration-change'
|
||||
| 'pause-change'
|
||||
| 'fullscreen-change'
|
||||
| 'subtitle-metrics-change'
|
||||
| 'secondary-subtitle-visibility';
|
||||
|
||||
@@ -83,6 +84,7 @@ export function createBindMpvClientEventHandlers(deps: {
|
||||
onTimePosChange: (payload: { time: number }) => void;
|
||||
onDurationChange: (payload: { duration: number }) => void;
|
||||
onPauseChange: (payload: { paused: boolean }) => void;
|
||||
onFullscreenChange: (payload: { fullscreen: boolean }) => void;
|
||||
onSubtitleMetricsChange: (payload: { patch: Record<string, unknown> }) => void;
|
||||
onSecondarySubtitleVisibility: (payload: { visible: boolean }) => void;
|
||||
}) {
|
||||
@@ -99,6 +101,7 @@ export function createBindMpvClientEventHandlers(deps: {
|
||||
mpvClient.on('time-pos-change', deps.onTimePosChange);
|
||||
mpvClient.on('duration-change', deps.onDurationChange);
|
||||
mpvClient.on('pause-change', deps.onPauseChange);
|
||||
mpvClient.on('fullscreen-change', deps.onFullscreenChange);
|
||||
mpvClient.on('subtitle-metrics-change', deps.onSubtitleMetricsChange);
|
||||
mpvClient.on('secondary-subtitle-visibility', deps.onSecondarySubtitleVisibility);
|
||||
};
|
||||
|
||||
@@ -68,6 +68,7 @@ export function createBindMpvMainEventHandlersHandler(deps: {
|
||||
recordMediaDuration: (durationSec: number) => void;
|
||||
reportJellyfinRemoteProgress: (forceImmediate: boolean) => void;
|
||||
onTimePosUpdate?: (time: number) => void;
|
||||
onFullscreenChange?: (fullscreen: boolean) => void;
|
||||
recordPauseState: (paused: boolean) => void;
|
||||
|
||||
updateSubtitleRenderMetrics: (patch: Record<string, unknown>) => void;
|
||||
@@ -177,6 +178,7 @@ export function createBindMpvMainEventHandlersHandler(deps: {
|
||||
onTimePosChange: handleMpvTimePosChange,
|
||||
onDurationChange: ({ duration }) => deps.recordMediaDuration(duration),
|
||||
onPauseChange: handleMpvPauseChange,
|
||||
onFullscreenChange: ({ fullscreen }) => deps.onFullscreenChange?.(fullscreen),
|
||||
onSubtitleMetricsChange: handleMpvSubtitleMetricsChange,
|
||||
onSecondarySubtitleVisibility: handleMpvSecondarySubtitleVisibility,
|
||||
})(mpvClient);
|
||||
|
||||
@@ -57,6 +57,7 @@ test('mpv main event main deps map app state updates and delegate callbacks', as
|
||||
updateCurrentMediaTitle: (title) => calls.push(`title:${title}`),
|
||||
resetAnilistMediaGuessState: () => calls.push('reset-guess'),
|
||||
reportJellyfinRemoteProgress: (forceImmediate) => calls.push(`progress:${forceImmediate}`),
|
||||
onFullscreenChange: (fullscreen) => calls.push(`fullscreen:${fullscreen}`),
|
||||
updateSubtitleRenderMetrics: () => calls.push('metrics'),
|
||||
refreshDiscordPresence: () => calls.push('presence-refresh'),
|
||||
})();
|
||||
@@ -95,6 +96,7 @@ test('mpv main event main deps map app state updates and delegate callbacks', as
|
||||
deps.notifyImmersionTitleUpdate('title');
|
||||
deps.recordPlaybackPosition(10);
|
||||
deps.reportJellyfinRemoteProgress(true);
|
||||
deps.onFullscreenChange?.(true);
|
||||
deps.recordPauseState(true);
|
||||
deps.updateSubtitleRenderMetrics({});
|
||||
deps.setPreviousSecondarySubVisibility(true);
|
||||
@@ -112,6 +114,7 @@ test('mpv main event main deps map app state updates and delegate callbacks', as
|
||||
assert.ok(calls.includes('sync-immersion'));
|
||||
assert.ok(calls.includes('autoplay:/tmp/video'));
|
||||
assert.ok(calls.includes('metrics'));
|
||||
assert.ok(calls.includes('fullscreen:true'));
|
||||
assert.ok(calls.includes('presence-refresh'));
|
||||
assert.ok(calls.includes('restore-mpv-sub'));
|
||||
assert.ok(calls.includes('reset-sidebar-layout'));
|
||||
|
||||
@@ -60,6 +60,7 @@ export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: {
|
||||
resetAnilistMediaGuessState: () => void;
|
||||
reportJellyfinRemoteProgress: (forceImmediate: boolean) => void;
|
||||
onTimePosUpdate?: (time: number) => void;
|
||||
onFullscreenChange?: (fullscreen: boolean) => void;
|
||||
updateSubtitleRenderMetrics: (patch: Record<string, unknown>) => void;
|
||||
refreshDiscordPresence: () => void;
|
||||
ensureImmersionTrackerInitialized: () => void;
|
||||
@@ -176,6 +177,9 @@ export function createBuildBindMpvMainEventHandlersMainDepsHandler(deps: {
|
||||
onTimePosUpdate: deps.onTimePosUpdate
|
||||
? (time: number) => deps.onTimePosUpdate!(time)
|
||||
: undefined,
|
||||
onFullscreenChange: deps.onFullscreenChange
|
||||
? (fullscreen: boolean) => deps.onFullscreenChange!(fullscreen)
|
||||
: undefined,
|
||||
recordPauseState: (paused: boolean) => {
|
||||
deps.appState.playbackPaused = paused;
|
||||
deps.ensureImmersionTrackerInitialized();
|
||||
|
||||
@@ -1315,6 +1315,74 @@ test('window resize ignores synthetic subtitle enter until the pointer moves aga
|
||||
}
|
||||
});
|
||||
|
||||
test('window resize allows primary hover pause from a real mouseenter over subtitles', async () => {
|
||||
const ctx = createMouseTestContext();
|
||||
const originalWindow = globalThis.window;
|
||||
const originalDocument = globalThis.document;
|
||||
const mpvCommands: Array<(string | number)[]> = [];
|
||||
const windowListeners = new Map<string, Array<() => void>>();
|
||||
ctx.platform.shouldToggleMouseIgnore = true;
|
||||
|
||||
Object.defineProperty(globalThis, 'window', {
|
||||
configurable: true,
|
||||
value: {
|
||||
electronAPI: {
|
||||
setIgnoreMouseEvents: () => {},
|
||||
},
|
||||
addEventListener: (type: string, listener: () => void) => {
|
||||
const bucket = windowListeners.get(type) ?? [];
|
||||
bucket.push(listener);
|
||||
windowListeners.set(type, bucket);
|
||||
},
|
||||
getComputedStyle: () => ({
|
||||
visibility: 'hidden',
|
||||
display: 'none',
|
||||
opacity: '0',
|
||||
}),
|
||||
focus: () => {},
|
||||
innerHeight: 1000,
|
||||
},
|
||||
});
|
||||
Object.defineProperty(globalThis, 'document', {
|
||||
configurable: true,
|
||||
value: {
|
||||
addEventListener: () => {},
|
||||
elementFromPoint: () => ctx.dom.subtitleContainer,
|
||||
querySelectorAll: () => [],
|
||||
body: {},
|
||||
},
|
||||
});
|
||||
|
||||
try {
|
||||
const handlers = createMouseHandlers(ctx as never, {
|
||||
modalStateReader: {
|
||||
isAnySettingsModalOpen: () => false,
|
||||
isAnyModalOpen: () => false,
|
||||
},
|
||||
applyYPercent: () => {},
|
||||
getCurrentYPercent: () => 10,
|
||||
persistSubtitlePositionPatch: () => {},
|
||||
getSubtitleHoverAutoPauseEnabled: () => true,
|
||||
getYomitanPopupAutoPauseEnabled: () => false,
|
||||
getPlaybackPaused: async () => false,
|
||||
sendMpvCommand: (command) => {
|
||||
mpvCommands.push(command);
|
||||
},
|
||||
});
|
||||
|
||||
handlers.setupResizeHandler();
|
||||
for (const listener of windowListeners.get('resize') ?? []) {
|
||||
listener();
|
||||
}
|
||||
|
||||
await handlers.handlePrimaryMouseEnter({ clientX: 120, clientY: 240 } as MouseEvent);
|
||||
assert.deepEqual(mpvCommands, [['set_property', 'pause', 'yes']]);
|
||||
} finally {
|
||||
Object.defineProperty(globalThis, 'window', { configurable: true, value: originalWindow });
|
||||
Object.defineProperty(globalThis, 'document', { configurable: true, value: originalDocument });
|
||||
}
|
||||
});
|
||||
|
||||
test('visibility recovery keeps overlay click-through when pointer is not over subtitles', () => {
|
||||
const ctx = createMouseTestContext();
|
||||
const originalWindow = globalThis.window;
|
||||
|
||||
@@ -300,12 +300,15 @@ export function createMouseHandlers(
|
||||
}
|
||||
|
||||
async function handleMouseEnter(
|
||||
_event?: MouseEvent,
|
||||
event?: MouseEvent,
|
||||
showSecondaryHover = false,
|
||||
source: 'direct' | 'tracked-pointer' = 'direct',
|
||||
): Promise<void> {
|
||||
if (source === 'direct' && suppressDirectHoverEnterSource !== null) {
|
||||
return;
|
||||
if (!event || !syncHoverStateFromPoint(event.clientX, event.clientY).isOverSubtitle) {
|
||||
return;
|
||||
}
|
||||
suppressDirectHoverEnterSource = null;
|
||||
}
|
||||
|
||||
ctx.state.isOverSubtitle = true;
|
||||
|
||||
@@ -793,6 +793,14 @@ body.settings-modal-open [data-subminer-yomitan-popup-host='true'] {
|
||||
color: var(--subtitle-name-match-color, #f5bde6);
|
||||
}
|
||||
|
||||
#subtitleRoot .word.word-jlpt-n1 {
|
||||
text-decoration-line: underline;
|
||||
text-decoration-thickness: 2px;
|
||||
text-underline-offset: 4px;
|
||||
text-decoration-color: var(--subtitle-jlpt-n1-color, #ed8796);
|
||||
text-decoration-style: solid;
|
||||
}
|
||||
|
||||
#subtitleRoot
|
||||
.word.word-jlpt-n1:not(
|
||||
:is(
|
||||
@@ -814,6 +822,14 @@ body.settings-modal-open [data-subminer-yomitan-popup-host='true'] {
|
||||
color: var(--subtitle-jlpt-n1-color, #ed8796);
|
||||
}
|
||||
|
||||
#subtitleRoot .word.word-jlpt-n2 {
|
||||
text-decoration-line: underline;
|
||||
text-decoration-thickness: 2px;
|
||||
text-underline-offset: 4px;
|
||||
text-decoration-color: var(--subtitle-jlpt-n2-color, #f5a97f);
|
||||
text-decoration-style: solid;
|
||||
}
|
||||
|
||||
#subtitleRoot
|
||||
.word.word-jlpt-n2:not(
|
||||
:is(
|
||||
@@ -835,6 +851,14 @@ body.settings-modal-open [data-subminer-yomitan-popup-host='true'] {
|
||||
color: var(--subtitle-jlpt-n2-color, #f5a97f);
|
||||
}
|
||||
|
||||
#subtitleRoot .word.word-jlpt-n3 {
|
||||
text-decoration-line: underline;
|
||||
text-decoration-thickness: 2px;
|
||||
text-underline-offset: 4px;
|
||||
text-decoration-color: var(--subtitle-jlpt-n3-color, #f9e2af);
|
||||
text-decoration-style: solid;
|
||||
}
|
||||
|
||||
#subtitleRoot
|
||||
.word.word-jlpt-n3:not(
|
||||
:is(
|
||||
@@ -856,6 +880,14 @@ body.settings-modal-open [data-subminer-yomitan-popup-host='true'] {
|
||||
color: var(--subtitle-jlpt-n3-color, #f9e2af);
|
||||
}
|
||||
|
||||
#subtitleRoot .word.word-jlpt-n4 {
|
||||
text-decoration-line: underline;
|
||||
text-decoration-thickness: 2px;
|
||||
text-underline-offset: 4px;
|
||||
text-decoration-color: var(--subtitle-jlpt-n4-color, #a6e3a1);
|
||||
text-decoration-style: solid;
|
||||
}
|
||||
|
||||
#subtitleRoot
|
||||
.word.word-jlpt-n4:not(
|
||||
:is(
|
||||
@@ -877,6 +909,14 @@ body.settings-modal-open [data-subminer-yomitan-popup-host='true'] {
|
||||
color: var(--subtitle-jlpt-n4-color, #a6e3a1);
|
||||
}
|
||||
|
||||
#subtitleRoot .word.word-jlpt-n5 {
|
||||
text-decoration-line: underline;
|
||||
text-decoration-thickness: 2px;
|
||||
text-underline-offset: 4px;
|
||||
text-decoration-color: var(--subtitle-jlpt-n5-color, #8aadf4);
|
||||
text-decoration-style: solid;
|
||||
}
|
||||
|
||||
#subtitleRoot
|
||||
.word.word-jlpt-n5:not(
|
||||
:is(
|
||||
|
||||
@@ -901,6 +901,14 @@ test('subtitle annotation CSS changes token color without overriding typography'
|
||||
for (let level = 1; level <= 5; level += 1) {
|
||||
const plainJlptBlock = extractClassBlock(cssText, `#subtitleRoot .word.word-jlpt-n${level}`);
|
||||
assert.doesNotMatch(plainJlptBlock, /(?:^|\n)\s*color\s*:/m);
|
||||
assert.match(plainJlptBlock, /text-decoration-line:\s*underline;/);
|
||||
assert.match(plainJlptBlock, /text-decoration-thickness:\s*2px;/);
|
||||
assert.match(plainJlptBlock, /text-underline-offset:\s*4px;/);
|
||||
assert.match(
|
||||
plainJlptBlock,
|
||||
new RegExp(`text-decoration-color:\\s*var\\(--subtitle-jlpt-n${level}-color,`),
|
||||
);
|
||||
assert.match(plainJlptBlock, /text-decoration-style:\s*solid;/);
|
||||
|
||||
const block = extractClassBlock(cssText, buildJlptColorSelector(level));
|
||||
assert.ok(block.length > 0, `word-jlpt-n${level} class should exist`);
|
||||
|
||||
@@ -282,6 +282,26 @@ function isExcludedByTagSet(normalizedTag: string, exclusions: ReadonlySet<strin
|
||||
return parts.every((part) => exclusions.has(part));
|
||||
}
|
||||
|
||||
function isKanaChar(char: string): boolean {
|
||||
const code = char.codePointAt(0);
|
||||
if (code === undefined) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (
|
||||
(code >= 0x3041 && code <= 0x3096) ||
|
||||
(code >= 0x309b && code <= 0x309f) ||
|
||||
code === 0x30fc ||
|
||||
(code >= 0x30a0 && code <= 0x30fa) ||
|
||||
(code >= 0x30fd && code <= 0x30ff)
|
||||
);
|
||||
}
|
||||
|
||||
function isKanaOnlyText(text: string): boolean {
|
||||
const normalized = text.trim();
|
||||
return normalized.length > 0 && Array.from(normalized).every((char) => isKanaChar(char));
|
||||
}
|
||||
|
||||
export function isNPlusOneCandidateToken(
|
||||
token: MergedToken,
|
||||
pos1Exclusions: ReadonlySet<string> = N_PLUS_ONE_IGNORED_POS1,
|
||||
@@ -290,6 +310,9 @@ export function isNPlusOneCandidateToken(
|
||||
if (token.isKnown) {
|
||||
return false;
|
||||
}
|
||||
if (isKanaOnlyText(token.surface)) {
|
||||
return false;
|
||||
}
|
||||
return isNPlusOneWordCountToken(token, pos1Exclusions, pos2Exclusions);
|
||||
}
|
||||
|
||||
@@ -347,22 +370,40 @@ function isSentenceBoundaryToken(token: MergedToken): boolean {
|
||||
return SENTENCE_BOUNDARY_SURFACES.has(token.surface);
|
||||
}
|
||||
|
||||
function hasSentenceBoundaryInSourceGap(
|
||||
sourceText: string | undefined,
|
||||
previousEnd: number | null,
|
||||
nextStart: number,
|
||||
): boolean {
|
||||
if (typeof sourceText !== 'string' || previousEnd === null || nextStart <= previousEnd) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const gap = sourceText.slice(previousEnd, nextStart);
|
||||
return [...gap].some((char) => SENTENCE_BOUNDARY_SURFACES.has(char));
|
||||
}
|
||||
|
||||
export function markNPlusOneTargets(
|
||||
tokens: MergedToken[],
|
||||
minSentenceWords = 3,
|
||||
pos1Exclusions: ReadonlySet<string> = N_PLUS_ONE_IGNORED_POS1,
|
||||
pos2Exclusions: ReadonlySet<string> = N_PLUS_ONE_IGNORED_POS2,
|
||||
sourceText?: string,
|
||||
): MergedToken[] {
|
||||
if (tokens.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const normalizedSourceText =
|
||||
typeof sourceText === 'string' ? sourceText.replace(/\r?\n/g, ' ').trim() : undefined;
|
||||
|
||||
const markedTokens = tokens.map((token) => ({
|
||||
...token,
|
||||
isNPlusOneTarget: false,
|
||||
}));
|
||||
|
||||
let sentenceStart = 0;
|
||||
let previousTokenEnd: number | null = null;
|
||||
const minimumSentenceWords = Number.isInteger(minSentenceWords)
|
||||
? Math.max(1, minSentenceWords)
|
||||
: 3;
|
||||
@@ -393,10 +434,15 @@ export function markNPlusOneTargets(
|
||||
for (let i = 0; i < markedTokens.length; i++) {
|
||||
const token = markedTokens[i];
|
||||
if (!token) continue;
|
||||
if (hasSentenceBoundaryInSourceGap(normalizedSourceText, previousTokenEnd, token.startPos)) {
|
||||
markSentence(sentenceStart, i);
|
||||
sentenceStart = i;
|
||||
}
|
||||
if (isSentenceBoundaryToken(token)) {
|
||||
markSentence(sentenceStart, i);
|
||||
sentenceStart = i + 1;
|
||||
}
|
||||
previousTokenEnd = token.endPos;
|
||||
}
|
||||
|
||||
if (sentenceStart < markedTokens.length) {
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import {
|
||||
isHyprlandGeometryEvent,
|
||||
parseHyprctlClients,
|
||||
resolveHyprlandWindowGeometry,
|
||||
selectHyprlandMpvWindow,
|
||||
type HyprlandClient,
|
||||
type HyprlandMonitor,
|
||||
} from './hyprland-tracker';
|
||||
|
||||
function makeClient(overrides: Partial<HyprlandClient> = {}): HyprlandClient {
|
||||
@@ -19,6 +22,17 @@ function makeClient(overrides: Partial<HyprlandClient> = {}): HyprlandClient {
|
||||
};
|
||||
}
|
||||
|
||||
function makeMonitor(overrides: Partial<HyprlandMonitor> = {}): HyprlandMonitor {
|
||||
return {
|
||||
id: 0,
|
||||
x: 0,
|
||||
y: 0,
|
||||
width: 1920,
|
||||
height: 1080,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
test('selectHyprlandMpvWindow ignores hidden and unmapped mpv clients', () => {
|
||||
const selected = selectHyprlandMpvWindow(
|
||||
[
|
||||
@@ -106,3 +120,32 @@ test('parseHyprctlClients tolerates non-json prefix output', () => {
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('isHyprlandGeometryEvent treats fullscreenv2 as a geometry-changing event', () => {
|
||||
assert.equal(isHyprlandGeometryEvent('fullscreenv2'), true);
|
||||
assert.equal(isHyprlandGeometryEvent('workspacev2'), true);
|
||||
assert.equal(isHyprlandGeometryEvent('activewindowv2'), false);
|
||||
});
|
||||
|
||||
test('resolveHyprlandWindowGeometry uses monitor bounds for fullscreen clients', () => {
|
||||
const geometry = resolveHyprlandWindowGeometry(
|
||||
makeClient({
|
||||
at: [60, 80],
|
||||
size: [1280, 720],
|
||||
monitor: 1,
|
||||
fullscreen: 2,
|
||||
fullscreenClient: 2,
|
||||
}),
|
||||
[
|
||||
makeMonitor({ id: 0, x: 0, y: 0, width: 1920, height: 1080 }),
|
||||
makeMonitor({ id: 1, x: 1920, y: 0, width: 2560, height: 1440 }),
|
||||
],
|
||||
);
|
||||
|
||||
assert.deepEqual(geometry, {
|
||||
x: 1920,
|
||||
y: 0,
|
||||
width: 2560,
|
||||
height: 1440,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -20,6 +20,7 @@ import * as net from 'net';
|
||||
import { execSync } from 'child_process';
|
||||
import { BaseWindowTracker } from './base-tracker';
|
||||
import { createLogger } from '../logger';
|
||||
import type { WindowGeometry } from '../types';
|
||||
|
||||
const log = createLogger('tracker').child('hyprland');
|
||||
|
||||
@@ -29,11 +30,22 @@ export interface HyprlandClient {
|
||||
initialClass?: string;
|
||||
at: [number, number];
|
||||
size: [number, number];
|
||||
monitor?: number;
|
||||
fullscreen?: number;
|
||||
fullscreenClient?: number;
|
||||
pid?: number;
|
||||
mapped?: boolean;
|
||||
hidden?: boolean;
|
||||
}
|
||||
|
||||
export interface HyprlandMonitor {
|
||||
id: number;
|
||||
x: number;
|
||||
y: number;
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
interface SelectHyprlandMpvWindowOptions {
|
||||
targetMpvSocketPath: string | null;
|
||||
activeWindowAddress: string | null;
|
||||
@@ -132,8 +144,73 @@ export function parseHyprctlClients(output: string): HyprlandClient[] | null {
|
||||
return parsed as HyprlandClient[];
|
||||
}
|
||||
|
||||
export function parseHyprctlMonitors(output: string): HyprlandMonitor[] | null {
|
||||
const jsonPayload = extractHyprctlJsonPayload(output);
|
||||
if (!jsonPayload) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(jsonPayload) as unknown;
|
||||
if (!Array.isArray(parsed)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return parsed as HyprlandMonitor[];
|
||||
}
|
||||
|
||||
function isHyprlandFullscreenClient(client: HyprlandClient): boolean {
|
||||
return (client.fullscreen ?? 0) > 0;
|
||||
}
|
||||
|
||||
export function resolveHyprlandWindowGeometry(
|
||||
client: HyprlandClient,
|
||||
monitors: HyprlandMonitor[] | null,
|
||||
): WindowGeometry {
|
||||
if (isHyprlandFullscreenClient(client) && typeof client.monitor === 'number') {
|
||||
const monitor = monitors?.find((candidate) => candidate.id === client.monitor);
|
||||
if (monitor) {
|
||||
return {
|
||||
x: monitor.x,
|
||||
y: monitor.y,
|
||||
width: monitor.width,
|
||||
height: monitor.height,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
x: client.at[0],
|
||||
y: client.at[1],
|
||||
width: client.size[0],
|
||||
height: client.size[1],
|
||||
};
|
||||
}
|
||||
|
||||
export function isHyprlandGeometryEvent(name: string): boolean {
|
||||
return (
|
||||
name === 'movewindow' ||
|
||||
name === 'movewindowv2' ||
|
||||
name === 'resizewindow' ||
|
||||
name === 'resizewindowv2' ||
|
||||
name === 'windowtitle' ||
|
||||
name === 'windowtitlev2' ||
|
||||
name === 'openwindow' ||
|
||||
name === 'closewindow' ||
|
||||
name === 'fullscreen' ||
|
||||
name === 'fullscreenv2' ||
|
||||
name === 'changefloatingmode' ||
|
||||
name === 'workspace' ||
|
||||
name === 'workspacev2' ||
|
||||
name === 'focusedmon' ||
|
||||
name === 'monitoradded' ||
|
||||
name === 'monitoraddedv2' ||
|
||||
name === 'monitorremoved'
|
||||
);
|
||||
}
|
||||
|
||||
export class HyprlandWindowTracker extends BaseWindowTracker {
|
||||
private pollInterval: ReturnType<typeof setInterval> | null = null;
|
||||
private pollTimeouts: Array<ReturnType<typeof setTimeout>> = [];
|
||||
private eventSocket: net.Socket | null = null;
|
||||
private readonly targetMpvSocketPath: string | null;
|
||||
private activeWindowAddress: string | null = null;
|
||||
@@ -154,6 +231,10 @@ export class HyprlandWindowTracker extends BaseWindowTracker {
|
||||
clearInterval(this.pollInterval);
|
||||
this.pollInterval = null;
|
||||
}
|
||||
for (const timeout of this.pollTimeouts) {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
this.pollTimeouts = [];
|
||||
if (this.eventSocket) {
|
||||
this.eventSocket.destroy();
|
||||
this.eventSocket = null;
|
||||
@@ -200,6 +281,9 @@ export class HyprlandWindowTracker extends BaseWindowTracker {
|
||||
}
|
||||
|
||||
const [name, rawData = ''] = trimmedEvent.split('>>', 2);
|
||||
if (!name) {
|
||||
return;
|
||||
}
|
||||
const data = rawData.trim();
|
||||
|
||||
if (name === 'activewindowv2') {
|
||||
@@ -212,17 +296,25 @@ export class HyprlandWindowTracker extends BaseWindowTracker {
|
||||
this.activeWindowAddress = null;
|
||||
}
|
||||
|
||||
if (
|
||||
name === 'movewindow' ||
|
||||
name === 'movewindowv2' ||
|
||||
name === 'windowtitle' ||
|
||||
name === 'windowtitlev2' ||
|
||||
name === 'openwindow' ||
|
||||
name === 'closewindow' ||
|
||||
name === 'fullscreen' ||
|
||||
name === 'changefloatingmode'
|
||||
) {
|
||||
this.pollGeometry();
|
||||
if (isHyprlandGeometryEvent(name)) {
|
||||
this.scheduleGeometryPollBurst();
|
||||
}
|
||||
}
|
||||
|
||||
private scheduleGeometryPollBurst(): void {
|
||||
this.pollGeometry();
|
||||
for (const timeout of this.pollTimeouts) {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
this.pollTimeouts = [50, 150, 300].map((delayMs) => {
|
||||
const pollTimeout = setTimeout(() => {
|
||||
this.pollTimeouts = this.pollTimeouts.filter((timeout) => timeout !== pollTimeout);
|
||||
this.pollGeometry();
|
||||
}, delayMs);
|
||||
return pollTimeout;
|
||||
});
|
||||
for (const pollTimeout of this.pollTimeouts) {
|
||||
pollTimeout.unref?.();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -237,12 +329,9 @@ export class HyprlandWindowTracker extends BaseWindowTracker {
|
||||
const mpvWindow = this.findTargetWindow(clients);
|
||||
|
||||
if (mpvWindow) {
|
||||
this.updateGeometry({
|
||||
x: mpvWindow.at[0],
|
||||
y: mpvWindow.at[1],
|
||||
width: mpvWindow.size[0],
|
||||
height: mpvWindow.size[1],
|
||||
});
|
||||
this.updateGeometry(
|
||||
resolveHyprlandWindowGeometry(mpvWindow, this.getHyprlandMonitors(mpvWindow)),
|
||||
);
|
||||
} else {
|
||||
this.updateGeometry(null);
|
||||
}
|
||||
@@ -259,6 +348,19 @@ export class HyprlandWindowTracker extends BaseWindowTracker {
|
||||
});
|
||||
}
|
||||
|
||||
private getHyprlandMonitors(client: HyprlandClient): HyprlandMonitor[] | null {
|
||||
if (!isHyprlandFullscreenClient(client)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const output = execSync('hyprctl -j monitors', { encoding: 'utf-8' });
|
||||
return parseHyprctlMonitors(output);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private getWindowCommandLine(pid: number): string | null {
|
||||
const commandLine = execSync(`ps -p ${pid} -o args=`, {
|
||||
encoding: 'utf-8',
|
||||
|
||||
Reference in New Issue
Block a user