Files
SubMiner/plugin/subminer/process.lua
T
sudacode 430373f010 feat(tokenizer): use Yomitan word classes for subtitle POS filtering (#57)
* feat(tokenizer): use Yomitan word classes for subtitle POS filtering

- Carry matched headword wordClasses from termsFind into YomitanScanToken
- Map recognized Yomitan wordClasses to SubMiner coarse POS before annotation
- MeCab enrichment now fills only missing POS fields, preserving existing coarse pos1
- Exclude standalone grammar particles, して helper fragments, and single-kana surfaces from annotations
- Respect source-text punctuation gaps when counting N+1 sentence words
- Preserve known-word highlight on excluded kanji-containing tokens
- Add backlog tasks 304 (N+1 boundary bug) and 305 (wordClasses POS, done)

* fix(tokenizer): preserve annotation and enrichment behavior

* fix: restore jlpt subtitle underlines

* fix: exclude kana-only n+1 targets

* fix: refresh overlay on Hyprland fullscreen

* fix: address fullscreen and n-plus-one review notes

* fix: address CodeRabbit review comments

* fix: accept modified digits for multi-line sentence mining

* Cancel pending Linux MPV fullscreen overlay refresh bursts

- return a cancel handle from the Linux refresh burst scheduler
- clear pending refresh bursts when overlays hide or windows close
- tighten the burst test polling to wait for the async refresh

* fix: suppress N+1 for kana-only candidates and fix minSentenceWords count

- Treat kana-only tokens with surrounding subtitle punctuation (…, ―, etc.) as kana-only so they are not promoted to N+1 targets
- Exclude unknown tokens filtered from N+1 targeting from the minSentenceWords count so filtered kana-only unknowns cannot satisfy sentence length threshold
- Add regression tests for kana-only candidate suppression and filtered-unknown padding cases

* Suppress subtitle annotations for grammar fragments

- Hide annotation metadata for auxiliary inflection and ja-nai endings
- Preserve lexical `くれる` forms and add regression coverage

* Fix kana-only N+1 tokenizer regression test

- Use a pure-kana fixture for the subtitle token N+1 case
- Update task notes for the latest CodeRabbit follow-up

* Fix managed playback exit and tokenizer grammar splits

- Ignore background stats daemons during regular app startup
- Split standalone grammar endings before applying annotations
- Clear helper-span annotations for auxiliary-only tokens

* fix: refresh current subtitle after known-word mining

* fix: suppress sigh interjection annotations

* fix: preserve jlpt underline color after lookup

* Replace grammar-ending permutations with shared matcher; preserve word audio

- Extract `grammar-ending.ts` with `isStandaloneGrammarEndingText` / `isSubtitleGrammarEndingText` pattern matchers
- Replace `STANDALONE_GRAMMAR_ENDINGS` set in parser-selection-stage with shared matcher
- Replace generated phrase sets in subtitle-annotation-filter with shared matcher
- Remove stale duplicate subtitle-exclusion constants and helpers from annotation-stage
- Manual clipboard card updates now write only to the sentence audio field, leaving word/expression audio untouched

* fix: CI changelog, annotation options threading, and Jellyfin quit

- Add `type: fixed` / `area:` frontmatter to `changes/319` to pass `changelog:lint`
- Thread `TokenizerAnnotationOptions` through `stripSubtitleAnnotationMetadata` so `sourceText` is honored
- Include `jellyfinPlay` in `shouldQuitOnDisconnectWhenOverlayRuntimeInitialized` predicate
- Make mouse test `elementFromPoint` stubs coordinate-sensitive
- Make Lua test `.tmp` mkdir portable on Windows

* Preserve overlay across macOS flaps and mpv playlist changes

- keep visible overlays alive during transient macOS tracker loss
- reuse the running mpv overlay path on playlist navigation
- update regression coverage and changelog fragments

* fix: restore stats daemon deferral

* fix: keep subtitle prefetch alive after cache hits

* Fix JLPT underline color drift and AniList skipped-threshold sync

- Replace JLPT `text-decoration` underlines with `border-bottom` so Chromium selection/hover cannot repaint them to another annotation's color
- Lock JLPT underline color for combined annotation selectors (known, n+1, frequency) and character hover/selection states
- Trigger AniList post-watch check on every mpv time-position update to catch skipped completion thresholds
- Fall back to filename-parser season/episode when guessit omits them

* fix: address coderabbit feedback

* fix: sync AniList after seeked completion

* fix: preserve ordinal frequency annotations

* fix: preserve known highlighting for filtered tokens

* fix: address PR #57 CodeRabbit feedback

- Acquire AniList post-watch in-flight lock before async gating to prevent duplicate writes
- Isolate manual watched mark result from AniList post-watch callback failures
- Report known-word cache clears as mutations during immediate append when state existed
- Add regression tests for each fix

* fix: stop AniList setup reopening on Linux when keyring token exists

- Gate setup success on token persistence: `saveToken` now returns `boolean`; on failure, keeps the setup window open instead of reporting success
- Config reload passes `allowSetupPrompt: false` so playback reloads don't re-open the setup window
- Add regression test for persistence-failure path

* fix: suppress known highlights for subtitle particles

* fix: retry transient AniList safeStorage failures

* fix: hide overlay focus ring

* fix: align Hyprland fullscreen overlays

* fix: restore subtitle playback keybindings

* fix: align Hyprland overlay windows to mpv and stop pinning them

- Force-apply exact Hyprland move/resize/setprop dispatches when bounds are provided
- Stop pinning overlay windows; toggle pin off when Hyprland reports pinned=true
- Compensate stats overlay outer placement for Electron/Wayland content insets
- Make stats overlay window and page opaque so mpv cannot show through transparent insets
- Constrain stats app to h-screen with internal scroll so content covers mpv from y=0
- Lock overlay/stats window titles against page-title-updated events
- Add regression coverage for placement dispatches, inset compensation, and CSS overlay mode

* fix: retain frequency rank for honorific prefix-noun tokens

- Add `shouldAllowHonorificPrefixNounFrequency` to exempt お/ご/御 + noun merged tokens from frequency exclusion
- Add regression test for `ご機嫌` asserting rank 5484 is preserved after MeCab enrichment and annotation
- Close TASK-341

* fix: map openCharacterDictionary session action to --open-character-dict

- Add missing Lua CLI dispatch entry for openCharacterDictionary
- Add regression test for Alt+Meta+A binding and CLI flag forwarding

* fix: keep macOS overlay interactive while mpv remains active

- Overlay no longer hides or becomes click-through during tracker refreshes when mpv is the focused window
- Preserve already-visible overlay when tracker is temporarily not ready but mpv target signal is active
- Add regression tests for active-mpv tracker refresh and transient tracker-not-ready paths

* fix: address coderabbit subtitle follow-ups

* fix: resolve media detail from sessions when lifetime summary is absent

- Change `getMediaDetail` JOIN to LEFT JOIN on `imm_lifetime_media` and fall back to aggregated session metrics when no lifetime row exists
- Add filter `AND (lm.video_id IS NOT NULL OR s.session_id IS NOT NULL)` to keep results valid
- Add regression test covering the session-visible / media-detail-missing mismatch

* fix: address PR-57 CodeRabbit findings and CI failures

- use filtered word counts in media detail session token aggregation
- cancel fullscreen refresh burst on exit via updateLinuxMpvFullscreenOverlayRefreshBurst
- guard Hyprland JSON.parse in try/catch; exclude windowtitle from geometry events
- narrow focus suppression from :focus to :focus-visible
- apply JLPT lock selectors to word-name-match tokens (N1–N5)

* fix: macOS overlay z-order and Yomitan compound token known highlighting

- Release always-on-top when tracked mpv loses foreground on macOS
- Skip visible overlay blur restacking on macOS to avoid covering unrelated windows
- Prefer Yomitan internal parse tokens over fragmented scanner output for known-word decisions
- Add regression tests for both behaviors

* fix: macOS visible-overlay blur no longer invokes Windows-only blur call

- Split win32/darwin branches in handleOverlayWindowBlurred so darwin visible blur returns early without calling onWindowsVisibleOverlayBlur
- Add regression test asserting Windows callback stays inactive on macOS visible overlay blur
- Close TASK-347
2026-05-12 12:08:09 -07:00

577 lines
17 KiB
Lua

-- Module table; M.create (below) is the factory for the process helpers.
local M = {}
-- Delay, in seconds, before the next overlay start attempt after a failed one.
local OVERLAY_START_RETRY_DELAY_SECONDS = 0.2
-- Total number of overlay start attempts made before reporting failure.
local OVERLAY_START_MAX_ATTEMPTS = 6
-- OSD text shown, and periodically re-shown, while the startup gate keeps playback paused.
local AUTO_PLAY_READY_LOADING_OSD = "Loading subtitle tokenization..."
-- OSD text shown when the startup gate is released.
local AUTO_PLAY_READY_READY_OSD = "Subtitle tokenization ready"
-- Gate timeout, in seconds, used when the options hold no numeric timeout.
local DEFAULT_AUTO_PLAY_READY_TIMEOUT_SECONDS = 15
-- Build the overlay process-control API for one plugin context.
-- `ctx` must provide the collaborators unpacked below: `mp`, `opts`, `state`,
-- `binary`, `environment`, `options_helper`, and the `log` helper table
-- (`subminer_log`, `show_osd`, `normalize_log_level`).
-- Returns a table of functions; see the return statement at the end of this
-- function for the exported names.
function M.create(ctx)
local mp = ctx.mp
local opts = ctx.opts
local state = ctx.state
local binary = ctx.binary
local environment = ctx.environment
local options_helper = ctx.options_helper
local subminer_log = ctx.log.subminer_log
local show_osd = ctx.log.show_osd
local normalize_log_level = ctx.log.normalize_log_level
-- Forward declaration: notify_auto_play_ready refers to this name before the
-- function value is assigned further down.
local run_control_command_async
-- Tell whether the visible overlay should be shown at startup.
-- Prefers `opts.auto_start_visible_overlay`; when that is nil the hyphenated
-- key is consulted instead. The raw value is coerced with a default of false.
local function resolve_visible_overlay_startup()
    local configured = opts.auto_start_visible_overlay
    if configured ~= nil then
        return options_helper.coerce_bool(configured, false)
    end
    return options_helper.coerce_bool(opts["auto-start-visible-overlay"], false)
end
-- Tell whether auto-start should pause playback until readiness is signalled.
-- Prefers `opts.auto_start_pause_until_ready`; when that is nil the hyphenated
-- key is consulted instead. The raw value is coerced with a default of false.
local function resolve_pause_until_ready()
    local configured = opts.auto_start_pause_until_ready
    if configured ~= nil then
        return options_helper.coerce_bool(configured, false)
    end
    return options_helper.coerce_bool(opts["auto-start-pause-until-ready"], false)
end
-- Tell whether the texthooker should be enabled.
-- Precedence: a non-nil `override_value`, then `opts.texthooker_enabled`,
-- then the hyphenated option key. The chosen value is coerced with a default
-- of false.
local function resolve_texthooker_enabled(override_value)
    local candidate = override_value
    if candidate == nil then
        candidate = opts.texthooker_enabled
    end
    if candidate == nil then
        candidate = opts["texthooker-enabled"]
    end
    return options_helper.coerce_bool(candidate, false)
end
-- Resolve how long, in seconds, the startup gate may keep playback paused.
-- Reads `opts.auto_start_pause_until_ready_timeout_seconds`, falling back to
-- the hyphenated key. Numbers are used as-is and strings are parsed with
-- `tonumber`. Anything else, an unparsable string, or NaN yields
-- DEFAULT_AUTO_PLAY_READY_TIMEOUT_SECONDS.
-- Zero and negative values are returned unchanged; the caller only schedules
-- a timeout for values greater than zero.
local function resolve_pause_until_ready_timeout_seconds()
    local raw = opts.auto_start_pause_until_ready_timeout_seconds
    if raw == nil then
        raw = opts["auto-start-pause-until-ready-timeout-seconds"]
    end

    local seconds
    if type(raw) == "number" then
        seconds = raw
    elseif type(raw) == "string" then
        seconds = tonumber(raw)
    end

    -- NaN is the only value that differs from itself. It fails the caller's
    -- `> 0` check, which would silently leave the pause without any timeout,
    -- so treat it like a missing value.
    if seconds == nil or seconds ~= seconds then
        return DEFAULT_AUTO_PLAY_READY_TIMEOUT_SECONDS
    end
    return seconds
end
-- Trim surrounding whitespace from a socket path.
-- Returns the trimmed string, or nil when `path` is not a string or is empty
-- after trimming.
local function normalize_socket_path(path)
    if type(path) == "string" then
        local stripped = path:match("^%s*(.-)%s*$")
        if stripped ~= "" then
            return stripped
        end
    end
    return nil
end
-- Check whether mpv's `input-ipc-server` property equals the expected socket.
-- `target_socket_path` defaults to `opts.socket_path`. Both sides are
-- normalized before comparison, and a missing or blank value on either side
-- yields false.
local function has_matching_mpv_ipc_socket(target_socket_path)
    local wanted = normalize_socket_path(target_socket_path or opts.socket_path)
    local current = normalize_socket_path(mp.get_property("input-ipc-server"))
    return wanted ~= nil and current ~= nil and wanted == current
end
-- Pick the backend name to pass to the binary.
-- A nil or empty `override_backend` falls back to `opts.backend`. The value
-- "auto" is replaced by whatever `environment.detect_backend()` returns; any
-- other value is returned unchanged.
local function resolve_backend(override_backend)
    local backend = override_backend
    if backend == nil or backend == "" then
        backend = opts.backend
    end
    if backend ~= "auto" then
        return backend
    end
    return environment.detect_backend()
end
-- Cancel the pending startup-gate timeout, if any, and forget its handle.
-- The handle is only killed when it exposes a `kill` member.
local function clear_auto_play_ready_timeout()
    local handle = state.auto_play_ready_timeout
    local can_kill = handle and handle.kill
    if can_kill then
        handle:kill()
    end
    state.auto_play_ready_timeout = nil
end
-- Stop the periodic "loading" OSD timer, if any, and forget its handle.
-- The handle is only killed when it exposes a `kill` member.
local function clear_auto_play_ready_osd_timer()
    local handle = state.auto_play_ready_osd_timer
    local can_kill = handle and handle.kill
    if can_kill then
        handle:kill()
    end
    state.auto_play_ready_osd_timer = nil
end
-- Disarm the startup gate and cancel both of its timers.
-- Playback is unpaused only when the gate was armed on entry and the caller
-- did not pass `options.resume_playback = false`. Omitting `options`
-- means "resume".
local function disarm_auto_play_ready_gate(options)
    local resume_requested = true
    if options ~= nil and options.resume_playback == false then
        resume_requested = false
    end
    local gate_was_armed = state.auto_play_ready_gate_armed

    clear_auto_play_ready_timeout()
    clear_auto_play_ready_osd_timer()
    state.auto_play_ready_gate_armed = false

    if gate_was_armed and resume_requested then
        mp.set_property_native("pause", false)
    end
end
-- Release an armed startup gate: disarm it, unpause playback, show the
-- "ready" OSD, and log `reason` (logged as "ready" when omitted).
-- Does nothing when the gate is not armed.
local function release_auto_play_ready_gate(reason)
    if state.auto_play_ready_gate_armed then
        -- Disarm without resuming; the unpause is issued explicitly right after.
        disarm_auto_play_ready_gate({ resume_playback = false })
        mp.set_property_native("pause", false)
        show_osd(AUTO_PLAY_READY_READY_OSD)
        local label = tostring(reason or "ready")
        subminer_log("info", "process", "Resuming playback after startup gate: " .. label)
    end
end
-- Arm the startup gate: pause playback, show the "loading" OSD, and schedule
-- the timers that keep the OSD visible and bound the wait.
-- Arming an already armed gate first cancels the timers of the previous arming.
local function arm_auto_play_ready_gate()
    if state.auto_play_ready_gate_armed then
        clear_auto_play_ready_timeout()
        clear_auto_play_ready_osd_timer()
    end

    state.auto_play_ready_gate_armed = true
    mp.set_property_native("pause", true)
    show_osd(AUTO_PLAY_READY_LOADING_OSD)

    -- Re-show the loading text while the gate is still armed.
    local function refresh_loading_osd()
        if state.auto_play_ready_gate_armed then
            show_osd(AUTO_PLAY_READY_LOADING_OSD)
        end
    end
    -- The periodic timer is optional: it is only used when `mp` provides it.
    if type(mp.add_periodic_timer) == "function" then
        state.auto_play_ready_osd_timer = mp.add_periodic_timer(2.5, refresh_loading_osd)
    end

    subminer_log("info", "process", "Pausing playback until SubMiner overlay/tokenization readiness signal")

    -- Safety net: resume playback if no readiness signal arrives in time.
    local function on_gate_timeout()
        if state.auto_play_ready_gate_armed then
            subminer_log(
                "warn",
                "process",
                "Startup readiness signal timed out; resuming playback to avoid stalled pause"
            )
            release_auto_play_ready_gate("timeout")
        end
    end
    local limit_seconds = resolve_pause_until_ready_timeout_seconds()
    -- A non-positive limit schedules no timeout at all.
    if limit_seconds and limit_seconds > 0 then
        state.auto_play_ready_timeout = mp.add_timeout(limit_seconds, on_gate_timeout)
    end
end
-- Handle the readiness signal: release the startup gate, then re-show the
-- visible overlay when the overlay is running, the startup option asks for a
-- visible overlay, and `state.suppress_ready_overlay_restore` is not set.
local function notify_auto_play_ready()
    release_auto_play_ready_gate("tokenization-ready")

    local may_restore = not state.suppress_ready_overlay_restore
    if may_restore and state.overlay_running and resolve_visible_overlay_startup() then
        run_control_command_async("show-visible-overlay", { socket_path = opts.socket_path })
    end
end
-- Build the argument vector used to invoke the SubMiner binary.
--
-- `action` is appended as `--<action>` after `state.binary_path`.
-- `overrides` (optional table) may carry `log_level`, `backend`,
-- `socket_path`, and `texthooker_enabled`; missing fields fall back to `opts`.
--
-- `--log-level <level>` is added only when the normalized level is not "info".
-- For the "start" action the list also gets `--background`,
-- `--managed-playback`, an optional `--backend <name>`, an optional
-- `--socket <path>`, exactly one of `--show-visible-overlay` /
-- `--hide-visible-overlay`, and `--texthooker` when enabled.
--
-- Returns the argument list as an array of strings.
local function build_command_args(action, overrides)
    overrides = overrides or {}
    local args = { state.binary_path }
    table.insert(args, "--" .. action)

    local log_level = normalize_log_level(overrides.log_level or opts.log_level)
    if log_level ~= "info" then
        table.insert(args, "--log-level")
        table.insert(args, log_level)
    end

    if action == "start" then
        table.insert(args, "--background")
        table.insert(args, "--managed-playback")

        local backend = resolve_backend(overrides.backend)
        if backend and backend ~= "" then
            table.insert(args, "--backend")
            table.insert(args, backend)
        end

        -- Emit the flag together with its value or not at all: a lone
        -- `--socket` would leave the next flag in the value position.
        local socket_path = overrides.socket_path or opts.socket_path
        if socket_path ~= nil and socket_path ~= "" then
            table.insert(args, "--socket")
            table.insert(args, socket_path)
        end

        if resolve_visible_overlay_startup() then
            table.insert(args, "--show-visible-overlay")
        else
            table.insert(args, "--hide-visible-overlay")
        end

        if resolve_texthooker_enabled(overrides.texthooker_enabled) then
            table.insert(args, "--texthooker")
        end
    end

    return args
end
-- Run a SubMiner control action as an asynchronous mpv subprocess.
-- The arguments come from build_command_args(action, overrides). When given,
-- `callback` receives (ok, result, error), where `ok` is truthy only if the
-- command succeeded and the result is either absent or has status 0.
run_control_command_async = function(action, overrides, callback)
    local command_args = build_command_args(action, overrides)
    subminer_log("debug", "process", "Control command: " .. table.concat(command_args, " "))

    local request = {
        name = "subprocess",
        args = command_args,
        playback_only = false,
        capture_stdout = true,
        capture_stderr = true,
    }

    local function on_finished(success, result, error)
        local ok = success and (result == nil or result.status == 0)
        if callback then
            callback(ok, result, error)
        end
    end

    mp.command_native_async(request, on_finished)
end
-- Run an already built argument vector as an asynchronous mpv subprocess.
-- When given, `callback` receives (ok, result, error), where `ok` is truthy
-- only if the command succeeded and the result is either absent or has
-- status 0.
local function run_binary_command_async(args, callback)
    subminer_log("debug", "process", "Binary command: " .. table.concat(args, " "))

    local request = {
        name = "subprocess",
        args = args,
        playback_only = false,
        capture_stdout = true,
        capture_stderr = true,
    }

    local function on_finished(success, result, error)
        local ok = success and (result == nil or result.status == 0)
        if callback then
            callback(ok, result, error)
        end
    end

    mp.command_native_async(request, on_finished)
end
-- Parse `key=value` tokens from a start script-message into an overrides table.
--
-- Keys are case-insensitive, and `-` and `_` are interchangeable, so
-- `socket-path=`, `texthooker-enabled=`, and `log-level=` work like their
-- underscore forms. Recognised keys:
--   backend                         -> overrides.backend (lower-cased; only
--                                      auto/hyprland/sway/x11/macos accepted)
--   socket, socket_path             -> overrides.socket_path
--   texthooker, texthooker_enabled  -> overrides.texthooker_enabled (only when
--                                      the value coerces to a boolean)
--   log_level                       -> overrides.log_level (normalized)
-- Non-string tokens, tokens that are not `key=value`, unknown keys, and
-- invalid values are ignored.
--
-- Returns the overrides table (possibly empty).
local function parse_start_script_message_overrides(...)
    local valid_backends = {
        auto = true,
        hyprland = true,
        sway = true,
        x11 = true,
        macos = true,
    }

    local overrides = {}
    for i = 1, select("#", ...) do
        local token = select(i, ...)
        if type(token) == "string" and token ~= "" then
            local key, value = token:match("^([%w_%-]+)=(.+)$")
            if key and value then
                -- Fold case and hyphens so every key has a single spelling.
                local normalized_key = key:lower():gsub("%-", "_")
                if normalized_key == "backend" then
                    local backend = value:lower()
                    if valid_backends[backend] then
                        overrides.backend = backend
                    end
                elseif normalized_key == "socket" or normalized_key == "socket_path" then
                    overrides.socket_path = value
                elseif normalized_key == "texthooker" or normalized_key == "texthooker_enabled" then
                    local parsed = options_helper.coerce_bool(value, nil)
                    if parsed ~= nil then
                        overrides.texthooker_enabled = parsed
                    end
                elseif normalized_key == "log_level" then
                    overrides.log_level = normalize_log_level(value)
                end
            end
        end
    end
    return overrides
end
-- Invoke `callback` immediately when one is supplied.
-- This function performs no other work.
local function ensure_texthooker_running(callback)
    if not callback then
        return
    end
    callback()
end
-- Start the SubMiner overlay process, retrying failed launches.
--
-- `overrides` (optional table) may carry `auto_start_trigger`, `socket_path`,
-- `log_level`, `backend`, and `texthooker_enabled`.
--
-- Behaviour:
--   * An auto-start trigger clears `state.suppress_ready_overlay_restore`.
--   * Without an available binary an error is logged and shown; nothing starts.
--   * When the overlay is already running, an auto-start only re-applies the
--     configured overlay visibility; a manual start reports "Already running".
--   * Otherwise the startup gate is armed when this is an auto-start, the
--     visible overlay and pause-until-ready options are on, and mpv's IPC
--     socket matches; in every other case the gate is disarmed.
--   * The binary is launched up to OVERLAY_START_MAX_ATTEMPTS times, waiting
--     OVERLAY_START_RETRY_DELAY_SECONDS between attempts.
-- Returns nothing.
local function start_overlay(overrides)
    overrides = overrides or {}
    if overrides.auto_start_trigger == true then
        state.suppress_ready_overlay_restore = false
    end

    if not binary.ensure_binary_available() then
        subminer_log("error", "binary", "SubMiner binary not found")
        show_osd("Error: binary not found")
        return
    end

    if state.overlay_running then
        if overrides.auto_start_trigger == true then
            subminer_log("debug", "process", "Auto-start ignored because overlay is already running")
            local socket_path = overrides.socket_path or opts.socket_path
            -- An armed gate is left untouched here; otherwise any leftover
            -- gate timers are cleared.
            if not state.auto_play_ready_gate_armed then
                disarm_auto_play_ready_gate()
            end
            local visibility_action = resolve_visible_overlay_startup()
                    and "show-visible-overlay"
                or "hide-visible-overlay"
            run_control_command_async(visibility_action, {
                socket_path = socket_path,
                log_level = overrides.log_level,
            })
            return
        end
        subminer_log("info", "process", "Overlay already running")
        show_osd("Already running")
        return
    end

    local texthooker_enabled = resolve_texthooker_enabled(overrides.texthooker_enabled)
    local socket_path = overrides.socket_path or opts.socket_path
    local should_pause_until_ready = (
        overrides.auto_start_trigger == true
        and resolve_visible_overlay_startup()
        and resolve_pause_until_ready()
        and has_matching_mpv_ipc_socket(socket_path)
    )
    if should_pause_until_ready then
        arm_auto_play_ready_gate()
    else
        disarm_auto_play_ready_gate()
    end

    local function launch_overlay_with_retry(attempt)
        local args = build_command_args("start", overrides)
        if attempt == 1 then
            subminer_log("info", "process", "Starting overlay: " .. table.concat(args, " "))
        else
            subminer_log(
                "warn",
                "process",
                "Retrying overlay start (attempt " .. tostring(attempt) .. "): " .. table.concat(args, " ")
            )
        end

        -- While the gate is armed its "loading" OSD is showing; keep it.
        if attempt == 1 and not state.auto_play_ready_gate_armed then
            show_osd("Starting...")
        end

        -- Marked running before the subprocess reports back, so further
        -- start requests take the "already running" path meanwhile.
        state.overlay_running = true

        mp.command_native_async({
            name = "subprocess",
            args = args,
            playback_only = false,
            capture_stdout = true,
            capture_stderr = true,
        }, function(success, result, error)
            if not success or (result and result.status ~= 0) then
                local reason = error or (result and result.stderr) or "unknown error"
                if attempt < OVERLAY_START_MAX_ATTEMPTS then
                    mp.add_timeout(OVERLAY_START_RETRY_DELAY_SECONDS, function()
                        launch_overlay_with_retry(attempt + 1)
                    end)
                    return
                end
                state.overlay_running = false
                subminer_log("error", "process", "Overlay start failed after retries: " .. reason)
                -- Release the gate before reporting the failure: releasing an
                -- armed gate shows the "ready" OSD, and the failure message
                -- must be the last OSD text issued.
                release_auto_play_ready_gate("overlay-start-failed")
                show_osd("Overlay start failed")
                return
            end

            if overrides.auto_start_trigger == true then
                local visibility_action = resolve_visible_overlay_startup()
                        and "show-visible-overlay"
                    or "hide-visible-overlay"
                run_control_command_async(visibility_action, {
                    socket_path = socket_path,
                    log_level = overrides.log_level,
                })
            end
        end)
    end

    launch_overlay_with_retry(1)
    if texthooker_enabled then
        ensure_texthooker_running(function() end)
    end
end
-- Script-message entry point: parse the `key=value` tokens into overrides
-- and start the overlay with them.
local function start_overlay_from_script_message(...)
    start_overlay(parse_start_script_message_overrides(...))
end
-- Ask the binary to stop the overlay.
-- The running flags are cleared, the startup gate is disarmed, and "Stopped"
-- is shown right after the stop command is issued; its outcome is only logged.
local function stop_overlay()
    if not binary.ensure_binary_available() then
        subminer_log("error", "binary", "SubMiner binary not found")
        show_osd("Error: binary not found")
        return
    end

    local function on_stop_finished(ok, result)
        if not ok then
            subminer_log(
                "warn",
                "process",
                "Stop command returned non-zero status: " .. tostring(result and result.status or "unknown")
            )
            return
        end
        subminer_log("info", "process", "Overlay stopped")
    end
    run_control_command_async("stop", nil, on_stop_finished)

    state.overlay_running = false
    state.texthooker_running = false
    disarm_auto_play_ready_gate()
    show_osd("Stopped")
end
-- Hide the visible overlay and disarm the startup gate.
-- Sets `state.suppress_ready_overlay_restore` so a later readiness signal does
-- not show the overlay again. A missing binary is logged but not shown on OSD.
local function hide_visible_overlay()
    if not binary.ensure_binary_available() then
        subminer_log("error", "binary", "SubMiner binary not found")
        return
    end

    state.suppress_ready_overlay_restore = true

    local function on_hide_finished(ok, result)
        if not ok then
            subminer_log(
                "warn",
                "process",
                "Hide-visible-overlay command returned non-zero status: "
                    .. tostring(result and result.status or "unknown")
            )
            return
        end
        subminer_log("info", "process", "Visible overlay hidden")
    end
    run_control_command_async("hide-visible-overlay", nil, on_hide_finished)

    disarm_auto_play_ready_gate()
end
-- Toggle the visible overlay.
-- Sets `state.suppress_ready_overlay_restore` so a later readiness signal does
-- not override the user's choice. A failed command is logged and shown on OSD.
local function toggle_overlay()
    if not binary.ensure_binary_available() then
        subminer_log("error", "binary", "SubMiner binary not found")
        show_osd("Error: binary not found")
        return
    end

    state.suppress_ready_overlay_restore = true

    local function on_toggle_finished(ok)
        if ok then
            return
        end
        subminer_log("warn", "process", "Toggle command failed")
        show_osd("Toggle failed")
    end
    run_control_command_async("toggle-visible-overlay", nil, on_toggle_finished)
end
-- Toggle the primary subtitle bar through the binary.
-- A failed command is logged and shown on OSD; success is silent.
local function toggle_primary_subtitle_bar()
    if not binary.ensure_binary_available() then
        subminer_log("error", "binary", "SubMiner binary not found")
        show_osd("Error: binary not found")
        return
    end

    local function on_toggle_finished(ok)
        if ok then
            return
        end
        subminer_log("warn", "process", "Primary subtitle bar toggle command failed")
        show_osd("Primary subtitle toggle failed")
    end
    run_control_command_async("toggle-primary-subtitle-bar", nil, on_toggle_finished)
end
-- Open the options window by issuing the "settings" action.
-- The outcome is both logged and shown on OSD.
local function open_options()
    if not binary.ensure_binary_available() then
        subminer_log("error", "binary", "SubMiner binary not found")
        show_osd("Error: binary not found")
        return
    end

    local function on_settings_finished(ok)
        if not ok then
            subminer_log("warn", "process", "Failed to open options")
            show_osd("Failed to open options")
            return
        end
        subminer_log("info", "process", "Options window opened")
        show_osd("Options opened")
    end
    run_control_command_async("settings", nil, on_settings_finished)
end
-- Restart the overlay: issue "stop", then launch "start" once the stop
-- command has completed, whatever its outcome.
-- The start is attempted a single time and its result is shown on OSD.
local function restart_overlay()
    if not binary.ensure_binary_available() then
        subminer_log("error", "binary", "SubMiner binary not found")
        show_osd("Error: binary not found")
        return
    end

    subminer_log("info", "process", "Restarting overlay...")
    show_osd("Restarting...")

    -- Report the outcome of the start subprocess.
    local function on_start_finished(success, result, error)
        local failed = not success or (result and result.status ~= 0)
        if failed then
            state.overlay_running = false
            subminer_log(
                "error",
                "process",
                "Overlay start failed: " .. (error or (result and result.stderr) or "unknown error")
            )
            show_osd("Restart failed")
            return
        end
        show_osd("Restarted successfully")
    end

    -- Reset state after the stop command, then launch the overlay again.
    local function on_stop_finished()
        state.overlay_running = false
        state.texthooker_running = false
        disarm_auto_play_ready_gate()

        local start_args = build_command_args("start")
        subminer_log("info", "process", "Starting overlay: " .. table.concat(start_args, " "))

        state.overlay_running = true
        local request = {
            name = "subprocess",
            args = start_args,
            playback_only = false,
            capture_stdout = true,
            capture_stderr = true,
        }
        mp.command_native_async(request, on_start_finished)

        if resolve_texthooker_enabled(nil) then
            ensure_texthooker_running(function() end)
        end
    end

    run_control_command_async("stop", nil, on_stop_finished)
end
-- Show and log whether the overlay is considered running.
-- The answer comes from `state.overlay_running`; no process is queried.
local function check_status()
    if not binary.ensure_binary_available() then
        show_osd("Status: binary not found")
        return
    end

    local label = "stopped"
    if state.overlay_running then
        label = "running"
    end
    show_osd("Status: overlay is " .. label)
    subminer_log("info", "process", "Status check: overlay is " .. label)
end
-- Report whether the SubMiner binary is available.
-- Thin pass-through: returns whatever `binary.ensure_binary_available()` returns.
local function check_binary_available()
return binary.ensure_binary_available()
end
-- Functions exposed to the caller of M.create. Of the startup-gate helpers
-- only notify_auto_play_ready and disarm_auto_play_ready_gate are exported;
-- arming, releasing, and the timer helpers stay private to this closure.
return {
build_command_args = build_command_args,
has_matching_mpv_ipc_socket = has_matching_mpv_ipc_socket,
run_control_command_async = run_control_command_async,
run_binary_command_async = run_binary_command_async,
parse_start_script_message_overrides = parse_start_script_message_overrides,
ensure_texthooker_running = ensure_texthooker_running,
start_overlay = start_overlay,
start_overlay_from_script_message = start_overlay_from_script_message,
stop_overlay = stop_overlay,
hide_visible_overlay = hide_visible_overlay,
toggle_overlay = toggle_overlay,
toggle_primary_subtitle_bar = toggle_primary_subtitle_bar,
open_options = open_options,
restart_overlay = restart_overlay,
check_status = check_status,
check_binary_available = check_binary_available,
notify_auto_play_ready = notify_auto_play_ready,
disarm_auto_play_ready_gate = disarm_auto_play_ready_gate,
}
end
-- Module export: the table whose `create` function builds the API above.
return M