From a55819e90d682035ab54c12ce4e98d2af9d0ab9b Mon Sep 17 00:00:00 2001 From: sudacode Date: Sun, 29 Mar 2026 15:16:56 -0700 Subject: [PATCH] fix: strip inline subtitle markup from sidebar cues --- ...title-markup-from-subtitle-sidebar-cues.md | 60 +++++++++++++++++++ src/core/services/subtitle-cue-parser.test.ts | 27 +++++++++ src/core/services/subtitle-cue-parser.ts | 18 +++--- 3 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 backlog/tasks/task-247 - Strip-inline-subtitle-markup-from-subtitle-sidebar-cues.md diff --git a/backlog/tasks/task-247 - Strip-inline-subtitle-markup-from-subtitle-sidebar-cues.md b/backlog/tasks/task-247 - Strip-inline-subtitle-markup-from-subtitle-sidebar-cues.md new file mode 100644 index 0000000..6b40532 --- /dev/null +++ b/backlog/tasks/task-247 - Strip-inline-subtitle-markup-from-subtitle-sidebar-cues.md @@ -0,0 +1,60 @@ +--- +id: TASK-247 +title: Strip inline subtitle markup from subtitle sidebar cues +status: Done +assignee: + - codex +created_date: '2026-03-29 10:01' +updated_date: '2026-03-29 10:10' +labels: [] +dependencies: [] +references: + - src/core/services/subtitle-cue-parser.ts + - src/renderer/modals/subtitle-sidebar.ts + - src/core/services/subtitle-cue-parser.test.ts +--- + +## Description + + +Subtitle sidebar should display readable subtitle text when loaded subtitle files include inline markup such as HTML-like font tags. Parsed cue text currently preserves markup, causing raw tags to appear in the sidebar instead of clean subtitle content. + + +## Acceptance Criteria + +- [x] #1 Subtitle sidebar cue text omits inline subtitle markup such as HTML-like font tags while preserving visible subtitle content. +- [x] #2 Parsed subtitle cues used by the sidebar keep timing order and expected line-break behavior after markup sanitization. +- [x] #3 Regression tests cover markup-bearing subtitle cue parsing so raw tags do not reappear in the sidebar. + + +## Implementation Plan + + +1. 
Add regression tests in src/core/services/subtitle-cue-parser.test.ts for subtitle cues containing HTML-like font tags, including multi-line content. +2. Verify the new parser test fails against current behavior to confirm the bug is covered. +3. Update src/core/services/subtitle-cue-parser.ts to sanitize inline subtitle markup while preserving visible text and expected newline handling. +4. Re-run focused parser tests, then run broader verification commands required for handoff as far as practical. +5. Update task notes/acceptance criteria based on verified results and finalize the task record. + + +## Implementation Notes + + +User approved implementation on 2026-03-29. + +Implemented parser-level subtitle cue sanitization for HTML-like tags so loaded sidebar cues render readable text while preserving cue line breaks. + +Added regression coverage for SRT and ASS cue parsing with markup. + +Verification: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist. + + +## Final Summary + + +Sanitized parsed subtitle cue text in src/core/services/subtitle-cue-parser.ts so HTML-like inline markup such as `<font>` is removed before cues reach the subtitle sidebar. The sanitizer is shared across SRT/VTT-style parsing and ASS parsing, while existing cue timing and line-break semantics remain intact. + +Added regression tests in src/core/services/subtitle-cue-parser.test.ts covering markup-bearing SRT lines and ASS dialogue lines with \N breaks, and verified the original failure before implementing the fix. + +Tests run: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist. 
+ diff --git a/src/core/services/subtitle-cue-parser.test.ts b/src/core/services/subtitle-cue-parser.test.ts index 5c59e62..d908f14 100644 --- a/src/core/services/subtitle-cue-parser.test.ts +++ b/src/core/services/subtitle-cue-parser.test.ts @@ -35,6 +35,21 @@ test('parseSrtCues handles multi-line subtitle text', () => { assert.equal(cues[0]!.text, 'これは\nテストです'); }); +test('parseSrtCues strips HTML-like markup while preserving line breaks', () => { + const content = [ + '1', + '00:01:00,000 --> 00:01:05,000', + 'これは', + 'テストです', + '', + ].join('\n'); + + const cues = parseSrtCues(content); + + assert.equal(cues.length, 1); + assert.equal(cues[0]!.text, 'これは\nテストです'); +}); + test('parseSrtCues handles hours in timestamps', () => { const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n'); @@ -134,6 +149,18 @@ test('parseAssCues handles \\N line breaks', () => { assert.equal(cues[0]!.text, '一行目\\N二行目'); }); +test('parseAssCues strips HTML-like markup while preserving ASS line breaks', () => { + const content = [ + '[Events]', + 'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text', + 'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,一行目\\N二行目', + ].join('\n'); + + const cues = parseAssCues(content); + + assert.equal(cues[0]!.text, '一行目\\N二行目'); +}); + test('parseAssCues returns empty for content without Events section', () => { const content = ['[Script Info]', 'Title: Test'].join('\n'); diff --git a/src/core/services/subtitle-cue-parser.ts b/src/core/services/subtitle-cue-parser.ts index 760bba3..e1f0434 100644 --- a/src/core/services/subtitle-cue-parser.ts +++ b/src/core/services/subtitle-cue-parser.ts @@ -4,6 +4,8 @@ export interface SubtitleCue { text: string; } +const HTML_SUBTITLE_TAG_PATTERN = /<\/?[A-Za-z][^>\n]*>/g; + const SRT_TIMING_PATTERN = /^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/; @@ -21,6 +23,10 @@ function parseTimestamp( ); } +function 
sanitizeSubtitleCueText(text: string): string { + return text.replace(ASS_OVERRIDE_TAG_PATTERN, '').replace(HTML_SUBTITLE_TAG_PATTERN, '').trim(); +} + export function parseSrtCues(content: string): SubtitleCue[] { const cues: SubtitleCue[] = []; const lines = content.split(/\r?\n/); @@ -54,7 +60,7 @@ export function parseSrtCues(content: string): SubtitleCue[] { i += 1; } - const text = textLines.join('\n').trim(); + const text = sanitizeSubtitleCueText(textLines.join('\n')); if (text) { cues.push({ startTime, endTime, text }); } @@ -140,13 +146,9 @@ export function parseAssCues(content: string): SubtitleCue[] { continue; } - const rawText = fields - .slice(textFieldIndex) - .join(',') - .replace(ASS_OVERRIDE_TAG_PATTERN, '') - .trim(); - if (rawText) { - cues.push({ startTime, endTime, text: rawText }); + const text = sanitizeSubtitleCueText(fields.slice(textFieldIndex).join(',')); + if (text) { + cues.push({ startTime, endTime, text }); } }