fix: strip inline subtitle markup from sidebar cues

This commit is contained in:
2026-03-29 15:16:56 -07:00
parent a0b0b9f972
commit a55819e90d
3 changed files with 97 additions and 8 deletions

View File

@@ -0,0 +1,60 @@
---
id: TASK-247
title: Strip inline subtitle markup from subtitle sidebar cues
status: Done
assignee:
- codex
created_date: '2026-03-29 10:01'
updated_date: '2026-03-29 10:10'
labels: []
dependencies: []
references:
- src/core/services/subtitle-cue-parser.ts
- src/renderer/modals/subtitle-sidebar.ts
- src/core/services/subtitle-cue-parser.test.ts
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Subtitle sidebar should display readable subtitle text when loaded subtitle files include inline markup such as HTML-like font tags. Parsed cue text currently preserves markup, causing raw tags to appear in the sidebar instead of clean subtitle content.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [x] #1 Subtitle sidebar cue text omits inline subtitle markup such as HTML-like font tags while preserving visible subtitle content.
- [x] #2 Parsed subtitle cues used by the sidebar keep timing order and expected line-break behavior after markup sanitization.
- [x] #3 Regression tests cover markup-bearing subtitle cue parsing so raw tags do not reappear in the sidebar.
<!-- AC:END -->
## Implementation Plan
<!-- SECTION:PLAN:BEGIN -->
1. Add regression tests in src/core/services/subtitle-cue-parser.test.ts for subtitle cues containing HTML-like font tags, including multi-line content.
2. Verify the new parser test fails against current behavior to confirm the bug is covered.
3. Update src/core/services/subtitle-cue-parser.ts to sanitize inline subtitle markup while preserving visible text and expected newline handling.
4. Re-run focused parser tests, then run broader verification commands required for handoff as practical.
5. Update task notes/acceptance criteria based on verified results and finalize the task record.
<!-- SECTION:PLAN:END -->
## Implementation Notes
<!-- SECTION:NOTES:BEGIN -->
User approved implementation on 2026-03-29.
Implemented parser-level subtitle cue sanitization for HTML-like tags so loaded sidebar cues render readable text while preserving cue line breaks.
Added regression coverage for SRT and ASS cue parsing with `<font ...>` markup.
Verification: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist.
<!-- SECTION:NOTES:END -->
## Final Summary
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
Sanitized parsed subtitle cue text in src/core/services/subtitle-cue-parser.ts so HTML-like inline markup such as `<font ...>` is removed before cues reach the subtitle sidebar. The sanitizer is shared across SRT/VTT-style parsing and ASS parsing, while existing cue timing and line-break semantics remain intact.
Added regression tests in src/core/services/subtitle-cue-parser.test.ts covering markup-bearing SRT lines and ASS dialogue lines with \N breaks, and verified the original failure before implementing the fix.
Tests run: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist.
<!-- SECTION:FINAL_SUMMARY:END -->

View File

@@ -35,6 +35,21 @@ test('parseSrtCues handles multi-line subtitle text', () => {
assert.equal(cues[0]!.text, 'これは\nテストです');
});
test('parseSrtCues strips HTML-like markup while preserving line breaks', () => {
  // One cue whose two text lines are each wrapped in a <font> element.
  const srtLines = [
    '1',
    '00:01:00,000 --> 00:01:05,000',
    '<font color="japanese">これは</font>',
    '<font color="japanese">テストです</font>',
    '',
  ];
  const parsed = parseSrtCues(srtLines.join('\n'));

  assert.equal(parsed.length, 1);

  // The tags are gone, but the newline separating the two lines is kept.
  const [onlyCue] = parsed;
  assert.equal(onlyCue!.text, 'これは\nテストです');
});
test('parseSrtCues handles hours in timestamps', () => {
const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n');
@@ -134,6 +149,18 @@ test('parseAssCues handles \\N line breaks', () => {
assert.equal(cues[0]!.text, '一行目\\N二行目');
});
test('parseAssCues strips HTML-like markup while preserving ASS line breaks', () => {
  // A single Dialogue event whose Text field wraps each side of an ASS "\N"
  // break marker in a <font> element.
  const content = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,<font color="japanese">一行目</font>\\N<font color="japanese">二行目</font>',
  ].join('\n');

  const cues = parseAssCues(content);

  // Pin the cue count before indexing: without it, a regression that yields no
  // cues fails with a TypeError on cues[0] instead of a readable assertion, and
  // a regression that yields extra cues would go unnoticed.
  assert.equal(cues.length, 1);
  // The markup is removed while the literal "\N" marker stays in the cue text.
  assert.equal(cues[0]!.text, '一行目\\N二行目');
});
test('parseAssCues returns empty for content without Events section', () => {
const content = ['[Script Info]', 'Title: Test'].join('\n');

View File

@@ -4,6 +4,8 @@ export interface SubtitleCue {
text: string;
}
// Matches an HTML-like opening or closing tag: "<", an optional "/", one ASCII
// letter, then any run of characters other than ">" or a newline, up to the
// closing ">". A "<" that is not followed by a letter (or "/" + letter) is not
// matched. The `g` flag makes String.prototype.replace remove every occurrence.
const HTML_SUBTITLE_TAG_PATTERN = /<\/?[A-Za-z][^>\n]*>/g;
// Matches a timing line of the form "start --> end", anchored at the start of
// the string (leading whitespace allowed; nothing anchors the end, so trailing
// text after the end timestamp is accepted).
// Each timestamp is an optional 1-2 digit hours part plus ":", then MM:SS,
// then "," or "." and a 1-3 digit fraction.
// Capture groups 1-4 are start hours (optional), minutes, seconds, fraction;
// groups 5-8 are the same parts of the end timestamp.
const SRT_TIMING_PATTERN =
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
@@ -21,6 +23,10 @@ function parseTimestamp(
);
}
/**
 * Removes inline markup from raw cue text.
 *
 * Everything matched by ASS_OVERRIDE_TAG_PATTERN is deleted first, then
 * everything matched by HTML_SUBTITLE_TAG_PATTERN, and the result is trimmed
 * of leading and trailing whitespace.
 *
 * @param text Raw cue text as extracted by a parser.
 * @returns The cleaned text; may be the empty string when only markup or
 *   whitespace was present.
 */
function sanitizeSubtitleCueText(text: string): string {
  const withoutOverrideTags = text.replace(ASS_OVERRIDE_TAG_PATTERN, '');
  const withoutHtmlTags = withoutOverrideTags.replace(HTML_SUBTITLE_TAG_PATTERN, '');
  return withoutHtmlTags.trim();
}
export function parseSrtCues(content: string): SubtitleCue[] {
const cues: SubtitleCue[] = [];
const lines = content.split(/\r?\n/);
@@ -54,7 +60,7 @@ export function parseSrtCues(content: string): SubtitleCue[] {
i += 1;
}
const text = textLines.join('\n').trim();
const text = sanitizeSubtitleCueText(textLines.join('\n'));
if (text) {
cues.push({ startTime, endTime, text });
}
@@ -140,13 +146,9 @@ export function parseAssCues(content: string): SubtitleCue[] {
continue;
}
const rawText = fields
.slice(textFieldIndex)
.join(',')
.replace(ASS_OVERRIDE_TAG_PATTERN, '')
.trim();
if (rawText) {
cues.push({ startTime, endTime, text: rawText });
const text = sanitizeSubtitleCueText(fields.slice(textFieldIndex).join(','));
if (text) {
cues.push({ startTime, endTime, text });
}
}