mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-30 06:12:06 -07:00
fix: strip inline subtitle markup from sidebar cues
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
---
|
||||
id: TASK-247
|
||||
title: Strip inline subtitle markup from subtitle sidebar cues
|
||||
status: Done
|
||||
assignee:
|
||||
- codex
|
||||
created_date: '2026-03-29 10:01'
|
||||
updated_date: '2026-03-29 10:10'
|
||||
labels: []
|
||||
dependencies: []
|
||||
references:
|
||||
- src/core/services/subtitle-cue-parser.ts
|
||||
- src/renderer/modals/subtitle-sidebar.ts
|
||||
- src/core/services/subtitle-cue-parser.test.ts
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||
Subtitle sidebar should display readable subtitle text when loaded subtitle files include inline markup such as HTML-like font tags. Parsed cue text currently preserves markup, causing raw tags to appear in the sidebar instead of clean subtitle content.
|
||||
<!-- SECTION:DESCRIPTION:END -->
|
||||
|
||||
## Acceptance Criteria
|
||||
<!-- AC:BEGIN -->
|
||||
- [x] #1 Subtitle sidebar cue text omits inline subtitle markup such as HTML-like font tags while preserving visible subtitle content.
|
||||
- [x] #2 Parsed subtitle cues used by the sidebar keep timing order and expected line-break behavior after markup sanitization.
|
||||
- [x] #3 Regression tests cover markup-bearing subtitle cue parsing so raw tags do not reappear in the sidebar.
|
||||
<!-- AC:END -->
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
<!-- SECTION:PLAN:BEGIN -->
|
||||
1. Add regression tests in src/core/services/subtitle-cue-parser.test.ts for subtitle cues containing HTML-like font tags, including multi-line content.
|
||||
2. Verify the new parser test fails against current behavior to confirm the bug is covered.
|
||||
3. Update src/core/services/subtitle-cue-parser.ts to sanitize inline subtitle markup while preserving visible text and expected newline handling.
|
||||
4. Re-run focused parser tests, then run broader verification commands required for handoff as practical.
|
||||
5. Update task notes/acceptance criteria based on verified results and finalize the task record.
|
||||
<!-- SECTION:PLAN:END -->
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
<!-- SECTION:NOTES:BEGIN -->
|
||||
User approved implementation on 2026-03-29.
|
||||
|
||||
Implemented parser-level subtitle cue sanitization for HTML-like tags so loaded sidebar cues render readable text while preserving cue line breaks.
|
||||
|
||||
Added regression coverage for SRT and ASS cue parsing with <font ...> markup.
|
||||
|
||||
Verification: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist.
|
||||
<!-- SECTION:NOTES:END -->
|
||||
|
||||
## Final Summary
|
||||
|
||||
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||
Sanitized parsed subtitle cue text in src/core/services/subtitle-cue-parser.ts so HTML-like inline markup such as <font ...> is removed before cues reach the subtitle sidebar. The sanitizer is shared across SRT/VTT-style parsing and ASS parsing, while existing cue timing and line-break semantics remain intact.
|
||||
|
||||
Added regression tests in src/core/services/subtitle-cue-parser.test.ts covering markup-bearing SRT lines and ASS dialogue lines with \N breaks, and verified the original failure before implementing the fix.
|
||||
|
||||
Tests run: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist.
|
||||
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||
@@ -35,6 +35,21 @@ test('parseSrtCues handles multi-line subtitle text', () => {
|
||||
assert.equal(cues[0]!.text, 'これは\nテストです');
|
||||
});
|
||||
|
||||
test('parseSrtCues strips HTML-like markup while preserving line breaks', () => {
  // One cue whose two text lines are each wrapped in their own <font> element.
  const srtLines = [
    '1',
    '00:01:00,000 --> 00:01:05,000',
    '<font color="japanese">これは</font>',
    '<font color="japanese">テストです</font>',
    '',
  ];

  const parsed = parseSrtCues(srtLines.join('\n'));

  // The tags must be gone while the line break between the two lines survives.
  assert.equal(parsed.length, 1);
  assert.equal(parsed[0]!.text, 'これは\nテストです');
});
|
||||
|
||||
test('parseSrtCues handles hours in timestamps', () => {
|
||||
const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n');
|
||||
|
||||
@@ -134,6 +149,18 @@ test('parseAssCues handles \\N line breaks', () => {
|
||||
assert.equal(cues[0]!.text, '一行目\\N二行目');
|
||||
});
|
||||
|
||||
test('parseAssCues strips HTML-like markup while preserving ASS line breaks', () => {
  // A single Dialogue event whose text has two <font>-wrapped segments joined by a literal \N.
  const assLines = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,<font color="japanese">一行目</font>\\N<font color="japanese">二行目</font>',
  ];

  const parsed = parseAssCues(assLines.join('\n'));

  // The literal \N marker is expected to remain in the cue text after the tags are stripped.
  assert.equal(parsed[0]!.text, '一行目\\N二行目');
});
|
||||
|
||||
test('parseAssCues returns empty for content without Events section', () => {
|
||||
const content = ['[Script Info]', 'Title: Test'].join('\n');
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ export interface SubtitleCue {
|
||||
text: string;
|
||||
}
|
||||
|
||||
// Matches every HTML-like opening or closing tag (e.g. `<font color="...">`, `</font>`):
// `<`, an optional `/`, an ASCII letter, then any characters other than `>` or a newline,
// up to the closing `>`. A tag is therefore never matched across a line break.
const HTML_SUBTITLE_TAG_PATTERN = /<\/?[A-Za-z][^>\n]*>/g;
|
||||
|
||||
// Matches a timing line of the form `start --> end`, anchored at the start of the line
// (leading whitespace allowed). Each timestamp is an optional 1-2 digit hours part, two-digit
// minutes and seconds, a `,` or `.` separator, and 1-3 fractional digits. Capture groups 1-4
// hold the start components and groups 5-8 the end components.
const SRT_TIMING_PATTERN =
|
||||
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
|
||||
|
||||
@@ -21,6 +23,10 @@ function parseTimestamp(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Strips inline markup from raw subtitle cue text.
 *
 * Everything matched by `ASS_OVERRIDE_TAG_PATTERN` is removed first, then everything matched
 * by `HTML_SUBTITLE_TAG_PATTERN`; finally leading and trailing whitespace is trimmed.
 *
 * @param text - Raw cue text as extracted from the subtitle file.
 * @returns The cue text without the matched markup; may be an empty string.
 */
function sanitizeSubtitleCueText(text: string): string {
  const withoutAssOverrides = text.replace(ASS_OVERRIDE_TAG_PATTERN, '');
  const withoutHtmlTags = withoutAssOverrides.replace(HTML_SUBTITLE_TAG_PATTERN, '');
  return withoutHtmlTags.trim();
}
|
||||
|
||||
export function parseSrtCues(content: string): SubtitleCue[] {
|
||||
const cues: SubtitleCue[] = [];
|
||||
const lines = content.split(/\r?\n/);
|
||||
@@ -54,7 +60,7 @@ export function parseSrtCues(content: string): SubtitleCue[] {
|
||||
i += 1;
|
||||
}
|
||||
|
||||
const text = textLines.join('\n').trim();
|
||||
const text = sanitizeSubtitleCueText(textLines.join('\n'));
|
||||
if (text) {
|
||||
cues.push({ startTime, endTime, text });
|
||||
}
|
||||
@@ -140,13 +146,9 @@ export function parseAssCues(content: string): SubtitleCue[] {
|
||||
continue;
|
||||
}
|
||||
|
||||
const rawText = fields
|
||||
.slice(textFieldIndex)
|
||||
.join(',')
|
||||
.replace(ASS_OVERRIDE_TAG_PATTERN, '')
|
||||
.trim();
|
||||
if (rawText) {
|
||||
cues.push({ startTime, endTime, text: rawText });
|
||||
const text = sanitizeSubtitleCueText(fields.slice(textFieldIndex).join(','));
|
||||
if (text) {
|
||||
cues.push({ startTime, endTime, text });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user