mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-30 06:12:06 -07:00
fix: strip inline subtitle markup from sidebar cues
This commit is contained in:
@@ -0,0 +1,60 @@
|
|||||||
|
---
|
||||||
|
id: TASK-247
|
||||||
|
title: Strip inline subtitle markup from subtitle sidebar cues
|
||||||
|
status: Done
|
||||||
|
assignee:
|
||||||
|
- codex
|
||||||
|
created_date: '2026-03-29 10:01'
|
||||||
|
updated_date: '2026-03-29 10:10'
|
||||||
|
labels: []
|
||||||
|
dependencies: []
|
||||||
|
references:
|
||||||
|
- src/core/services/subtitle-cue-parser.ts
|
||||||
|
- src/renderer/modals/subtitle-sidebar.ts
|
||||||
|
- src/core/services/subtitle-cue-parser.test.ts
|
||||||
|
---
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
<!-- SECTION:DESCRIPTION:BEGIN -->
|
||||||
|
Subtitle sidebar should display readable subtitle text when loaded subtitle files include inline markup such as HTML-like font tags. Parsed cue text currently preserves markup, causing raw tags to appear in the sidebar instead of clean subtitle content.
|
||||||
|
<!-- SECTION:DESCRIPTION:END -->
|
||||||
|
|
||||||
|
## Acceptance Criteria
|
||||||
|
<!-- AC:BEGIN -->
|
||||||
|
- [x] #1 Subtitle sidebar cue text omits inline subtitle markup such as HTML-like font tags while preserving visible subtitle content.
|
||||||
|
- [x] #2 Parsed subtitle cues used by the sidebar keep timing order and expected line-break behavior after markup sanitization.
|
||||||
|
- [x] #3 Regression tests cover markup-bearing subtitle cue parsing so raw tags do not reappear in the sidebar.
|
||||||
|
<!-- AC:END -->
|
||||||
|
|
||||||
|
## Implementation Plan
|
||||||
|
|
||||||
|
<!-- SECTION:PLAN:BEGIN -->
|
||||||
|
1. Add regression tests in src/core/services/subtitle-cue-parser.test.ts for subtitle cues containing HTML-like font tags, including multi-line content.
|
||||||
|
2. Verify the new parser test fails against current behavior to confirm the bug is covered.
|
||||||
|
3. Update src/core/services/subtitle-cue-parser.ts to sanitize inline subtitle markup while preserving visible text and expected newline handling.
|
||||||
|
4. Re-run focused parser tests, then run broader verification commands required for handoff as practical.
|
||||||
|
5. Update task notes/acceptance criteria based on verified results and finalize the task record.
|
||||||
|
<!-- SECTION:PLAN:END -->
|
||||||
|
|
||||||
|
## Implementation Notes
|
||||||
|
|
||||||
|
<!-- SECTION:NOTES:BEGIN -->
|
||||||
|
User approved implementation on 2026-03-29.
|
||||||
|
|
||||||
|
Implemented parser-level subtitle cue sanitization for HTML-like tags so loaded sidebar cues render readable text while preserving cue line breaks.
|
||||||
|
|
||||||
|
Added regression coverage for SRT and ASS cue parsing with <font ...> markup.
|
||||||
|
|
||||||
|
Verification: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist.
|
||||||
|
<!-- SECTION:NOTES:END -->
|
||||||
|
|
||||||
|
## Final Summary
|
||||||
|
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
|
||||||
|
Sanitized parsed subtitle cue text in src/core/services/subtitle-cue-parser.ts so HTML-like inline markup such as <font ...> is removed before cues reach the subtitle sidebar. The sanitizer is shared across SRT/VTT-style parsing and ASS parsing, while existing cue timing and line-break semantics remain intact.
|
||||||
|
|
||||||
|
Added regression tests in src/core/services/subtitle-cue-parser.test.ts covering markup-bearing SRT lines and ASS dialogue lines with \N breaks, and verified the original failure before implementing the fix.
|
||||||
|
|
||||||
|
Tests run: bun test src/core/services/subtitle-cue-parser.test.ts; bun run typecheck; bun run test:fast; bun run test:env; bun run build; bun run test:smoke:dist.
|
||||||
|
<!-- SECTION:FINAL_SUMMARY:END -->
|
||||||
@@ -35,6 +35,21 @@ test('parseSrtCues handles multi-line subtitle text', () => {
|
|||||||
assert.equal(cues[0]!.text, 'これは\nテストです');
|
assert.equal(cues[0]!.text, 'これは\nテストです');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('parseSrtCues strips HTML-like markup while preserving line breaks', () => {
  // A single cue whose two text lines are each wrapped in a <font> element.
  const srtLines = [
    '1',
    '00:01:00,000 --> 00:01:05,000',
    '<font color="japanese">これは</font>',
    '<font color="japanese">テストです</font>',
    '',
  ];

  const parsed = parseSrtCues(srtLines.join('\n'));

  // Exactly one cue comes back; the tags are gone and the newline between
  // the two text lines is still present.
  assert.equal(parsed.length, 1);
  assert.equal(parsed[0]!.text, 'これは\nテストです');
});
|
||||||
|
|
||||||
test('parseSrtCues handles hours in timestamps', () => {
|
test('parseSrtCues handles hours in timestamps', () => {
|
||||||
const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n');
|
const content = ['1', '01:30:00,000 --> 01:30:05,000', 'テスト', ''].join('\n');
|
||||||
|
|
||||||
@@ -134,6 +149,18 @@ test('parseAssCues handles \\N line breaks', () => {
|
|||||||
assert.equal(cues[0]!.text, '一行目\\N二行目');
|
assert.equal(cues[0]!.text, '一行目\\N二行目');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('parseAssCues strips HTML-like markup while preserving ASS line breaks', () => {
  // Minimal [Events] section: one Dialogue line whose text holds two
  // <font>-wrapped segments separated by a literal `\N` sequence.
  const assLines = [
    '[Events]',
    'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
    'Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,<font color="japanese">一行目</font>\\N<font color="japanese">二行目</font>',
  ];

  const parsed = parseAssCues(assLines.join('\n'));

  // The tags are removed while the literal `\N` between the segments is kept as-is.
  assert.equal(parsed[0]!.text, '一行目\\N二行目');
});
|
||||||
|
|
||||||
test('parseAssCues returns empty for content without Events section', () => {
|
test('parseAssCues returns empty for content without Events section', () => {
|
||||||
const content = ['[Script Info]', 'Title: Test'].join('\n');
|
const content = ['[Script Info]', 'Title: Test'].join('\n');
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ export interface SubtitleCue {
|
|||||||
text: string;
|
text: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Matches one HTML-like tag: `<`, an optional `/`, an ASCII letter, then any run of
 * characters other than `>` or a newline, up to the closing `>`.
 *
 * Requiring a letter right after `<` (or `</`) means a `<` followed by a space, digit,
 * or other symbol is not treated as a tag. Excluding `\n` keeps a match from spanning
 * multiple lines. The `g` flag makes the pattern apply to every occurrence in the input.
 */
const HTML_SUBTITLE_TAG_PATTERN = /<\/?[A-Za-z][^>\n]*>/g;
|
||||||
|
|
||||||
const SRT_TIMING_PATTERN =
|
const SRT_TIMING_PATTERN =
|
||||||
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
|
/^\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*(?:(\d{1,2}):)?(\d{2}):(\d{2})[,.](\d{1,3})/;
|
||||||
|
|
||||||
@@ -21,6 +23,10 @@ function parseTimestamp(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cleans raw cue text before it is stored on a parsed cue.
 *
 * Removes every match of `ASS_OVERRIDE_TAG_PATTERN` first, then every match of
 * `HTML_SUBTITLE_TAG_PATTERN`, and finally trims leading and trailing whitespace.
 * No other normalization is applied here.
 *
 * @param text - Raw cue text as read from the subtitle file.
 * @returns The text with both kinds of matches removed and outer whitespace trimmed;
 *   may be an empty string when nothing else remains.
 */
function sanitizeSubtitleCueText(text: string): string {
  const withoutAssOverrides = text.replace(ASS_OVERRIDE_TAG_PATTERN, '');
  const withoutHtmlTags = withoutAssOverrides.replace(HTML_SUBTITLE_TAG_PATTERN, '');
  return withoutHtmlTags.trim();
}
|
||||||
|
|
||||||
export function parseSrtCues(content: string): SubtitleCue[] {
|
export function parseSrtCues(content: string): SubtitleCue[] {
|
||||||
const cues: SubtitleCue[] = [];
|
const cues: SubtitleCue[] = [];
|
||||||
const lines = content.split(/\r?\n/);
|
const lines = content.split(/\r?\n/);
|
||||||
@@ -54,7 +60,7 @@ export function parseSrtCues(content: string): SubtitleCue[] {
|
|||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const text = textLines.join('\n').trim();
|
const text = sanitizeSubtitleCueText(textLines.join('\n'));
|
||||||
if (text) {
|
if (text) {
|
||||||
cues.push({ startTime, endTime, text });
|
cues.push({ startTime, endTime, text });
|
||||||
}
|
}
|
||||||
@@ -140,13 +146,9 @@ export function parseAssCues(content: string): SubtitleCue[] {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const rawText = fields
|
const text = sanitizeSubtitleCueText(fields.slice(textFieldIndex).join(','));
|
||||||
.slice(textFieldIndex)
|
if (text) {
|
||||||
.join(',')
|
cues.push({ startTime, endTime, text });
|
||||||
.replace(ASS_OVERRIDE_TAG_PATTERN, '')
|
|
||||||
.trim();
|
|
||||||
if (rawText) {
|
|
||||||
cues.push({ startTime, endTime, text: rawText });
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user