Refactor, fix spacing with some engines like OneOCR

This commit is contained in:
AuroraWright
2025-10-05 23:49:28 +02:00
parent b7e0df6c19
commit 0143a6d97c
2 changed files with 116 additions and 114 deletions

View File

@@ -85,8 +85,6 @@ try:
except:
optimized_png_encode = False
# Matches any single character in one of these code point ranges:
#   U+3041-U+3096  hiragana letters
#   U+30A1-U+30FA  katakana letters
#   U+4E00-U+9FFF  CJK unified ideographs
cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
@dataclass
class BoundingBox:
@@ -136,18 +134,6 @@ class OcrResult:
def empty_post_process(text):
    """Return ``text`` unchanged (no-op post-processing step)."""
    return text
def post_process(text):
    """Normalize a block of text into a single line.

    Steps, in order:

    1. Join the lines of ``text`` with a single space. If the text contains
       at least one character matched by ``cj_regex``, all whitespace inside
       each line is removed first; otherwise each run of whitespace inside a
       line is collapsed to one space and the line is stripped.
    2. Replace every horizontal ellipsis character (U+2026) with three
       ASCII dots.
    3. Replace every run of two or more ``・`` / ``.`` characters with the
       same number of ASCII dots.
    4. For text matched by ``cj_regex`` only, pass the result through
       ``jaconv.h2z(..., ascii=True, digit=True)``.

    Args:
        text: The string to normalize.

    Returns:
        The normalized single-line string.
    """
    # Decide once, on the raw input, whether the CJ-specific handling applies.
    is_cj_text = cj_regex.search(text)
    lines = text.splitlines()
    if is_cj_text:
        # Whitespace inside a line is dropped entirely for this kind of text.
        text = ' '.join(''.join(line.split()) for line in lines)
    else:
        text = ' '.join(re.sub(r'\s+', ' ', line).strip() for line in lines)
    # The ellipsis is written as an escape so the literal cannot silently
    # degrade to an empty string; replacing '' would insert '...' before every
    # character of the text.
    text = text.replace('\u2026', '...')
    # Same-length replacement: each matched character becomes one ASCII dot.
    text = re.sub(r'[・.]{2,}', lambda m: '.' * (m.end() - m.start()), text)
    if is_cj_text:
        text = jaconv.h2z(text, ascii=True, digit=True)
    return text
def input_to_pil_image(img):
is_path = False
if isinstance(img, Image.Image):