Preserve text block splits (for engines detecting them)
This commit is contained in:
12
owocr/ocr.py
12
owocr/ocr.py
@@ -78,7 +78,7 @@ def empty_post_process(text):
|
|||||||
|
|
||||||
|
|
||||||
def post_process(text):
|
def post_process(text):
|
||||||
text = ''.join(text.split())
|
text = ' '.join([''.join(i.split()) for i in text.splitlines()])
|
||||||
text = text.replace('…', '...')
|
text = text.replace('…', '...')
|
||||||
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
|
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
|
||||||
text = jaconv.h2z(text, ascii=True, digit=True)
|
text = jaconv.h2z(text, ascii=True, digit=True)
|
||||||
@@ -224,7 +224,7 @@ class GoogleLens:
|
|||||||
if len(text) > 0:
|
if len(text) > 0:
|
||||||
lines = text[0]
|
lines = text[0]
|
||||||
for line in lines:
|
for line in lines:
|
||||||
res += line + ' '
|
res += line + '\n'
|
||||||
|
|
||||||
x = (True, res)
|
x = (True, res)
|
||||||
return x
|
return x
|
||||||
@@ -277,7 +277,7 @@ class AppleVision:
|
|||||||
res = ''
|
res = ''
|
||||||
if success[0]:
|
if success[0]:
|
||||||
for result in req.results():
|
for result in req.results():
|
||||||
res += result.text() + ' '
|
res += result.text() + '\n'
|
||||||
req.dealloc()
|
req.dealloc()
|
||||||
x = (True, res)
|
x = (True, res)
|
||||||
else:
|
else:
|
||||||
@@ -382,7 +382,7 @@ class AzureImageAnalysis:
|
|||||||
if read_result.read:
|
if read_result.read:
|
||||||
for block in read_result.read.blocks:
|
for block in read_result.read.blocks:
|
||||||
for line in block.lines:
|
for line in block.lines:
|
||||||
res += line.text + ' '
|
res += line.text + '\n'
|
||||||
else:
|
else:
|
||||||
return (False, 'Unknown error!')
|
return (False, 'Unknown error!')
|
||||||
|
|
||||||
@@ -418,7 +418,7 @@ class EasyOCR:
|
|||||||
res = ''
|
res = ''
|
||||||
read_result = self.model.readtext(self._preprocess(img), detail=0)
|
read_result = self.model.readtext(self._preprocess(img), detail=0)
|
||||||
for text in read_result:
|
for text in read_result:
|
||||||
res += text + ' '
|
res += text + '\n'
|
||||||
|
|
||||||
x = (True, res)
|
x = (True, res)
|
||||||
return x
|
return x
|
||||||
@@ -466,7 +466,7 @@ class RapidOCR:
|
|||||||
read_results, elapsed = self.model(self._preprocess(img))
|
read_results, elapsed = self.model(self._preprocess(img))
|
||||||
if read_results:
|
if read_results:
|
||||||
for read_result in read_results:
|
for read_result in read_results:
|
||||||
res += read_result[1] + ' '
|
res += read_result[1] + '\n'
|
||||||
|
|
||||||
x = (True, res)
|
x = (True, res)
|
||||||
return x
|
return x
|
||||||
|
|||||||
Reference in New Issue
Block a user