Filter out repeated blocks of text when reading with screen capture

This commit is contained in:
AuroraWright
2024-01-30 19:22:13 +01:00
parent e1caf06134
commit e097bce606
2 changed files with 29 additions and 13 deletions

View File

@@ -67,12 +67,15 @@ except ImportError:
pass pass
def empty_post_process(text):
    """Return ``text`` unchanged.

    Identity function taking a single text argument, usable wherever a
    text post-processing callable is expected but no processing is wanted.
    """
    return text
def post_process(text):
    """Normalize a recognized text string and return the result.

    Steps, applied in order:

    1. Remove all whitespace (split on whitespace, join with nothing).
    2. Replace the one-character ellipsis (U+2026) with three ASCII dots.
    3. Replace every run of two or more '・' / '.' characters with the same
       number of ASCII dots.
    4. Pass the text through ``jaconv.h2z`` with ``ascii=True`` and
       ``digit=True``.
    """
    text = ''.join(text.split())
    # The search string must be the ellipsis character; with an empty search
    # string str.replace would insert '...' between every character. The
    # escape form is used so the character survives re-encoding.
    text = text.replace('\u2026', '...')
    text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
    text = jaconv.h2z(text, ascii=True, digit=True)
    return text
@@ -87,6 +90,8 @@ class MangaOcr:
logger.warning('manga-ocr not available, Manga OCR will not work!') logger.warning('manga-ocr not available, Manga OCR will not work!')
else: else:
logger.disable('manga_ocr') logger.disable('manga_ocr')
from manga_ocr import ocr
ocr.post_process = empty_post_process
logger.info(f'Loading Manga OCR model') logger.info(f'Loading Manga OCR model')
self.model = MOCR(config['pretrained_model_name_or_path'], config['force_cpu']) self.model = MOCR(config['pretrained_model_name_or_path'], config['force_cpu'])
self.available = True self.available = True
@@ -140,7 +145,7 @@ class GoogleVision:
except: except:
return (False, 'Unknown error!') return (False, 'Unknown error!')
texts = response.text_annotations texts = response.text_annotations
x = (True, post_process(texts[0].description)) x = (True, texts[0].description)
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -200,7 +205,7 @@ class GoogleLens:
for line in lines: for line in lines:
res += line + ' ' res += line + ' '
x = (True, post_process(res)) x = (True, res)
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -255,7 +260,7 @@ class AppleVision:
for result in req.results(): for result in req.results():
res += result.text() + ' ' res += result.text() + ' '
req.dealloc() req.dealloc()
x = (True, post_process(res)) x = (True, res)
else: else:
x = (False, 'Unknown error!') x = (False, 'Unknown error!')
@@ -317,7 +322,7 @@ class WinRTOCR:
res = json.loads(res.text)['text'] res = json.loads(res.text)['text']
x = (True, post_process(res)) x = (True, res)
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -378,7 +383,7 @@ class AzureComputerVision:
else: else:
return (False, 'Unknown error!') return (False, 'Unknown error!')
x = (True, post_process(res)) x = (True, res)
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -415,7 +420,7 @@ class EasyOCR:
for text in read_result: for text in read_result:
res += text + ' ' res += text + ' '
x = (True, post_process(res)) x = (True, res)
return x return x
def _preprocess(self, img): def _preprocess(self, img):
@@ -463,7 +468,7 @@ class RapidOCR:
for read_result in read_results: for read_result in read_results:
res += read_result[1] + ' ' res += read_result[1] + ' '
x = (True, post_process(res)) x = (True, res)
return x return x
def _preprocess(self, img): def _preprocess(self, img):

View File

@@ -13,6 +13,7 @@ import asyncio
import websockets import websockets
import queue import queue
import io import io
import re
from PIL import Image from PIL import Image
from PIL import UnidentifiedImageError from PIL import UnidentifiedImageError
@@ -249,13 +250,17 @@ def are_images_identical(img1, img2):
return (img1.shape == img2.shape) and (img1 == img2).all() return (img1.shape == img2.shape) and (img1 == img2).all()
def process_and_write_results(engine_instance, img_or_path, write_to): def process_and_write_results(engine_instance, img_or_path, write_to, last_text):
t0 = time.time() t0 = time.time()
res, text = engine_instance(img_or_path) res, text = engine_instance(img_or_path)
t1 = time.time() t1 = time.time()
engine_color = config.get_general('engine_color') engine_color = config.get_general('engine_color')
if res: if res:
orig_text = text
if last_text != '':
text = ''.join(map(str, [block for block in re.split(r'[  ,!?.、。?!"「」\']', text) if block not in re.split(r'[  ,!?.、。?!"「」\']', last_text)]))
text = post_process(text)
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}') logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
if config.get_general('notifications'): if config.get_general('notifications'):
notification = Notify() notification = Notify()
@@ -264,6 +269,7 @@ def process_and_write_results(engine_instance, img_or_path, write_to):
notification.message = text notification.message = text
notification.send(block=False) notification.send(block=False)
else: else:
orig_text = ''
logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}') logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}')
if write_to == 'websocket': if write_to == 'websocket':
@@ -278,6 +284,8 @@ def process_and_write_results(engine_instance, img_or_path, write_to):
with write_to.open('a', encoding='utf-8') as f: with write_to.open('a', encoding='utf-8') as f:
f.write(text + '\n') f.write(text + '\n')
return (res, orig_text)
def get_path_key(path):
    """Return a ``(path, mtime)`` tuple for ``path``.

    The modification time comes from ``path.lstat()``, so for a symbolic
    link it is the link's own timestamp rather than its target's.
    """
    return path, path.lstat().st_mtime
@@ -422,6 +430,7 @@ def run(read_from=None,
screencapture_window_mode = False screencapture_window_mode = False
screencapture_window_active = True screencapture_window_active = True
screencapture_window_visible = True screencapture_window_visible = True
last_text = ''
sct = mss.mss() sct = mss.mss()
mon = sct.monitors mon = sct.monitors
if len(mon) <= screen_capture_monitor: if len(mon) <= screen_capture_monitor:
@@ -501,7 +510,7 @@ def run(read_from=None,
else: else:
if not paused and not tmp_paused: if not paused and not tmp_paused:
img = Image.open(io.BytesIO(item)) img = Image.open(io.BytesIO(item))
process_and_write_results(engine_instances[engine_index], img, write_to) process_and_write_results(engine_instances[engine_index], img, write_to, '')
elif read_from == 'clipboard': elif read_from == 'clipboard':
if windows_clipboard_polling: if windows_clipboard_polling:
clipboard_changed = clipboard_event.wait(delay_secs) clipboard_changed = clipboard_event.wait(delay_secs)
@@ -528,7 +537,7 @@ def run(read_from=None,
isinstance(img, Image.Image) and \ isinstance(img, Image.Image) and \
(ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \ (ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \
((not generic_clipboard_polling) or (not are_images_identical(img, old_img))): ((not generic_clipboard_polling) or (not are_images_identical(img, old_img))):
process_and_write_results(engine_instances[engine_index], img, write_to) process_and_write_results(engine_instances[engine_index], img, write_to, '')
just_unpaused = False just_unpaused = False
@@ -545,7 +554,9 @@ def run(read_from=None,
if take_screenshot and screencapture_window_visible: if take_screenshot and screencapture_window_visible:
sct_img = sct.grab(sct_params) sct_img = sct.grab(sct_params)
img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX') img = Image.frombytes('RGB', sct_img.size, sct_img.bgra, 'raw', 'BGRX')
process_and_write_results(engine_instances[engine_index], img, write_to) res = process_and_write_results(engine_instances[engine_index], img, write_to, last_text)
if res[0] and res[1] != '':
last_text = res[1]
delay = screen_capture_delay_secs delay = screen_capture_delay_secs
else: else:
delay = delay_secs delay = delay_secs
@@ -566,7 +577,7 @@ def run(read_from=None,
except (UnidentifiedImageError, OSError) as e: except (UnidentifiedImageError, OSError) as e:
logger.warning(f'Error while reading file {path}: {e}') logger.warning(f'Error while reading file {path}: {e}')
else: else:
process_and_write_results(engine_instances[engine_index], img, write_to) process_and_write_results(engine_instances[engine_index], img, write_to, '')
img.close() img.close()
if delete_images: if delete_images:
Path.unlink(path) Path.unlink(path)