Refactor, fix spacing with some engines like OneOCR
This commit is contained in:
14
owocr/ocr.py
14
owocr/ocr.py
@@ -85,8 +85,6 @@ try:
|
|||||||
except:
|
except:
|
||||||
optimized_png_encode = False
|
optimized_png_encode = False
|
||||||
|
|
||||||
cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BoundingBox:
|
class BoundingBox:
|
||||||
@@ -136,18 +134,6 @@ class OcrResult:
|
|||||||
def empty_post_process(text):
|
def empty_post_process(text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def post_process(text):
    """Flatten multi-line OCR output into a single normalised line.

    Text containing kana or CJK ideographs has all whitespace inside each
    line removed and is finally converted to full-width characters; any other
    text only has runs of whitespace collapsed to one space. In both cases the
    lines are joined with a single space, ``…`` becomes ``...`` and runs of
    two or more ``・``/``.`` become the same number of ``.`` characters.
    """
    has_cj = cj_regex.search(text) is not None
    rows = text.splitlines()
    if has_cj:
        # Spaces between CJ glyphs are OCR artefacts, so drop them entirely.
        rows = [''.join(row.split()) for row in rows]
    else:
        rows = [re.sub(r'\s+', ' ', row).strip() for row in rows]

    merged = ' '.join(rows).replace('…', '...')
    # Replace each run of dots / middle dots with an equally long run of '.'.
    merged = re.sub('[・.]{2,}', lambda m: len(m.group()) * '.', merged)

    if has_cj:
        return jaconv.h2z(merged, ascii=True, digit=True)
    return merged
|
|
||||||
|
|
||||||
def input_to_pil_image(img):
|
def input_to_pil_image(img):
|
||||||
is_path = False
|
is_path = False
|
||||||
if isinstance(img, Image.Image):
|
if isinstance(img, Image.Image):
|
||||||
|
|||||||
216
owocr/run.py
216
owocr/run.py
@@ -307,6 +307,7 @@ class TextFiltering:
|
|||||||
self.language = config.get_general('language')
|
self.language = config.get_general('language')
|
||||||
self.segmenter = Segmenter(language=self.language, clean=True)
|
self.segmenter = Segmenter(language=self.language, clean=True)
|
||||||
self.regex = self.get_regex()
|
self.regex = self.get_regex()
|
||||||
|
self.last_result = ([], engine_index)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from transformers import pipeline, AutoTokenizer
|
from transformers import pipeline, AutoTokenizer
|
||||||
@@ -353,7 +354,7 @@ class TextFiltering:
|
|||||||
return re.compile(
|
return re.compile(
|
||||||
r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
|
r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]')
|
||||||
|
|
||||||
def __call__(self, text, last_result):
|
def __call__(self, text):
|
||||||
orig_text = self.segmenter.segment(text)
|
orig_text = self.segmenter.segment(text)
|
||||||
orig_text_filtered = []
|
orig_text_filtered = []
|
||||||
for block in orig_text:
|
for block in orig_text:
|
||||||
@@ -364,8 +365,8 @@ class TextFiltering:
|
|||||||
else:
|
else:
|
||||||
orig_text_filtered.append(None)
|
orig_text_filtered.append(None)
|
||||||
|
|
||||||
if last_result[1] == engine_index:
|
if self.last_result[1] == engine_index:
|
||||||
last_text = last_result[0]
|
last_text = self.last_result[0]
|
||||||
else:
|
else:
|
||||||
last_text = []
|
last_text = []
|
||||||
|
|
||||||
@@ -389,7 +390,9 @@ class TextFiltering:
|
|||||||
final_blocks.append(block)
|
final_blocks.append(block)
|
||||||
|
|
||||||
text = '\n'.join(final_blocks)
|
text = '\n'.join(final_blocks)
|
||||||
return text, orig_text_filtered
|
|
||||||
|
self.last_result = (orig_text_filtered, engine_index)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
class ScreenshotThread(threading.Thread):
|
class ScreenshotThread(threading.Thread):
|
||||||
@@ -717,6 +720,107 @@ class AutopauseTimer:
|
|||||||
pause_handler(True)
|
pause_handler(True)
|
||||||
|
|
||||||
|
|
||||||
|
class OutputResult:
    """Run the active OCR engine on an image and deliver the recognised text.

    An instance is callable: it invokes the engine selected by the module-level
    ``engine_index``, optionally filters the text through ``TextFiltering``,
    normalises it, logs/notifies, and writes the result to the destination
    configured under ``write_to`` (websocket, clipboard or a file path).
    """

    def __init__(self, init_filtering):
        """Create the output handler.

        Args:
            init_filtering: When true a ``TextFiltering`` instance is created;
                otherwise ``self.filtering`` is ``None`` and text is never
                filtered.
        """
        self.filtering = TextFiltering() if init_filtering else None
        # Matches hiragana, katakana and CJK unified ideographs.
        self.cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')

    def _coordinate_format_to_string(self, result_data):
        """Flatten a structured result (paragraphs -> lines -> words) to text.

        Each word contributes its ``text`` followed by its ``separator``. A
        separator of ``None`` falls back to a single space, while an explicit
        empty string is honoured (no gap). Every line ends with ``'\\n'``.
        """
        full_text_parts = []
        for paragraph in result_data.paragraphs:
            for line in paragraph.lines:
                for word in line.words:
                    full_text_parts.append(word.text)
                    # Identity check: only a missing separator gets the default space.
                    full_text_parts.append(word.separator if word.separator is not None else ' ')
                full_text_parts.append('\n')
        return ''.join(full_text_parts)

    def _post_process(self, text):
        """Collapse multi-line OCR text into one normalised line.

        Text containing kana/CJK ideographs loses all in-line whitespace and is
        converted to full-width characters; other text only has whitespace
        runs collapsed. Lines are joined by a single space, ``…`` becomes
        ``...`` and runs of two or more ``・``/``.`` become as many ``.``.
        """
        is_cj_text = self.cj_regex.search(text)
        lines = text.splitlines()
        if is_cj_text:
            text = ' '.join([''.join(line.split()) for line in lines])
        else:
            text = ' '.join([re.sub(r'\s+', ' ', line).strip() for line in lines])
        text = text.replace('…', '...')
        text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
        if is_cj_text:
            text = jaconv.h2z(text, ascii=True, digit=True)
        return text

    def __call__(self, img_or_path, filter_text, notify):
        """Recognise text in ``img_or_path`` and write it to the output target.

        Args:
            img_or_path: Image (or path) handed unchanged to the engine.
            filter_text: When true the recognised text is passed through
                ``self.filtering`` (if one was created) before normalisation,
                and the auto-pause timer is left untouched.
            notify: When true, and notifications are enabled in the config, a
                desktop notification with the recognised text is sent.

        Returns:
            None. If the engine reports an error it is logged and nothing is
            written.
        """
        if auto_pause_handler and not filter_text:
            auto_pause_handler.stop()

        engine_instance = engine_instances[engine_index]
        start_time = time.time()
        res, result_data = engine_instance(img_or_path)
        end_time = time.time()

        engine_color = config.get_general('engine_color')
        if not res:
            logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data}')
            # NOTE(review): the auto-pause timer stopped above is not restarted
            # on this path - confirm whether that is intended.
            return

        output_format = config.get_general('output_format')
        verbosity = config.get_general('verbosity')
        output_string = ''
        log_message = ''
        result_data_text = None

        # Check if the engine returned a structured OcrResult object
        if isinstance(result_data, OcrResult):
            unprocessed_text = self._coordinate_format_to_string(result_data)

            if output_format == 'json':
                # JSON output carries the raw structure; the flattened text is
                # only used for logging and notifications.
                result_dict = asdict(result_data)
                output_string = json.dumps(result_dict, ensure_ascii=False)
                log_message = self._post_process(unprocessed_text)
            else:
                result_data_text = unprocessed_text
        else:
            result_data_text = result_data

        if result_data_text:
            if output_format == 'json':
                logger.warning(f"Engine '{engine_instance.name}' does not support JSON output. Falling back to text.")
            text_to_process = result_data_text
            # Filtering only exists when the instance was built with
            # init_filtering=True; without it the text passes through as-is.
            if filter_text and self.filtering is not None:
                text_to_process = self.filtering(result_data_text)
            output_string = self._post_process(text_to_process)
            log_message = output_string

        if verbosity != 0:
            if verbosity < -1:
                log_message_terminal = ': ' + log_message
            elif verbosity == -1:
                log_message_terminal = ''
            else:
                log_message_terminal = ': ' + (log_message if len(log_message) <= verbosity else log_message[:verbosity] + '[...]')

            logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>{log_message_terminal}')

        if notify and config.get_general('notifications'):
            notifier.send(title='owocr', message='Text recognized: ' + log_message, urgency=get_notification_urgency())

        # Write the final formatted string to the destination
        write_to = config.get_general('write_to')
        if write_to == 'websocket':
            websocket_server_thread.send_text(output_string)
        elif write_to == 'clipboard':
            pyperclipfix.copy(output_string)
        else:
            with Path(write_to).open('a', encoding='utf-8') as f:
                f.write(output_string + '\n')

        if auto_pause_handler and not paused and not filter_text:
            auto_pause_handler.start()
|
||||||
|
|
||||||
|
|
||||||
def get_notification_urgency():
|
def get_notification_urgency():
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
return Urgency.Low
|
return Urgency.Low
|
||||||
@@ -809,90 +913,6 @@ def on_screenshot_combo():
|
|||||||
screenshot_event.set()
|
screenshot_event.set()
|
||||||
|
|
||||||
|
|
||||||
def process_and_write_results(img_or_path, last_result, filtering, notify):
    """Run the active OCR engine on one image and deliver the recognised text.

    The engine selected by the module-level ``engine_index`` is invoked on
    ``img_or_path``. Its output is optionally filtered (``filtering`` is called
    with the raw text and ``last_result``), normalised with ``post_process``,
    logged, optionally sent as a notification, and written to the configured
    ``write_to`` destination.

    Returns the second value produced by ``filtering`` when filtering ran, and
    an empty list otherwise (including when the engine reported an error).
    """
    # The auto-pause timer is only suspended for unfiltered requests.
    if auto_pause_handler and not filtering:
        auto_pause_handler.stop()

    engine_instance = engine_instances[engine_index]
    start_time = time.time()
    res, result_data = engine_instance(img_or_path)
    end_time = time.time()

    orig_text = []
    engine_color = config.get_general('engine_color')
    if not res:
        logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {result_data}')
        return orig_text

    output_format = config.get_general('output_format')
    verbosity = config.get_general('verbosity')

    # Engines return either a structured OcrResult or a plain string.
    is_structured = isinstance(result_data, OcrResult)
    if is_structured:
        # Flatten words (plus any non-empty separators) into newline-terminated lines.
        pieces = []
        for paragraph in result_data.paragraphs:
            for line in paragraph.lines:
                for word in line.words:
                    pieces.append(word.text)
                    if word.separator:
                        pieces.append(word.separator)
                pieces.append('\n')
        unprocessed_text = "".join(pieces)
    else:
        if output_format == 'json':
            logger.warning(f"Engine '{engine_instance.name}' does not support JSON output. Falling back to text.")
        unprocessed_text = result_data

    if is_structured and output_format == 'json':
        # JSON output carries the raw structure; the text is only for logs.
        output_string = json.dumps(asdict(result_data), ensure_ascii=False)
        log_message = post_process(unprocessed_text)
    else:
        text_to_process = unprocessed_text
        if filtering:
            text_to_process, orig_text = filtering(unprocessed_text, last_result)
        output_string = post_process(text_to_process)
        log_message = output_string

    if verbosity != 0:
        if verbosity == -1:
            log_message_terminal = ''
        elif verbosity < -1 or len(log_message) <= verbosity:
            log_message_terminal = ': ' + log_message
        else:
            # Positive verbosity caps the number of characters shown.
            log_message_terminal = ': ' + (log_message[:verbosity] + '[...]')

        logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>{log_message_terminal}')

    if notify and config.get_general('notifications'):
        notifier.send(title='owocr', message='Text recognized: ' + log_message, urgency=get_notification_urgency())

    # Write the final formatted string to the destination
    write_to = config.get_general('write_to')
    if write_to == 'websocket':
        websocket_server_thread.send_text(output_string)
    elif write_to == 'clipboard':
        pyperclipfix.copy(output_string)
    else:
        with Path(write_to).open('a', encoding='utf-8') as out_file:
            out_file.write(output_string + '\n')

    if auto_pause_handler and not paused and not filtering:
        auto_pause_handler.start()

    return orig_text
|
|
||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
|
logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
|
||||||
|
|
||||||
@@ -961,7 +981,7 @@ def run():
|
|||||||
directory_watcher_thread = None
|
directory_watcher_thread = None
|
||||||
unix_socket_server = None
|
unix_socket_server = None
|
||||||
key_combo_listener = None
|
key_combo_listener = None
|
||||||
filtering = None
|
init_filtering = False
|
||||||
auto_pause_handler = None
|
auto_pause_handler = None
|
||||||
engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
|
engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
|
||||||
engine_color = config.get_general('engine_color')
|
engine_color = config.get_general('engine_color')
|
||||||
@@ -988,7 +1008,6 @@ def run():
|
|||||||
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
|
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
|
||||||
screen_capture_combo = config.get_general('screen_capture_combo')
|
screen_capture_combo = config.get_general('screen_capture_combo')
|
||||||
last_screenshot_time = 0
|
last_screenshot_time = 0
|
||||||
last_result = ([], engine_index)
|
|
||||||
if screen_capture_combo != '':
|
if screen_capture_combo != '':
|
||||||
screen_capture_on_combo = True
|
screen_capture_on_combo = True
|
||||||
key_combos[screen_capture_combo] = on_screenshot_combo
|
key_combos[screen_capture_combo] = on_screenshot_combo
|
||||||
@@ -998,7 +1017,7 @@ def run():
|
|||||||
screenshot_event = threading.Event()
|
screenshot_event = threading.Event()
|
||||||
screenshot_thread = ScreenshotThread(screen_capture_on_combo)
|
screenshot_thread = ScreenshotThread(screen_capture_on_combo)
|
||||||
screenshot_thread.start()
|
screenshot_thread.start()
|
||||||
filtering = TextFiltering()
|
init_filtering = True
|
||||||
read_from_readable.append('screen capture')
|
read_from_readable.append('screen capture')
|
||||||
if 'websocket' in (read_from, read_from_secondary):
|
if 'websocket' in (read_from, read_from_secondary):
|
||||||
read_from_readable.append('websocket')
|
read_from_readable.append('websocket')
|
||||||
@@ -1027,6 +1046,8 @@ def run():
|
|||||||
directory_watcher_thread.start()
|
directory_watcher_thread.start()
|
||||||
read_from_readable.append(f'directory {read_from_path}')
|
read_from_readable.append(f'directory {read_from_path}')
|
||||||
|
|
||||||
|
output_result = OutputResult(init_filtering)
|
||||||
|
|
||||||
if len(key_combos) > 0:
|
if len(key_combos) > 0:
|
||||||
key_combo_listener = keyboard.GlobalHotKeys(key_combos)
|
key_combo_listener = keyboard.GlobalHotKeys(key_combos)
|
||||||
key_combo_listener.start()
|
key_combo_listener.start()
|
||||||
@@ -1058,11 +1079,11 @@ def run():
|
|||||||
while not terminated:
|
while not terminated:
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
img = None
|
img = None
|
||||||
filter_img = False
|
filter_text = False
|
||||||
|
|
||||||
if process_queue:
|
if process_queue:
|
||||||
try:
|
try:
|
||||||
img, filter_img = image_queue.get(timeout=0.1)
|
img, filter_text = image_queue.get(timeout=0.1)
|
||||||
notify = True
|
notify = True
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
pass
|
pass
|
||||||
@@ -1071,7 +1092,7 @@ def run():
|
|||||||
if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
|
if (not paused) and screenshot_thread.screencapture_window_active and screenshot_thread.screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
|
||||||
screenshot_event.set()
|
screenshot_event.set()
|
||||||
img = periodic_screenshot_queue.get()
|
img = periodic_screenshot_queue.get()
|
||||||
filter_img = True
|
filter_text = True
|
||||||
notify = False
|
notify = False
|
||||||
last_screenshot_time = time.time()
|
last_screenshot_time = time.time()
|
||||||
|
|
||||||
@@ -1080,12 +1101,7 @@ def run():
|
|||||||
terminated = True
|
terminated = True
|
||||||
break
|
break
|
||||||
elif img:
|
elif img:
|
||||||
if filter_img:
|
output_result(img, filter_text, notify)
|
||||||
res = process_and_write_results(img, last_result, filtering, notify)
|
|
||||||
if res:
|
|
||||||
last_result = (res, engine_index)
|
|
||||||
else:
|
|
||||||
process_and_write_results(img, None, None, notify)
|
|
||||||
if isinstance(img, Path):
|
if isinstance(img, Path):
|
||||||
if delete_images:
|
if delete_images:
|
||||||
Path.unlink(img)
|
Path.unlink(img)
|
||||||
|
|||||||
Reference in New Issue
Block a user