Merge pull request #27 from AuroraWright/Multi-Language-Support

Multi-language support
This commit is contained in:
Aurora
2025-07-28 18:41:56 +02:00
committed by GitHub
3 changed files with 41 additions and 9 deletions

View File

@@ -48,6 +48,8 @@ parser.add_argument('-sw', '--screen_capture_only_active_windows', type=str2bool
help="When reading with screen capture and screen_capture_area is a window name, only target the window while it's active.") help="When reading with screen capture and screen_capture_area is a window name, only target the window while it's active.")
parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS, parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') help='When reading with screen capture, combo to wait on for taking a screenshot instead of using the delay. As an example: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
# Register the -l/--language option. default=argparse.SUPPRESS leaves the
# attribute off the parsed namespace entirely when the flag is not given.
parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
                    help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
class Config: class Config:
has_config = False has_config = False
@@ -76,7 +78,8 @@ class Config:
'screen_capture_delay_secs': 3, 'screen_capture_delay_secs': 3,
'screen_capture_only_active_windows': True, 'screen_capture_only_active_windows': True,
'screen_capture_combo': '', 'screen_capture_combo': '',
'screen_capture_old_macos_api': False 'screen_capture_old_macos_api': False,
'language': 'ja'
} }
def __parse(self, value): def __parse(self, value):

View File

@@ -299,10 +299,11 @@ class RequestHandler(socketserver.BaseRequestHandler):
class TextFiltering: class TextFiltering:
accurate_filtering = False accurate_filtering = False
def __init__(self): def __init__(self, language='ja'):
from pysbd import Segmenter from pysbd import Segmenter
self.segmenter = Segmenter(language='ja', clean=True) self.segmenter = Segmenter(language=language, clean=True)
self.kana_kanji_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]') self.regex = self.get_regex(language)
try: try:
from transformers import pipeline, AutoTokenizer from transformers import pipeline, AutoTokenizer
import torch import torch
@@ -326,12 +327,34 @@ class TextFiltering:
import langid import langid
self.classify = langid.classify self.classify = langid.classify
def get_regex(self, language):
    """Return the compiled character filter for the given language code.

    The returned pattern is a single character class; callers use it to
    pull out the characters of a text block that belong to the script(s)
    associated with `language`.

    Args:
        language: Language code. 'ja', 'zh', 'ko', 'ar', 'ru', 'el', 'he'
            and 'th' each select a dedicated character class.

    Returns:
        A compiled `re.Pattern`. Any code not listed above yields the
        Latin Extended character class.
    """
    # (language code, character class) pairs, scanned in order with `==`
    # so lookup semantics are the same as a plain equality chain.
    script_classes = (
        ('ja', r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]'),
        ('zh', r'[\u4E00-\u9FFF]'),
        ('ko', r'[\uAC00-\uD7AF]'),
        ('ar', r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]'),
        ('ru', r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]'),
        ('el', r'[\u0370-\u03FF\u1F00-\u1FFF]'),
        ('he', r'[\u0590-\u05FF\uFB1D-\uFB4F]'),
        ('th', r'[\u0E00-\u0E7F]'),
    )
    # Latin Extended fallback for many European languages/English
    latin_extended = r'[a-zA-Z\u00C0-\u00FF\u0100-\u017F\u0180-\u024F\u0250-\u02AF\u1D00-\u1D7F\u1D80-\u1DBF\u1E00-\u1EFF\u2C60-\u2C7F\uA720-\uA7FF\uAB30-\uAB6F]'

    chosen = next(
        (char_class for code, char_class in script_classes if language == code),
        latin_extended,
    )
    return re.compile(chosen)
def __call__(self, text, last_result): def __call__(self, text, last_result):
orig_text = self.segmenter.segment(text) orig_text = self.segmenter.segment(text)
orig_text_filtered = [] orig_text_filtered = []
for block in orig_text: for block in orig_text:
block_filtered = self.kana_kanji_regex.findall(block) block_filtered = self.regex.findall(block)
if block_filtered: if block_filtered:
orig_text_filtered.append(''.join(block_filtered)) orig_text_filtered.append(''.join(block_filtered))
else: else:
@@ -352,12 +375,13 @@ class TextFiltering:
detection_results = self.pipe(new_blocks, top_k=3, truncation=True) detection_results = self.pipe(new_blocks, top_k=3, truncation=True)
for idx, block in enumerate(new_blocks): for idx, block in enumerate(new_blocks):
for result in detection_results[idx]: for result in detection_results[idx]:
if result['label'] == 'ja': if result['label'] == self.language:
final_blocks.append(block) final_blocks.append(block)
break break
else: else:
for block in new_blocks: for block in new_blocks:
if self.classify(block)[0] in ('ja', 'zh'): # This only looks at language IF language is ja or zh, otherwise it keeps all text
if self.language not in ["ja", "zh"] or self.classify(block)[0] in ['ja', 'zh'] or block == "\n":
final_blocks.append(block) final_blocks.append(block)
text = '\n'.join(final_blocks) text = '\n'.join(final_blocks)
@@ -872,6 +896,7 @@ def run():
terminated = False terminated = False
paused = config.get_general('pause_at_startup') paused = config.get_general('pause_at_startup')
auto_pause = config.get_general('auto_pause') auto_pause = config.get_general('auto_pause')
language = config.get_general('language')
clipboard_thread = None clipboard_thread = None
websocket_server_thread = None websocket_server_thread = None
screenshot_thread = None screenshot_thread = None
@@ -915,7 +940,7 @@ def run():
screenshot_event = threading.Event() screenshot_event = threading.Event()
screenshot_thread = ScreenshotThread(screen_capture_on_combo) screenshot_thread = ScreenshotThread(screen_capture_on_combo)
screenshot_thread.start() screenshot_thread.start()
filtering = TextFiltering() filtering = TextFiltering(language=language)
read_from_readable.append('screen capture') read_from_readable.append('screen capture')
if 'websocket' in (read_from, read_from_secondary): if 'websocket' in (read_from, read_from_secondary):
read_from_readable.append('websocket') read_from_readable.append('websocket')

View File

@@ -29,6 +29,10 @@
;note: this specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key ;note: this specifies a combo to wait on for taking a screenshot instead of using the delay. As an example: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;screen_capture_combo = <ctrl>+<shift>+s ;screen_capture_combo = <ctrl>+<shift>+s
;screen_capture_old_macos_api = False ;screen_capture_old_macos_api = False
;language = ja
;language = zh
;note: this specifies the language to use for text filtering while using "screencapture". Valid values: ja (Japanese), zh (Chinese), ko (Korean), ar (Arabic), ru (Russian), el (Greek), he (Hebrew), th (Thai).
;Any other value will use Latin Extended (for most European languages and English).
;[winrtocr] ;[winrtocr]
;url = http://aaa.xxx.yyy.zzz:8000 ;url = http://aaa.xxx.yyy.zzz:8000
;[oneocr] ;[oneocr]