Big refactoring, allow second image source
This commit is contained in:
@@ -12,6 +12,8 @@ parser = argparse.ArgumentParser(prog='owocr', description=textwrap.dedent('''\
|
||||
|
||||
parser.add_argument('-r', '--read_from', type=str, default=argparse.SUPPRESS,
|
||||
help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
|
||||
parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.SUPPRESS,
|
||||
help='Where to read input images from. Can be either "clipboard", "websocket", "unixsocket" (on macOS/Linux), "screencapture", or a path to a directory.')
|
||||
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
|
||||
help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
|
||||
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
|
||||
@@ -47,6 +49,7 @@ class Config:
|
||||
__engine_config = {}
|
||||
__default_config = {
|
||||
'read_from': 'clipboard',
|
||||
'read_from_secondary': None,
|
||||
'write_to': 'clipboard',
|
||||
'engine': '',
|
||||
'pause_at_startup': False,
|
||||
|
||||
202
owocr/ocr.py
202
owocr/ocr.py
@@ -96,6 +96,22 @@ def post_process(text):
|
||||
return text
|
||||
|
||||
|
||||
def input_to_pil_image(img):
|
||||
if isinstance(img, Image.Image):
|
||||
pil_image = img
|
||||
elif isinstance(img, (bytes, bytearray)):
|
||||
pil_image = Image.open(io.BytesIO(img))
|
||||
elif isinstance(img, Path):
|
||||
try:
|
||||
pil_image = Image.open(img)
|
||||
pil_image.load()
|
||||
except (UnidentifiedImageError, OSError) as e:
|
||||
return None
|
||||
else:
|
||||
raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
|
||||
return pil_image
|
||||
|
||||
|
||||
def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
|
||||
if img_format == 'png' and optimized_png_encode and not optimize:
|
||||
raw_data = img.convert('RGBA').tobytes()
|
||||
@@ -157,15 +173,14 @@ class MangaOcr:
|
||||
self.available = True
|
||||
logger.info('Manga OCR ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
x = (True, self.model(img))
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
class GoogleVision:
|
||||
@@ -188,13 +203,10 @@ class GoogleVision:
|
||||
except:
|
||||
logger.warning('Error parsing Google credentials, Google Vision will not work!')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
image_bytes = self._preprocess(img)
|
||||
image = vision.Image(content=image_bytes)
|
||||
@@ -207,6 +219,8 @@ class GoogleVision:
|
||||
texts = response.text_annotations
|
||||
res = texts[0].description if len(texts) > 0 else ''
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -225,13 +239,10 @@ class GoogleLens:
|
||||
self.available = True
|
||||
logger.info('Google Lens ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
request = LensOverlayServerRequest()
|
||||
|
||||
@@ -298,6 +309,8 @@ class GoogleLens:
|
||||
res += '\n'
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -305,9 +318,10 @@ class GoogleLens:
|
||||
aspect_ratio = img.width / img.height
|
||||
new_w = int(sqrt(3000000 * aspect_ratio))
|
||||
new_h = int(new_w / aspect_ratio)
|
||||
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img.close()
|
||||
|
||||
return (pil_image_to_bytes(img), img.width, img.height)
|
||||
return (pil_image_to_bytes(img_resized), img_resized.width, img_resized.height)
|
||||
|
||||
class GoogleLensWeb:
|
||||
name = 'glensweb'
|
||||
@@ -323,13 +337,10 @@ class GoogleLensWeb:
|
||||
self.available = True
|
||||
logger.info('Google Lens (web) ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
url = 'https://lens.google.com/v3/upload'
|
||||
files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')}
|
||||
@@ -393,6 +404,8 @@ class GoogleLensWeb:
|
||||
res += '\n'
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -400,9 +413,10 @@ class GoogleLensWeb:
|
||||
aspect_ratio = img.width / img.height
|
||||
new_w = int(sqrt(3000000 * aspect_ratio))
|
||||
new_h = int(new_w / aspect_ratio)
|
||||
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img.close()
|
||||
|
||||
return pil_image_to_bytes(img)
|
||||
return pil_image_to_bytes(img_resized)
|
||||
|
||||
class Bing:
|
||||
name = 'bing'
|
||||
@@ -415,13 +429,10 @@ class Bing:
|
||||
self.available = True
|
||||
logger.info('Bing ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
img_bytes = self._preprocess(img)
|
||||
if not img_bytes:
|
||||
@@ -515,6 +526,8 @@ class Bing:
|
||||
res += line['text'] + '\n'
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -526,9 +539,10 @@ class Bing:
|
||||
resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
|
||||
new_w = int(img.width * resize_factor)
|
||||
new_h = int(img.height * resize_factor)
|
||||
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img.close()
|
||||
|
||||
img_bytes, _ = limit_image_size(img, max_byte_size)
|
||||
img_bytes, _ = limit_image_size(img_resized, max_byte_size)
|
||||
|
||||
if img_bytes:
|
||||
res = base64.b64encode(img_bytes).decode('utf-8')
|
||||
@@ -550,13 +564,10 @@ class AppleVision:
|
||||
self.available = True
|
||||
logger.info('Apple Vision ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
with objc.autorelease_pool():
|
||||
req = Vision.VNRecognizeTextRequest.alloc().init()
|
||||
@@ -579,6 +590,7 @@ class AppleVision:
|
||||
else:
|
||||
x = (False, 'Unknown error!')
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -631,13 +643,10 @@ class AppleLiveText:
|
||||
self.available = True
|
||||
logger.info('Apple Live Text ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
with objc.autorelease_pool():
|
||||
analyzer = self.VKCImageAnalyzer.alloc().init()
|
||||
@@ -691,13 +700,10 @@ class WinRTOCR:
|
||||
except:
|
||||
logger.warning('Error reading URL from config, WinRT OCR will not work!')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
if sys.platform == 'win32':
|
||||
res = winocr.recognize_pil_sync(img, lang='ja')['text']
|
||||
@@ -716,6 +722,8 @@ class WinRTOCR:
|
||||
res = res.json()['text']
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -749,13 +757,10 @@ class OneOCR:
|
||||
except:
|
||||
logger.warning('Error reading URL from config, OneOCR will not work!')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
if sys.platform == 'win32':
|
||||
try:
|
||||
@@ -776,6 +781,8 @@ class OneOCR:
|
||||
res = res.json()['text']
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -799,13 +806,10 @@ class AzureImageAnalysis:
|
||||
except:
|
||||
logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
try:
|
||||
read_result = self.client.analyze(image_data=self._preprocess(img), visual_features=[VisualFeatures.READ])
|
||||
@@ -823,6 +827,8 @@ class AzureImageAnalysis:
|
||||
return (False, 'Unknown error!')
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -830,9 +836,10 @@ class AzureImageAnalysis:
|
||||
resize_factor = max(50 / img.width, 50 / img.height)
|
||||
new_w = int(img.width * resize_factor)
|
||||
new_h = int(img.height * resize_factor)
|
||||
img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
|
||||
img.close()
|
||||
|
||||
return pil_image_to_bytes(img)
|
||||
return pil_image_to_bytes(img_resized)
|
||||
|
||||
class EasyOCR:
|
||||
name = 'easyocr'
|
||||
@@ -850,13 +857,10 @@ class EasyOCR:
|
||||
self.available = True
|
||||
logger.info('EasyOCR ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
res = ''
|
||||
read_result = self.model.readtext(self._preprocess(img), detail=0)
|
||||
@@ -864,6 +868,8 @@ class EasyOCR:
|
||||
res += text + '\n'
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -897,13 +903,10 @@ class RapidOCR:
|
||||
self.available = True
|
||||
logger.info('RapidOCR ready')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
res = ''
|
||||
read_results, elapsed = self.model(self._preprocess(img))
|
||||
@@ -912,6 +915,8 @@ class RapidOCR:
|
||||
res += read_result[1] + '\n'
|
||||
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
@@ -932,13 +937,10 @@ class OCRSpace:
|
||||
except:
|
||||
logger.warning('Error reading API key from config, OCRSpace will not work!')
|
||||
|
||||
def __call__(self, img_or_path):
|
||||
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
|
||||
img = Image.open(img_or_path)
|
||||
elif isinstance(img_or_path, Image.Image):
|
||||
img = img_or_path
|
||||
else:
|
||||
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
|
||||
def __call__(self, img):
|
||||
img = input_to_pil_image(img)
|
||||
if not img:
|
||||
return (False, 'Invalid image provided')
|
||||
|
||||
img_bytes, img_extension = self._preprocess(img)
|
||||
if not img_bytes:
|
||||
@@ -969,6 +971,8 @@ class OCRSpace:
|
||||
|
||||
res = res['ParsedResults'][0]['ParsedText']
|
||||
x = (True, res)
|
||||
|
||||
img.close()
|
||||
return x
|
||||
|
||||
def _preprocess(self, img):
|
||||
|
||||
306
owocr/run.py
306
owocr/run.py
@@ -93,7 +93,7 @@ class ClipboardThread(threading.Thread):
|
||||
1.0
|
||||
)
|
||||
|
||||
return new_image.TIFFRepresentation()
|
||||
return bytes(new_image.TIFFRepresentation())
|
||||
|
||||
def process_message(self, hwnd: int, msg: int, wparam: int, lparam: int):
|
||||
WM_CLIPBOARDUPDATE = 0x031D
|
||||
@@ -114,7 +114,7 @@ class ClipboardThread(threading.Thread):
|
||||
clipboard_text = win32clipboard.GetClipboardData(win32clipboard.CF_UNICODETEXT)
|
||||
if self.ignore_flag or clipboard_text != '*ocr_ignore*':
|
||||
img = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
|
||||
clipboard_queue.put(img)
|
||||
image_queue.put((img, False))
|
||||
win32clipboard.CloseClipboard()
|
||||
except pywintypes.error:
|
||||
pass
|
||||
@@ -165,7 +165,7 @@ class ClipboardThread(threading.Thread):
|
||||
clipboard_text = pasteboard.stringForType_(NSPasteboardTypeString)
|
||||
if self.ignore_flag or clipboard_text != '*ocr_ignore*':
|
||||
img = self.normalize_macos_clipboard(pasteboard.dataForType_(NSPasteboardTypeTIFF))
|
||||
clipboard_queue.put(img)
|
||||
image_queue.put((img, False))
|
||||
else:
|
||||
old_img = img
|
||||
try:
|
||||
@@ -176,7 +176,7 @@ class ClipboardThread(threading.Thread):
|
||||
if ((not just_unpaused) and isinstance(img, Image.Image) and \
|
||||
(self.ignore_flag or pyperclipfix.paste() != '*ocr_ignore*') and \
|
||||
(not self.are_images_identical(img, old_img))):
|
||||
clipboard_queue.put(img)
|
||||
image_queue.put((img, False))
|
||||
|
||||
just_unpaused = False
|
||||
|
||||
@@ -184,6 +184,41 @@ class ClipboardThread(threading.Thread):
|
||||
time.sleep(sleep_time)
|
||||
|
||||
|
||||
class DirectoryWatcher(threading.Thread):
|
||||
def __init__(self, path):
|
||||
super().__init__(daemon=True)
|
||||
self.path = path
|
||||
self.delay_secs = config.get_general('delay_secs')
|
||||
self.last_update = time.time()
|
||||
self.allowed_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')
|
||||
|
||||
def get_path_key(self, path):
|
||||
return path, path.lstat().st_mtime
|
||||
|
||||
def run(self):
|
||||
old_paths = set()
|
||||
for path in self.path.iterdir():
|
||||
if path.suffix.lower() in self.allowed_extensions:
|
||||
old_paths.add(get_path_key(path))
|
||||
|
||||
while not terminated:
|
||||
if paused:
|
||||
sleep_time = 0.5
|
||||
else:
|
||||
sleep_time = self.delay_secs
|
||||
for path in self.path.iterdir():
|
||||
if path.suffix.lower() in self.allowed_extensions:
|
||||
path_key = self.get_path_key(path)
|
||||
if path_key not in old_paths:
|
||||
old_paths.add(path_key)
|
||||
|
||||
if not paused:
|
||||
image_queue.put((path, False))
|
||||
|
||||
if not terminated:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
|
||||
class WebsocketServerThread(threading.Thread):
|
||||
def __init__(self, read):
|
||||
super().__init__(daemon=True)
|
||||
@@ -206,7 +241,7 @@ class WebsocketServerThread(threading.Thread):
|
||||
try:
|
||||
async for message in websocket:
|
||||
if self.read and not paused:
|
||||
websocket_queue.put(message)
|
||||
image_queue.put((message, False))
|
||||
try:
|
||||
await websocket.send('True')
|
||||
except websockets.exceptions.ConnectionClosedOK:
|
||||
@@ -255,7 +290,7 @@ class RequestHandler(socketserver.BaseRequestHandler):
|
||||
pass
|
||||
|
||||
if not paused:
|
||||
unixsocket_queue.put(img)
|
||||
image_queue.put((img, False))
|
||||
conn.sendall(b'True')
|
||||
else:
|
||||
conn.sendall(b'False')
|
||||
@@ -266,7 +301,7 @@ class MacOSWindowTracker(threading.Thread):
|
||||
super().__init__(daemon=True)
|
||||
self.stop = False
|
||||
self.window_id = window_id
|
||||
self.window_active = False
|
||||
self.window_active = screencapture_window_active
|
||||
|
||||
def run(self):
|
||||
found = True
|
||||
@@ -302,8 +337,8 @@ class WindowsWindowTracker(threading.Thread):
|
||||
self.stop = False
|
||||
self.window_handle = window_handle
|
||||
self.only_active = only_active
|
||||
self.window_active = False
|
||||
self.window_minimized = False
|
||||
self.window_active = screencapture_window_active
|
||||
self.window_minimized = not screencapture_window_visible
|
||||
|
||||
def run(self):
|
||||
found = True
|
||||
@@ -463,14 +498,8 @@ class TextFiltering:
|
||||
|
||||
|
||||
class ScreenshotClass:
|
||||
def __init__(self, screen_capture_on_combo):
|
||||
def __init__(self):
|
||||
screen_capture_area = config.get_general('screen_capture_area')
|
||||
if type(screen_capture_area) == tuple:
|
||||
screen_capture_area = ','.join(map(str, screen_capture_area))
|
||||
global screencapture_window_active
|
||||
global screencapture_window_visible
|
||||
screencapture_window_active = True
|
||||
screencapture_window_visible = True
|
||||
self.macos_window_tracker = None
|
||||
self.windows_window_tracker = None
|
||||
if screen_capture_area == '':
|
||||
@@ -519,7 +548,7 @@ class ScreenshotClass:
|
||||
self.sct_params = {'top': coord_top, 'left': coord_left, 'width': coord_width, 'height': coord_height}
|
||||
logger.opt(ansi=True).info(f'Selected coordinates: {coord_left},{coord_top},{coord_width},{coord_height}')
|
||||
else:
|
||||
screen_capture_only_active_windows = (not screen_capture_on_combo) and config.get_general('screen_capture_only_active_windows')
|
||||
screen_capture_only_active_windows = config.get_general('screen_capture_only_active_windows')
|
||||
area_invalid_error = '"screen_capture_area" must be empty, "screen_N" where N is a screen number starting from 1, a valid set of coordinates, or a valid window name'
|
||||
if sys.platform == 'darwin':
|
||||
if int(platform.mac_ver()[0].split('.')[0]) < 14:
|
||||
@@ -554,7 +583,6 @@ class ScreenshotClass:
|
||||
window_title = window_titles[window_index]
|
||||
|
||||
if screen_capture_only_active_windows:
|
||||
screencapture_window_active = False
|
||||
self.macos_window_tracker = MacOSWindowTracker(self.window_id)
|
||||
self.macos_window_tracker.start()
|
||||
logger.opt(ansi=True).info(f'Selected window: {window_title}')
|
||||
@@ -566,8 +594,6 @@ class ScreenshotClass:
|
||||
|
||||
ctypes.windll.shcore.SetProcessDpiAwareness(1)
|
||||
|
||||
if screen_capture_only_active_windows:
|
||||
screencapture_window_active = False
|
||||
self.windows_window_tracker = WindowsWindowTracker(self.window_handle, screen_capture_only_active_windows)
|
||||
self.windows_window_tracker.start()
|
||||
logger.opt(ansi=True).info(f'Selected window: {window_title}')
|
||||
@@ -595,7 +621,7 @@ class ScreenshotClass:
|
||||
except queue.Empty:
|
||||
cg_image = None
|
||||
if not cg_image:
|
||||
return None
|
||||
return 0
|
||||
width = CGImageGetWidth(cg_image)
|
||||
height = CGImageGetHeight(cg_image)
|
||||
raw_data = CGDataProviderCopyData(CGImageGetDataProvider(cg_image))
|
||||
@@ -620,7 +646,7 @@ class ScreenshotClass:
|
||||
bmpinfo = save_bitmap.GetInfo()
|
||||
bmpstr = save_bitmap.GetBitmapBits(True)
|
||||
except pywintypes.error:
|
||||
return None
|
||||
return 0
|
||||
img = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
|
||||
try:
|
||||
win32gui.DeleteObject(save_bitmap.GetHandle())
|
||||
@@ -651,6 +677,9 @@ class AutopauseTimer:
|
||||
self.timeout = timeout
|
||||
self.timer_thread = None
|
||||
|
||||
def __del__(self):
|
||||
self.stop()
|
||||
|
||||
def start(self):
|
||||
self.stop()
|
||||
self.stop_event.clear()
|
||||
@@ -664,12 +693,12 @@ class AutopauseTimer:
|
||||
|
||||
def _countdown(self):
|
||||
seconds = self.timeout
|
||||
while seconds > 0 and not self.stop_event.is_set():
|
||||
while seconds > 0 and not self.stop_event.is_set() and not terminated:
|
||||
time.sleep(1)
|
||||
seconds -= 1
|
||||
if not self.stop_event.is_set():
|
||||
self.stop_event.set()
|
||||
if not paused:
|
||||
if not (paused or terminated):
|
||||
pause_handler(True)
|
||||
|
||||
|
||||
@@ -775,7 +804,7 @@ def on_window_minimized(minimized):
|
||||
def on_screenshot_combo():
|
||||
if not paused:
|
||||
img = take_screenshot()
|
||||
screenshot_queue.put(img)
|
||||
image_queue.put((img, True))
|
||||
|
||||
|
||||
def process_and_write_results(img_or_path, last_result, filtering):
|
||||
@@ -783,9 +812,9 @@ def process_and_write_results(img_or_path, last_result, filtering):
|
||||
auto_pause_handler.stop()
|
||||
|
||||
engine_instance = engine_instances[engine_index]
|
||||
t0 = time.time()
|
||||
start_time = time.time()
|
||||
res, text = engine_instance(img_or_path)
|
||||
t1 = time.time()
|
||||
end_time = time.time()
|
||||
|
||||
orig_text = []
|
||||
engine_color = config.get_general('engine_color')
|
||||
@@ -793,7 +822,7 @@ def process_and_write_results(img_or_path, last_result, filtering):
|
||||
if filtering:
|
||||
text, orig_text = filtering(text, last_result)
|
||||
text = post_process(text)
|
||||
logger.opt(ansi=True).info(f'Text recognized in {t1 - t0:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
|
||||
logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
|
||||
if config.get_general('notifications'):
|
||||
notifier.send(title='owocr', message='Text recognized: ' + text)
|
||||
|
||||
@@ -809,15 +838,11 @@ def process_and_write_results(img_or_path, last_result, filtering):
|
||||
if auto_pause_handler and not paused:
|
||||
auto_pause_handler.start()
|
||||
else:
|
||||
logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {t1 - t0:0.03f}s: {text}')
|
||||
logger.opt(ansi=True).info(f'<{engine_color}>{engine_instance.readable_name}</{engine_color}> reported an error after {end_time - start_time:0.03f}s: {text}')
|
||||
|
||||
return orig_text
|
||||
|
||||
|
||||
def get_path_key(path):
|
||||
return path, path.lstat().st_mtime
|
||||
|
||||
|
||||
def run():
|
||||
logger.configure(handlers=[{'sink': sys.stderr, 'format': config.get_general('logger_format')}])
|
||||
|
||||
@@ -861,22 +886,33 @@ def run():
|
||||
global paused
|
||||
global notifier
|
||||
global auto_pause_handler
|
||||
global websocket_server_thread
|
||||
global image_queue
|
||||
non_path_inputs = ('screencapture', 'clipboard', 'websocket', 'unixsocket')
|
||||
read_from = config.get_general('read_from')
|
||||
read_from_secondary = config.get_general('read_from_secondary')
|
||||
read_from_path = None
|
||||
read_from_readable = []
|
||||
write_to = config.get_general('write_to')
|
||||
terminated = False
|
||||
paused = config.get_general('pause_at_startup')
|
||||
auto_pause = config.get_general('auto_pause')
|
||||
clipboard_thread = None
|
||||
websocket_server_thread = None
|
||||
directory_watcher_thread = None
|
||||
unix_socket_server = None
|
||||
key_combo_listener = None
|
||||
filtering = None
|
||||
auto_pause_handler = None
|
||||
engine_index = engine_keys.index(default_engine) if default_engine != '' else 0
|
||||
engine_color = config.get_general('engine_color')
|
||||
combo_pause = config.get_general('combo_pause')
|
||||
combo_engine_switch = config.get_general('combo_engine_switch')
|
||||
screen_capture_on_combo = False
|
||||
notifier = DesktopNotifierSync()
|
||||
image_queue = queue.Queue()
|
||||
key_combos = {}
|
||||
|
||||
if read_from != 'screencapture' and auto_pause != 0:
|
||||
auto_pause_handler = AutopauseTimer(auto_pause)
|
||||
|
||||
if combo_pause != '':
|
||||
key_combos[combo_pause] = pause_handler
|
||||
if combo_engine_switch != '':
|
||||
@@ -885,64 +921,51 @@ def run():
|
||||
else:
|
||||
raise ValueError('combo_pause must also be specified')
|
||||
|
||||
if read_from == 'websocket' or write_to == 'websocket':
|
||||
global websocket_server_thread
|
||||
websocket_server_thread = WebsocketServerThread(read_from == 'websocket')
|
||||
if 'websocket' in (read_from, read_from_secondary) or write_to == 'websocket':
|
||||
websocket_server_thread = WebsocketServerThread('websocket' in (read_from, read_from_secondary))
|
||||
websocket_server_thread.start()
|
||||
|
||||
if read_from == 'websocket':
|
||||
global websocket_queue
|
||||
websocket_queue = queue.Queue()
|
||||
read_from_readable = 'websocket'
|
||||
elif read_from == 'unixsocket':
|
||||
if 'screencapture' in (read_from, read_from_secondary):
|
||||
global screencapture_window_active
|
||||
global screencapture_window_visible
|
||||
global take_screenshot
|
||||
screencapture_window_active = False
|
||||
screencapture_window_visible = True
|
||||
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
|
||||
screen_capture_combo = config.get_general('screen_capture_combo')
|
||||
last_screenshot_time = 0
|
||||
last_result = ([], engine_index)
|
||||
if screen_capture_combo != '':
|
||||
screen_capture_on_combo = True
|
||||
key_combos[screen_capture_combo] = on_screenshot_combo
|
||||
take_screenshot = ScreenshotClass()
|
||||
filtering = TextFiltering()
|
||||
read_from_readable.append('screen capture')
|
||||
if 'websocket' in (read_from, read_from_secondary):
|
||||
read_from_readable.append('websocket')
|
||||
if 'unixsocket' in (read_from, read_from_secondary):
|
||||
if sys.platform == 'win32':
|
||||
raise ValueError('"unixsocket" is not currently supported on Windows')
|
||||
|
||||
global unixsocket_queue
|
||||
unixsocket_queue = queue.Queue()
|
||||
socket_path = Path('/tmp/owocr.sock')
|
||||
if socket_path.exists():
|
||||
socket_path.unlink()
|
||||
unix_socket_server = socketserver.ThreadingUnixStreamServer(str(socket_path), RequestHandler)
|
||||
unix_socket_server_thread = threading.Thread(target=unix_socket_server.serve_forever, daemon=True)
|
||||
unix_socket_server_thread.start()
|
||||
read_from_readable = 'unix socket'
|
||||
elif read_from == 'clipboard':
|
||||
global clipboard_queue
|
||||
clipboard_queue = queue.Queue()
|
||||
read_from_readable.append('unix socket')
|
||||
if 'clipboard' in (read_from, read_from_secondary):
|
||||
clipboard_thread = ClipboardThread()
|
||||
clipboard_thread.start()
|
||||
read_from_readable = 'clipboard'
|
||||
elif read_from == 'screencapture':
|
||||
screen_capture_delay_secs = config.get_general('screen_capture_delay_secs')
|
||||
screen_capture_combo = config.get_general('screen_capture_combo')
|
||||
last_result = ([], engine_index)
|
||||
if screen_capture_combo != '':
|
||||
screen_capture_on_combo = True
|
||||
global screenshot_queue
|
||||
screenshot_queue = queue.Queue()
|
||||
key_combos[screen_capture_combo] = on_screenshot_combo
|
||||
else:
|
||||
screen_capture_on_combo = False
|
||||
global take_screenshot
|
||||
take_screenshot = ScreenshotClass(screen_capture_on_combo)
|
||||
filtering = TextFiltering()
|
||||
read_from_readable = 'screen capture'
|
||||
else:
|
||||
delay_secs = config.get_general('delay_secs')
|
||||
read_from_readable.append('clipboard')
|
||||
if any(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
|
||||
if all(i and i not in non_path_inputs for i in (read_from, read_from_secondary)):
|
||||
raise ValueError("read_from and read_from_secondary can't both be directory paths")
|
||||
delete_images = config.get_general('delete_images')
|
||||
|
||||
read_from = Path(read_from)
|
||||
if not read_from.is_dir():
|
||||
raise ValueError('read_from must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
|
||||
|
||||
allowed_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')
|
||||
old_paths = set()
|
||||
for path in read_from.iterdir():
|
||||
if path.suffix.lower() in allowed_extensions:
|
||||
old_paths.add(get_path_key(path))
|
||||
|
||||
read_from_readable = f'directory {read_from}'
|
||||
read_from_path = Path(read_from) if read_from not in non_path_inputs else Path(read_from_secondary)
|
||||
if not read_from_path.is_dir():
|
||||
raise ValueError('read_from and read_from_secondary must be either "websocket", "unixsocket", "clipboard", "screencapture", or a path to a directory')
|
||||
directory_watcher_thread = DirectoryWatcher(read_from_path)
|
||||
directory_watcher_thread.start()
|
||||
read_from_readable.append(f'directory {read_from_path}')
|
||||
|
||||
if len(key_combos) > 0:
|
||||
key_combo_listener = keyboard.GlobalHotKeys(key_combos)
|
||||
@@ -955,99 +978,62 @@ def run():
|
||||
raise ValueError('write_to must be either "websocket", "clipboard" or a path to a text file')
|
||||
write_to_readable = f'file {write_to}'
|
||||
|
||||
process_queue = (any(i in ('clipboard', 'websocket', 'unixsocket') for i in (read_from, read_from_secondary)) or read_from_path or screen_capture_on_combo)
|
||||
process_screenshots = 'screencapture' in (read_from, read_from_secondary) and not screen_capture_on_combo
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
if (not process_screenshots) and auto_pause != 0:
|
||||
auto_pause_handler = AutopauseTimer(auto_pause)
|
||||
user_input_thread = threading.Thread(target=user_input_thread_run, daemon=True)
|
||||
user_input_thread.start()
|
||||
logger.opt(ansi=True).info(f"Reading from {read_from_readable}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
|
||||
logger.opt(ansi=True).info(f"Reading from {' and '.join(read_from_readable)}, writing to {write_to_readable} using <{engine_color}>{engine_instances[engine_index].readable_name}</{engine_color}>{' (paused)' if paused else ''}")
|
||||
|
||||
while not terminated:
|
||||
sleep_time = 0
|
||||
if read_from == 'websocket':
|
||||
while True:
|
||||
try:
|
||||
item = websocket_queue.get(timeout=0.5)
|
||||
except queue.Empty:
|
||||
break
|
||||
else:
|
||||
if not paused:
|
||||
img = Image.open(io.BytesIO(item))
|
||||
process_and_write_results(img, None, None)
|
||||
elif read_from == 'unixsocket':
|
||||
while True:
|
||||
try:
|
||||
item = unixsocket_queue.get(timeout=0.5)
|
||||
except queue.Empty:
|
||||
break
|
||||
else:
|
||||
img = Image.open(io.BytesIO(item))
|
||||
process_and_write_results(img, None, None)
|
||||
elif read_from == 'clipboard':
|
||||
while True:
|
||||
try:
|
||||
item = clipboard_queue.get(timeout=0.5)
|
||||
except queue.Empty:
|
||||
break
|
||||
else:
|
||||
img = item if isinstance(item, Image.Image) else Image.open(io.BytesIO(item))
|
||||
process_and_write_results(img, None, None)
|
||||
elif read_from == 'screencapture':
|
||||
img = None
|
||||
if screen_capture_on_combo:
|
||||
try:
|
||||
img = screenshot_queue.get(timeout=0.5)
|
||||
except queue.Empty:
|
||||
pass
|
||||
else:
|
||||
if not img:
|
||||
on_window_closed(False)
|
||||
terminated = True
|
||||
break
|
||||
else:
|
||||
sleep_time = 0.5
|
||||
if (not paused) and screencapture_window_active and screencapture_window_visible:
|
||||
img = take_screenshot()
|
||||
if not img:
|
||||
on_window_closed(False)
|
||||
terminated = True
|
||||
break
|
||||
sleep_time = screen_capture_delay_secs
|
||||
if img:
|
||||
start_time = time.time()
|
||||
img = None
|
||||
filter_img = False
|
||||
|
||||
if process_queue:
|
||||
try:
|
||||
img, filter_img = image_queue.get(timeout=0.1)
|
||||
except queue.Empty:
|
||||
pass
|
||||
|
||||
if (not img) and process_screenshots:
|
||||
if (not paused) and screencapture_window_active and screencapture_window_visible and (time.time() - last_screenshot_time) > screen_capture_delay_secs:
|
||||
img = take_screenshot()
|
||||
filter_img = True
|
||||
last_screenshot_time = time.time()
|
||||
|
||||
if img == 0:
|
||||
on_window_closed(False)
|
||||
terminated = True
|
||||
break
|
||||
elif img:
|
||||
if filter_img:
|
||||
res = process_and_write_results(img, last_result, filtering)
|
||||
if res:
|
||||
last_result = (res, engine_index)
|
||||
else:
|
||||
sleep_time = delay_secs
|
||||
for path in read_from.iterdir():
|
||||
if path.suffix.lower() in allowed_extensions:
|
||||
path_key = get_path_key(path)
|
||||
if path_key not in old_paths:
|
||||
old_paths.add(path_key)
|
||||
else:
|
||||
process_and_write_results(img, None, None)
|
||||
if isinstance(img, Path):
|
||||
if delete_images:
|
||||
Path.unlink(img)
|
||||
|
||||
if not paused:
|
||||
try:
|
||||
img = Image.open(path)
|
||||
img.load()
|
||||
except (UnidentifiedImageError, OSError) as e:
|
||||
logger.warning(f'Error while reading file {path}: {e}')
|
||||
else:
|
||||
process_and_write_results(img, None, None)
|
||||
img.close()
|
||||
if delete_images:
|
||||
Path.unlink(path)
|
||||
if not terminated:
|
||||
time.sleep(sleep_time)
|
||||
elapsed_time = time.time() - start_time
|
||||
if (not terminated) and elapsed_time < 0.1:
|
||||
time.sleep(0.1 - elapsed_time)
|
||||
|
||||
if read_from == 'websocket' or write_to == 'websocket':
|
||||
if websocket_server_thread:
|
||||
websocket_server_thread.stop_server()
|
||||
websocket_server_thread.join()
|
||||
if read_from == 'clipboard':
|
||||
if clipboard_thread:
|
||||
if sys.platform == 'win32':
|
||||
win32api.PostThreadMessage(clipboard_thread.thread_id, win32con.WM_QUIT, 0, 0)
|
||||
clipboard_thread.join()
|
||||
elif read_from == 'unixsocket':
|
||||
if directory_watcher_thread:
|
||||
directory_watcher_thread.join()
|
||||
if unix_socket_server:
|
||||
unix_socket_server.shutdown()
|
||||
unix_socket_server_thread.join()
|
||||
if len(key_combos) > 0:
|
||||
if key_combo_listener:
|
||||
key_combo_listener.stop()
|
||||
if auto_pause_handler:
|
||||
auto_pause_handler.stop()
|
||||
|
||||
Reference in New Issue
Block a user