From 9518272a5bbdf282ad3b4d2fb7aa46ff1bd8e173 Mon Sep 17 00:00:00 2001
From: AuroraWright
Date: Sat, 20 Jan 2024 02:45:29 +0100
Subject: [PATCH] Implement Google Lens (thanks Viola!)

---
 README.md         |  3 +++
 owocr/__init__.py |  8 +-------
 owocr/ocr.py      | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 owocr/run.py      |  2 +-
 owocr_config.ini  |  2 +-
 5 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index aa2a99f..0885e17 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@ This has been tested with Python 3.11. Newer/older versions might work. For now
 - WinRT OCR: this will work on Windows 10 or later if winocr (`pip install winocr`) is installed. It can also be used by installing winocr on a Windows virtual machine and running the server (`winocr_serve`), installing requests (`pip install requests`) and specifying the IP address of the Windows VM/machine in the config file (see below) ("w" key)
 
 ## Cloud providers
+- Google Lens: Google Vision in disguise (no need for API keys!), however it needs to download a couple megabytes of data for each request. You need to install chompjs and requests (`pip install chompjs requests`) ("l" key)
 - Google Vision: you need a service account .json file named google_vision.json in `user directory/.config/` and installing google-cloud-vision (`pip install google-cloud-vision`) ("g" key)
 - Azure Computer Vision: you need to specify an api key and an endpoint in the config file (see below) and to install azure-cognitiveservices-vision-computervision (`pip install azure-cognitiveservices-vision-computervision`) ("v" key)
 
@@ -36,3 +37,5 @@ This uses code from/references these projects:
 - [Manga OCR](https://github.com/kha-white/manga-ocr)
 - [ocrmac](https://github.com/straussmaximilian/ocrmac) for the Apple Vision framework API
 - [NadeOCR](https://github.com/Natsume-197/NadeOCR) for the Google Vision API
+
+Thanks to viola for working on the Google Lens implementation!
\ No newline at end of file
diff --git a/owocr/__init__.py b/owocr/__init__.py
index 00e7eb9..657df77 100644
--- a/owocr/__init__.py
+++ b/owocr/__init__.py
@@ -1,9 +1,3 @@
 __version__ = '0.1.10'
 
-from owocr.ocr import MangaOcr
-from owocr.ocr import GoogleVision
-from owocr.ocr import AppleVision
-from owocr.ocr import WinRTOCR
-from owocr.ocr import AzureComputerVision
-from owocr.ocr import EasyOCR
-from owocr.ocr import PaddleOCR
+from owocr.ocr import *
diff --git a/owocr/ocr.py b/owocr/ocr.py
index 4b54b35..04d67d8 100644
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -56,6 +56,11 @@ try:
 except ImportError:
     pass
 
+try:
+    import chompjs
+except ImportError:
+    pass
+
 
 def post_process(text):
     text = ''.join(text.split())
@@ -138,6 +143,51 @@ class GoogleVision:
         img.save(image_bytes, format=img.format)
         return image_bytes.getvalue()
 
+class GoogleLens:
+    name = "glens"
+    readable_name = "Google Lens"
+    key = "l"
+    available = False
+
+    def __init__(self):
+        if 'chompjs' not in sys.modules:
+            logger.warning('chompjs not available, Google Lens will not work!')
+        elif 'requests' not in sys.modules:
+            logger.warning('requests not available, Google Lens will not work!')
+        else:
+            self.available = True
+            logger.info('Google Lens ready')
+
+    def __call__(self, img_or_path):
+        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+            img = Image.open(img_or_path)
+        elif isinstance(img_or_path, Image.Image):
+            img = img_or_path
+        else:
+            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+
+        timestamp = int(time.time() * 1000)
+        url = f"https://lens.google.com/v3/upload?stcs={timestamp}"
+        files = {"encoded_image": ('owo' + str(timestamp) + '.png', self._preprocess(img), 'image/png')}
+        res = requests.post(url, files=files)
+
+        x = ''
+        if res.status_code == 200:
+            regex = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?);")
+            match = regex.search(res.text)
+            if match != None:
+                lines = chompjs.parse_js_object(match.group(1))["data"][3][4][0][0]
+                for line in lines:
+                    x += line + ' '
+        x = post_process(x)
+
+        return x
+
+    def _preprocess(self, img):
+        image_bytes = io.BytesIO()
+        img.save(image_bytes, format="png")
+        return image_bytes.getvalue()
+
 class AppleVision:
     name = "avision"
     readable_name = "Apple Vision"
diff --git a/owocr/run.py b/owocr/run.py
index 63e8676..5e37acd 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -156,7 +156,7 @@ def run(read_from='clipboard',
     :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", or a path to a directory.
     :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
     :param delay_secs: How often to check for new images, in seconds.
-    :param engine: OCR engine to use. Available: "mangaocr", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr".
+    :param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr".
     :param pause_at_startup: Pause at startup.
     :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
     :param delete_images: Delete image files after processing when reading from a directory.
diff --git a/owocr_config.ini b/owocr_config.ini
index c15a8b0..4c1701f 100644
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -1,5 +1,5 @@
 [general]
-;engines = avision,gvision,azure,mangaocr,winrtocr,easyocr,paddleocr
+;engines = avision,glens,gvision,azure,mangaocr,winrtocr,easyocr,paddleocr
 ;logger_format = {time:HH:mm:ss.SSS} | {message}
 ;engine_color = cyan
 ;websocket_port = 7331
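
---

For reviewers who want to exercise the new engine outside of `owocr/run.py`, below is a minimal sketch based only on the interface shown in the `GoogleLens` class added in `owocr/ocr.py` above: a no-argument constructor, an `available` flag, and a `__call__` that accepts a file path or a `PIL.Image`. It assumes `chompjs` and `requests` are installed as described in the README change (plus Pillow, which owocr already uses); the image filename is a placeholder.

```python
# Minimal usage sketch (not part of the patch): drive the new GoogleLens engine directly.
# Assumes `pip install chompjs requests` per the README change, plus Pillow.
from PIL import Image

from owocr.ocr import GoogleLens

engine = GoogleLens()
if engine.available:
    # __call__ accepts a path or a PIL.Image; 'screenshot.png' is a placeholder file.
    text = engine(Image.open('screenshot.png'))
    print(text)
else:
    print('chompjs or requests is missing, so the Google Lens engine is unavailable')
```

In normal use the engine would instead be selected with the new `"glens"` name, either via the `engines` line of `owocr_config.ini` or the `engine` parameter of `run()`, both of which this patch updates.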