Implement Google Lens (thanks Viola!)
@@ -16,6 +16,7 @@ This has been tested with Python 3.11. Newer/older versions might work. For now
 - WinRT OCR: this will work on Windows 10 or later if winocr (`pip install winocr`) is installed. It can also be used by installing winocr on a Windows virtual machine and running the server (`winocr_serve`), installing requests (`pip install requests`) and specifying the IP address of the Windows VM/machine in the config file (see below) ("w" key)
 
 ## Cloud providers
 
+- Google Lens: Google Vision in disguise (no need for API keys!); however, it needs to download a couple of megabytes of data for each request. You need to install chompjs and requests (`pip install chompjs requests`) ("l" key)
 - Google Vision: you need a service account .json file named google_vision.json in `user directory/.config/` and to install google-cloud-vision (`pip install google-cloud-vision`) ("g" key)
 - Azure Computer Vision: you need to specify an API key and an endpoint in the config file (see below) and to install azure-cognitiveservices-vision-computervision (`pip install azure-cognitiveservices-vision-computervision`) ("v" key)
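As a quick orientation for the new "l" engine added above: a minimal usage sketch, assuming this commit is installed and chompjs/requests are available; the image path is a placeholder.

```python
# Minimal sketch: drive the new Google Lens engine directly from Python.
# 'screenshot.png' is a placeholder path; __call__ also accepts a pathlib.Path or a PIL.Image.
from owocr.ocr import GoogleLens

engine = GoogleLens()          # logs "Google Lens ready" when chompjs and requests are present
if engine.available:
    text = engine('screenshot.png')
    print(text)
```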
@@ -36,3 +37,5 @@ This uses code from/references these projects:
 - [Manga OCR](https://github.com/kha-white/manga-ocr)
 - [ocrmac](https://github.com/straussmaximilian/ocrmac) for the Apple Vision framework API
 - [NadeOCR](https://github.com/Natsume-197/NadeOCR) for the Google Vision API
+
+Thanks to viola for working on the Google Lens implementation!
@@ -1,9 +1,3 @@
 __version__ = '0.1.10'
 
-from owocr.ocr import MangaOcr
-from owocr.ocr import GoogleVision
-from owocr.ocr import AppleVision
-from owocr.ocr import WinRTOCR
-from owocr.ocr import AzureComputerVision
-from owocr.ocr import EasyOCR
-from owocr.ocr import PaddleOCR
+from owocr.ocr import *
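The wildcard import means engines added to `owocr/ocr.py` (such as the new GoogleLens class) are exposed by the package without editing `__init__.py` again. A quick check, assuming `owocr.ocr` defines no restrictive `__all__`:

```python
# Every public engine class defined in owocr/ocr.py is re-exported by the package.
from owocr.ocr import *

for engine_cls in (MangaOcr, GoogleLens, GoogleVision, AppleVision,
                   WinRTOCR, AzureComputerVision, EasyOCR, PaddleOCR):
    print(engine_cls.__name__)
```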
owocr/ocr.py (50 lines changed)
@@ -56,6 +56,11 @@ try:
 except ImportError:
     pass
 
+try:
+    import chompjs
+except ImportError:
+    pass
+
 
 def post_process(text):
     text = ''.join(text.split())
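The chompjs import follows the same optional-dependency pattern as the other backends: import at module load if present, and let each engine check `sys.modules` rather than importing again. A standalone sketch of that pattern (the messages are illustrative, not owocr's own):

```python
import sys

# Optional dependencies: a missing package is not an error at import time.
try:
    import chompjs
except ImportError:
    pass

try:
    import requests
except ImportError:
    pass

# Engines then gate themselves on what actually got imported,
# which is what GoogleLens.__init__ does in the next hunk.
if 'chompjs' in sys.modules and 'requests' in sys.modules:
    print('Google Lens dependencies found')
else:
    print('Google Lens will not work')
```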
@@ -138,6 +143,51 @@ class GoogleVision:
         img.save(image_bytes, format=img.format)
         return image_bytes.getvalue()
 
+class GoogleLens:
+    name = "glens"
+    readable_name = "Google Lens"
+    key = "l"
+    available = False
+
+    def __init__(self):
+        if 'chompjs' not in sys.modules:
+            logger.warning('chompjs not available, Google Lens will not work!')
+        elif 'requests' not in sys.modules:
+            logger.warning('requests not available, Google Lens will not work!')
+        else:
+            self.available = True
+            logger.info('Google Lens ready')
+
+    def __call__(self, img_or_path):
+        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+            img = Image.open(img_or_path)
+        elif isinstance(img_or_path, Image.Image):
+            img = img_or_path
+        else:
+            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+
+        timestamp = int(time.time() * 1000)
+        url = f"https://lens.google.com/v3/upload?stcs={timestamp}"
+        files = {"encoded_image": ('owo' + str(timestamp) + '.png', self._preprocess(img), 'image/png')}
+        res = requests.post(url, files=files)
+
+        x = ''
+        if res.status_code == 200:
+            regex = re.compile(r">AF_initDataCallback\(({key: 'ds:1'.*?);</script>")
+            match = regex.search(res.text)
+            if match is not None:
+                lines = chompjs.parse_js_object(match.group(1))["data"][3][4][0][0]
+                for line in lines:
+                    x += line + ' '
+                x = post_process(x)
+
+        return x
+
+    def _preprocess(self, img):
+        image_bytes = io.BytesIO()
+        img.save(image_bytes, format="png")
+        return image_bytes.getvalue()
+
 class AppleVision:
     name = "avision"
     readable_name = "Apple Vision"
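A note on why chompjs is needed in the parsing step above: the Lens response embeds the OCR result in an `AF_initDataCallback` JavaScript object literal with unquoted keys, which `json.loads` rejects. A toy illustration with a made-up payload (the real result is reached via the `["data"][3][4][0][0]` path used above):

```python
import json
import chompjs

# Made-up payload; the real Lens response is much larger and nested differently.
payload = "{key: 'ds:1', hash: '2', data: [['こんにちは', '世界']]}"

try:
    json.loads(payload)
except json.JSONDecodeError:
    print('json.loads rejects unquoted keys and single quotes')

data = chompjs.parse_js_object(payload)   # parses JavaScript object literals into Python objects
print(data['data'][0])                    # ['こんにちは', '世界']
```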
@@ -156,7 +156,7 @@ def run(read_from='clipboard',
     :param read_from: Specifies where to read input images from. Can be either "clipboard", "websocket", or a path to a directory.
     :param write_to: Specifies where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.
     :param delay_secs: How often to check for new images, in seconds.
-    :param engine: OCR engine to use. Available: "mangaocr", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr".
+    :param engine: OCR engine to use. Available: "mangaocr", "glens", "gvision", "avision", "azure", "winrtocr", "easyocr", "paddleocr".
     :param pause_at_startup: Pause at startup.
     :param ignore_flag: Process flagged clipboard images (images that are copied to the clipboard with the *ocr_ignore* string).
     :param delete_images: Delete image files after processing when reading from a directory.
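With "glens" now listed in the docstring, the engine can be selected like any other. A hedged sketch; the `owocr.run` import path is an assumption not confirmed by this diff, and omitted parameters keep their defaults:

```python
from owocr.run import run   # assumed import path, not shown in this diff

# Watch the clipboard for new images, OCR them with Google Lens,
# and write the recognized text back to the clipboard.
run(read_from='clipboard', write_to='clipboard', engine='glens')
```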
@@ -1,5 +1,5 @@
 [general]
-;engines = avision,gvision,azure,mangaocr,winrtocr,easyocr,paddleocr
+;engines = avision,glens,gvision,azure,mangaocr,winrtocr,easyocr,paddleocr
 ;logger_format = <green>{time:HH:mm:ss.SSS}</green> | <level>{message}</level>
 ;engine_color = cyan
 ;websocket_port = 7331