From aa0cba6ee5c18be04149afc1130044e27becd4b6 Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Sat, 1 Feb 2025 08:31:42 +0100 Subject: [PATCH] Take Lens url headers/parameters from https://github.com/dimdenGD/chrome-lens-ocr --- README.md | 1 + owocr/ocr.py | 53 ++++++++++++++++++++++++++++++++++++++++++++-------- setup.py | 2 +- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index c32a505..2fffd74 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ Additionally: This uses code from/references these projects: - Viola for working on the Google Lens implementation and helping with the pyobjc VisionKit code! +- [chrome-lens-ocr](https://github.com/dimdenGD/chrome-lens-ocr) for additional Lens reverse engineering and the headers/URL parameters I currently use - @ronaldoussoren for helping with the pyobjc VisionKit code - [Manga OCR](https://github.com/kha-white/manga-ocr) - [ocrmac](https://github.com/straussmaximilian/ocrmac) for the Apple Vision framework API diff --git a/owocr/ocr.py b/owocr/ocr.py index cc2f068..09b4c46 100644 --- a/owocr/ocr.py +++ b/owocr/ocr.py @@ -64,8 +64,6 @@ except ImportError: try: import pyjson5 - import random - import string except ImportError: pass @@ -203,14 +201,53 @@ class GoogleLens: else: raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}') - timestamp = int(time.time() * 1000) - random_filename = ''.join(random.choices(string.ascii_letters, k=8)) - url = f'https://lens.google.com/v3/upload?st={timestamp}' - headers = {'User-Agent': 'Mozilla/5.0 (SMART-TV; Linux; Tizen 6.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/6.0 TV Safari/538.1 STvPlus/9e6462f14a056031e5b32ece2af7c3ca,gzip(gfe),gzip(gfe)'} + fake_chromium_config = { + 'viewport': (1920, 1080), + 'major_version': '109', + 'version': '109.0.5414.87', + 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5414.87 Safari/537.36' + } + + url 
= 'https://lens.google.com/v3/upload' + files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')} + params = { + 'ep': 'ccm', #EntryPoint + 're': 'dcsp', #RenderingEnvironment - DesktopChromeSurfaceProto + 's': '4', #SurfaceProtoValue - Surface.CHROMIUM + 'st': str(int(time.time() * 1000)), + 'sideimagesearch': '1', + 'vpw': str(fake_chromium_config['viewport'][0]), + 'vph': str(fake_chromium_config['viewport'][1]) + } + headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'en-US,en;q=0.9', + 'Cache-Control': 'max-age=0', + 'Origin': 'https://lens.google.com', + 'Referer': 'https://lens.google.com/', + 'Sec-Ch-Ua': f'"Not A(Brand";v="99", "Google Chrome";v="{fake_chromium_config["major_version"]}", "Chromium";v="{fake_chromium_config["major_version"]}"', + 'Sec-Ch-Ua-Arch': '"x86"', + 'Sec-Ch-Ua-Bitness': '"64"', + 'Sec-Ch-Ua-Full-Version': f'"{fake_chromium_config["version"]}"', + 'Sec-Ch-Ua-Full-Version-List': f'"Not A(Brand";v="99.0.0.0", "Google Chrome";v="{fake_chromium_config["major_version"]}", "Chromium";v="{fake_chromium_config["major_version"]}"', + 'Sec-Ch-Ua-Mobile': '?0', + 'Sec-Ch-Ua-Model': '""', + 'Sec-Ch-Ua-Platform': '"Windows"', + 'Sec-Ch-Ua-Platform-Version': '"15.0.0"', + 'Sec-Ch-Ua-Wow64': '?0', + 'Sec-Fetch-Dest': 'document', + 'Sec-Fetch-Mode': 'navigate', + 'Sec-Fetch-Site': 'same-origin', + 'Sec-Fetch-User': '?1', + 'Upgrade-Insecure-Requests': '1', + 'User-Agent': fake_chromium_config['user_agent'], + 'X-Client-Data': 'CIW2yQEIorbJAQipncoBCIH+ygEIkqHLAQiKo8sBCPWYzQEIhaDNAQji0M4BCLPTzgEI19TOAQjy1c4BCJLYzgEIwNjOAQjM2M4BGM7VzgE=' + } cookies = {'SOCS': 'CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg'} - files = {'encoded_image': (random_filename + '.png', self._preprocess(img), 'image/png')} + try: - res = requests.post(url, files=files, headers=headers, 
cookies=cookies, timeout=20) + res = requests.post(url, files=files, params=params, headers=headers, cookies=cookies, timeout=20) except requests.exceptions.Timeout: return (False, 'Request timeout!') except requests.exceptions.ConnectionError: diff --git a/setup.py b/setup.py index d4859c1..112cad0 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ long_description = (Path(__file__).parent / "README.md").read_text('utf-8') setup( name="owocr", - version='1.9.0', + version='1.9.1', description="Japanese OCR", long_description=long_description, long_description_content_type="text/markdown",