The document was a little confusing, so I'll summarize it.
Both OCR feature types return output with the same hierarchical structure:
TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
import base64
import json
from requests import Request, Session
from io import BytesIO
from PIL import Image
import numpy as np
How to recognize text from Python using the OCR feature of the Google Cloud Vision API
def _encode_image_as_base64_png(image_source):
    """Return *image_source* re-encoded as a base64 PNG string.

    Args:
        image_source: Either a ``numpy.ndarray`` holding pixel data (it is
            cast to ``uint8`` before conversion) or anything accepted by
            ``PIL.Image.open`` (typically an image file path).

    Returns:
        The PNG bytes of the image, base64-encoded and decoded to a UTF-8
        ``str`` so it can be embedded in a JSON request body.
    """
    buffered = BytesIO()
    if isinstance(image_source, np.ndarray):
        Image.fromarray(np.uint8(image_source)).save(buffered, format="PNG")
    else:
        # Use the context manager so the underlying file handle is released
        # as soon as the image has been re-encoded.
        with Image.open(image_source) as pil_image:
            pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _extract_full_text(json_data):
    """Return ``responses[0].fullTextAnnotation.text`` from a JSON string.

    Args:
        json_data: The raw JSON text of an ``images:annotate`` response.

    Returns:
        The recognized text, or ``None`` when the response does not contain
        that field (for example when no text was detected).
    """
    text_dict = json.loads(json_data)
    try:
        return text_dict["responses"][0]["fullTextAnnotation"]["text"]
    except (KeyError, IndexError, TypeError):
        # Missing annotation is an expected outcome, not an error.
        return None


def recognize_image1(input_image, *, api_key="",
                     feature_type="DOCUMENT_TEXT_DETECTION", timeout=60):
    """Run Google Cloud Vision OCR on an image and return the detected text.

    Args:
        input_image: Image file path (or any object ``PIL.Image.open``
            accepts), or a ``numpy.ndarray`` of pixel data.
        api_key: Google Cloud API key sent as the ``key`` query parameter.
            Defaults to an empty string, so a real key must be supplied for
            the request to be authorized.
        feature_type: Value of the ``type`` field in the request's feature
            entry. Defaults to ``"DOCUMENT_TEXT_DETECTION"``.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        The full recognized text as ``str``; ``None`` if the response carries
        no ``fullTextAnnotation``; or the literal string ``"error"`` when the
        HTTP status code is not 200 (kept for backward compatibility).
    """
    encoded_image = _encode_image_as_base64_png(input_image)

    endpoint = "https://vision.googleapis.com/v1/images:annotate"
    headers = {'Content-Type': 'application/json'}
    payload = {
        'requests': [
            {
                'image': {
                    'content': encoded_image,
                },
                'features': [
                    {
                        'type': feature_type,
                        'maxResults': 1,
                    },
                ],
            },
        ],
    }

    # The session is used as a context manager so its connection pool is
    # closed once the single request has completed.
    with Session() as session:
        request = Request(
            "POST",
            endpoint,
            params={'key': api_key},  # URL-encoded by requests
            data=json.dumps(payload),
            headers=headers,
        )
        prepared = session.prepare_request(request)
        response = session.send(prepared, verify=True, timeout=timeout)

    if response.status_code != 200:
        return "error"
    return _extract_full_text(response.text)
Release Notes Feature Recognize vertical text using Google Cloud Vision
Recommended Posts