[PYTHON] How to use GCP's Cloud Vision API

The official documentation was a little confusing, so I'll summarize it here.

API parameters

Type
maxResults
model

The GCP Cloud Vision API offers two OCR feature types (the `type` parameter):

Text detection "TEXT_DETECTION" (optimized for sparse areas of text in large images)
Document text detection "DOCUMENT_TEXT_DETECTION" (suitable for high density text)

Both types return OCR output with the following hierarchical structure:

TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol

Import what you need

import base64
import json
from requests import Request, Session
from io import BytesIO
from PIL import Image
import numpy as np

API key acquisition

For how to obtain an API key, see: "How to recognize characters from Python using OCR of Google Cloud Vision API"

How to use the API

def recognize_image1(input_image, *, api_key="", feature_type="DOCUMENT_TEXT_DETECTION"):
    """Send an image to the Cloud Vision ``images:annotate`` endpoint and return its text.

    The image is re-encoded as PNG, base64-encoded, and posted as a single
    annotate request with ``maxResults`` set to 1.

    Args:
        input_image: Either something ``PIL.Image.open`` accepts (for example
            an image file path) or a ``numpy.ndarray`` holding pixel data.
            Arrays are cast to ``uint8`` before being converted to an image.
        api_key: Cloud Vision API key, appended to the request URL. Defaults
            to an empty string, so a real key must be supplied for the call
            to succeed.
        feature_type: Value of the feature ``type`` field, e.g.
            ``"TEXT_DETECTION"`` or ``"DOCUMENT_TEXT_DETECTION"``.

    Returns:
        The ``fullTextAnnotation.text`` string of the first response, ``None``
        if the response body does not contain that field, or the string
        ``"error"`` if the HTTP status code is not 200.
    """

    def encode_png_base64(pil_image):
        # Serialize the image to PNG in memory, then base64-encode the bytes.
        buffered = BytesIO()
        pil_image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    # When converting from a path to base64
    def pil_image_to_base64(img_path):
        # The context manager releases the underlying file once encoding is done.
        with Image.open(img_path) as pil_image:
            return encode_png_base64(pil_image)

    # When converting from an array to base64
    def array_to_base64(img_array):
        return encode_png_base64(Image.fromarray(np.uint8(img_array)))

    def get_fullTextAnnotation(json_data):
        text_dict = json.loads(json_data)
        try:
            return text_dict["responses"][0]["fullTextAnnotation"]["text"]
        except (KeyError, IndexError, TypeError):
            # The expected field is absent from the response body
            # (presumably the case when no text was detected -- confirm
            # against the API reference).
            return None

    # Pick the encoder from the input type so callers no longer have to edit
    # the source to switch between a path and an array.
    if isinstance(input_image, np.ndarray):
        str_encode_file = array_to_base64(input_image)
    else:
        str_encode_file = pil_image_to_base64(input_image)

    str_url = "https://vision.googleapis.com/v1/images:annotate?key="
    str_headers = {'Content-Type': 'application/json'}
    str_json_data = {
        'requests': [
            {
                'image': {
                    'content': str_encode_file,
                },
                'features': [
                    {
                        'type': feature_type,
                        'maxResults': 1,
                    },
                ],
            },
        ],
    }

    # Using the session as a context manager closes its connections even if
    # sending the request raises.
    with Session() as obj_session:
        obj_request = Request(
            "POST",
            str_url + api_key,
            data=json.dumps(str_json_data),
            headers=str_headers,
        )
        obj_prepped = obj_session.prepare_request(obj_request)
        obj_response = obj_session.send(obj_prepped, verify=True, timeout=60)

    if obj_response.status_code != 200:
        # Kept as a sentinel string for backward compatibility with callers.
        return "error"
    return get_fullTextAnnotation(obj_response.text)

References

Release Notes Feature Recognize vertical text using Google Cloud Vision