[PYTHON] Transcription of images with GCP's Vision API

Transcription of images with Vision API

The 'type' field accepts two values for OCR, "TEXT_DETECTION" and "DOCUMENT_TEXT_DETECTION"; the latter is used here.

#coding:utf-8
import base64
import json
from requests import Request, Session
from io import BytesIO
from PIL import Image
import pandas as pd

# Open an image file with PIL and send it to the Cloud Vision API
def recognize_image(pil_image, api_key="API key"):
    """Send an image file to the Cloud Vision API and return the detected text.

    The image is re-encoded as PNG in memory, base64-encoded and posted to
    the ``images:annotate`` endpoint with a single
    ``DOCUMENT_TEXT_DETECTION`` feature.

    Args:
        pil_image: Despite the name, this is NOT an already-opened PIL
            image. It is passed straight to ``Image.open``, so it must be
            something that call accepts (typically a file path).
        api_key: API key appended to the request URL. The default is the
            placeholder string the original script shipped with; pass a
            real key to get a successful response.

    Returns:
        The ``text`` of the first response's ``fullTextAnnotation``, or
        ``None`` when the HTTP status is not 200 or the response contains
        no full-text annotation.

    Raises:
        Errors from opening the image, from the HTTP request (connection
        failure, timeout) and from decoding the response body as JSON are
        not caught here and propagate to the caller.
    """

    def pil_image_to_base64(img_path):
        """Load ``img_path``, re-encode it as PNG and return it as base64 text."""
        # The context manager closes the underlying file once the PNG copy
        # has been written to the in-memory buffer.
        with Image.open(img_path) as img:
            buffered = BytesIO()
            img.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def get_fullTextAnnotation(json_data):
        """Extract the full text from a raw JSON response body, or ``None``."""
        text_dict = json.loads(json_data)
        try:
            return text_dict["responses"][0]["fullTextAnnotation"]["text"]
        except (KeyError, IndexError, TypeError):
            # Only "the expected field is missing" is treated as "no text";
            # anything else (e.g. KeyboardInterrupt) is no longer swallowed.
            # The print is kept so console output matches the original.
            print(None)
            return None

    str_encode_file = pil_image_to_base64(pil_image)
    str_url = "https://vision.googleapis.com/v1/images:annotate?key="
    str_headers = {'Content-Type': 'application/json'}
    str_json_data = {
        'requests': [
            {
                'image': {
                    'content': str_encode_file
                },
                'features': [
                    {
                        'type': "DOCUMENT_TEXT_DETECTION",
                        'maxResults': 10
                    }
                ]
            }
        ]
    }

    # Use the session as a context manager so its connection pool is
    # released even if the request raises.
    with Session() as obj_session:
        obj_request = Request("POST",
                              str_url + api_key,
                              data=json.dumps(str_json_data),
                              headers=str_headers
                              )
        obj_prepped = obj_session.prepare_request(obj_request)
        obj_response = obj_session.send(obj_prepped,
                                        verify=True,
                                        timeout=60
                                        )

    if obj_response.status_code != 200:
        # Make the "request failed" result explicit instead of falling off
        # the end of the function.
        return None

    return get_fullTextAnnotation(obj_response.text)

# Example invocation: replace "image path" with the path of the image to
# transcribe. The returned text is not used here.
recognize_image("image path")

Recommended Posts

Transcription of images with GCP's Vision API

Flow of extracting text in PDF with Cloud Vision API

Multi-class, multi-label classification of images with pytorch

How to use GCP's Cloud Vision API

Make API of switchbot thermo-hygrometer with Node-RED

Shuffle hundreds of thousands of images evenly with tensorflow.

Problems with output results with Google's Cloud Vision API

Text extraction with GCP Cloud Vision API (Python3.6)

View images on S3 with API Gateway + Lambda

I tried "License OCR" with Google Vision API