end0tknr's kipple - web写経開発

太宰府天満宮の狛犬って、妙にカワイイ

Google Cloud Vision API で、localにあるtiffやpdfの OCR

Google Cloud Vision API による OCR - end0tknr's kipple - web写経開発

先日の上記entryの関連です。

PDF や TIFF のOCRは、Cloud Storage に保存されているファイルのみ対象

Google Cloud Vision API の公式ドキュメント（下記リンク）には、次のように記載されています。

現在のところ、PDF や TIFF ドキュメントの検出は Cloud Storage バケットに保存されているファイルに対してのみ実行できます。

ファイル内のテキストを検出する（PDF / TIFF） | Cloud Vision API | Google Cloud

対策 - PDF や TIFF は、png画像に事前変換しましょう

以下のpython scriptの通りです

#!python
# -*- coding: utf-8 -*-

from google.cloud import vision
from google.oauth2 import service_account
import io
import os
import PIL
import pdf2image
import sys

# Path of the service-account key JSON file; the OCR functions in this script
# pass it to service_account.Credentials.from_service_account_file().
# NOTE(review): the file name looks like a redacted placeholder - confirm and
# replace it with the real key file name before running.
API_KEY_JSON = 'brave-airship-387700-ないしょ.json'
# Directory handed to pdf2image.convert_from_path() as its poppler_path argument.
POPPER_DIR   = "c:/Users/end0tknr/local/poppler/Library/bin"

def main():
    """Run OCR on the file named by the first command-line argument.

    The file extension is compared case-insensitively to pick a handler:
    ``.pdf`` is sent to text_detection_pdf, ``.tif`` / ``.tiff`` to
    text_detection_tiff, and every other extension to
    text_detection_normal_img. The list returned by the handler is printed
    to stdout.

    Exits with a usage message (non-zero status) when no file path is given.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message rather than an IndexError traceback.
        sys.exit("usage: <script> <path to pdf, tiff or image file>")

    org_file_path = sys.argv[1]
    ext = os.path.splitext(org_file_path)[1].lower()

    if ext == ".pdf":
        txts = text_detection_pdf(org_file_path)
    elif ext in (".tif", ".tiff"):
        # ".tif" is as common as ".tiff"; both need the PNG conversion path.
        txts = text_detection_tiff(org_file_path)
    else:
        txts = text_detection_normal_img(org_file_path)
    print(txts)


def text_detection_pdf( org_file_path ):
    """OCR every page of a local PDF through the Vision API.

    Each page is rendered to an image by pdf2image, re-encoded as PNG in
    memory, and submitted to ``text_detection`` as inline content.

    Args:
        org_file_path: Path of the PDF file to read.

    Returns:
        A list with the ``description`` of every entry in each page's
        ``text_annotations``, in page order. If any page's response carries
        an error message, the error is logged and an empty list is returned
        (results from earlier pages are discarded).
    """
    # Imported here because the module-level imports do not provide logging.
    import logging

    # Create the Vision API client and authenticate with the service account.
    credentials = service_account.Credentials.from_service_account_file( API_KEY_JSON )
    vision_api = vision.ImageAnnotatorClient(credentials=credentials)

    org_imgs = pdf2image.convert_from_path( org_file_path, poppler_path=POPPER_DIR )
    txts = []
    for org_img in org_imgs:
        # Convert the rendered page to PNG bytes before sending it.
        img_bytes = io.BytesIO()
        org_img.save(img_bytes, format="PNG")

        # OCR via the Vision API.
        image = vision.Image(content=img_bytes.getvalue())
        response = vision_api.text_detection(image=image)

        if response.error.message:
            logging.getLogger(__name__).error(
                "Vision API text_detection failed: %s", response.error.message)
            return []

        txts.extend(text.description for text in response.text_annotations)
    return txts

        
def text_detection_tiff( org_file_path ):
    """OCR a local TIFF file through the Vision API.

    Every frame (page) of the TIFF is re-encoded as PNG in memory and
    submitted to ``text_detection`` as inline content, so multi-page TIFFs
    are handled the same way multi-page PDFs are.

    Args:
        org_file_path: Path of the TIFF file to read.

    Returns:
        A list with the ``description`` of every entry in each frame's
        ``text_annotations``, in frame order. If any response carries an
        error message, the error is logged and an empty list is returned.
    """
    # Imported here because the module-level imports do not provide logging,
    # and a bare ``import PIL`` does not guarantee the Image submodule is loaded.
    import logging
    from PIL import Image, ImageSequence

    # Create the Vision API client and authenticate with the service account.
    credentials = service_account.Credentials.from_service_account_file( API_KEY_JSON )
    vision_api = vision.ImageAnnotatorClient(credentials=credentials)

    txts = []
    # The context manager closes the underlying file handle when done.
    with Image.open(org_file_path) as org_img:
        for frame in ImageSequence.Iterator(org_img):
            # Convert the current frame to PNG bytes before sending it.
            img_bytes = io.BytesIO()
            frame.save(img_bytes, format="PNG")

            # OCR via the Vision API.
            image = vision.Image(content=img_bytes.getvalue())
            response = vision_api.text_detection(image=image)

            if response.error.message:
                logging.getLogger(__name__).error(
                    "Vision API text_detection failed: %s", response.error.message)
                return []

            txts.extend(text.description for text in response.text_annotations)
    return txts

def text_detection_normal_img( org_file_path ):
    """OCR a local image file through the Vision API without conversion.

    The file's raw bytes are submitted to ``text_detection`` as inline
    content.

    Args:
        org_file_path: Path of the image file to read.

    Returns:
        A list with the ``description`` of every entry in the response's
        ``text_annotations``. If the response carries an error message, the
        error is logged and an empty list is returned.
    """
    # Imported here because the module-level imports do not provide logging.
    import logging

    # Create the Vision API client and authenticate with the service account.
    credentials = service_account.Credentials.from_service_account_file( API_KEY_JSON )
    vision_api = vision.ImageAnnotatorClient(credentials=credentials)

    with open(org_file_path, 'rb') as f:
        img_content_bytes = f.read()

    # OCR via the Vision API.
    image = vision.Image(content=img_content_bytes)
    response = vision_api.text_detection(image=image)

    if response.error.message:
        logging.getLogger(__name__).error(
            "Vision API text_detection failed: %s", response.error.message)
        return []

    return [text.description for text in response.text_annotations]


# Run the OCR only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()