end0tknr's kipple - web写経開発

太宰府天満宮の狛犬って、妙にカワイイ

MobileNet , tf2onnx for python 等による類似画像検索

先程のentryで構築した環境を用い、参考urlにある類似画像検索を写経。

目次

参考url

TODO - 224*224よりも大きな画像の類似検索

python script等は以下に記載している通りで、それっぽく動作しました。

類似検索の対象にした 2D CAD図面は、 特徴量算出モデル MobileNet v3の入力に合わせ、 224*224サイズに縮小する必要があります。

この縮小により、CAD図面の線画の情報が失われている気がします。

224*224よりも大きなサイズの類似検索はどうしたものか

1. mobilenet_v3 の取得とonnx形式への変換

mobilenet_v3 の TensorFlow Hub形式での取得

https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5 にある Download → Download as tar.gz ボタンで mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1.tar.gz をダウンロードし、 これを解凍します。

PS C:\Users\end0t\tmp\similar_img>tree /F
C:.
└─mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1
    │  saved_model.pb
    └─variables
            variables.data-00000-of-00001
            variables.index

onnx形式への変換

miniconda cuda> python -m tf2onnx.convert \
   --saved-model mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1 \
   --output mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1.onnx

C:\Users\end0t\miniconda3\envs\cuda\lib\runpy.py:126: RuntimeWarning: 'tf2onnx.convert' found in sys.modules after import of package 'tf2onnx', but prior to execution of 'tf2onnx.convert'; this may result in unpredictable behaviour
  warn(RuntimeWarning(msg))
2024-08-14 16:39:01,033 - WARNING - '--tag' not specified for saved_model. Using --tag serve
2024-08-14 16:39:01,843 - INFO - Fingerprint not found. Saved model loading will continue.
2024-08-14 16:39:01,843 - INFO - Signatures found in model: [serving_default].
2024-08-14 16:39:01,843 - WARNING - '--signature_def' not specified, using first signature: serving_default
2024-08-14 16:39:01,854 - INFO - Output names: ['feature_vector']
2024-08-14 16:39:03,539 - INFO - Using tensorflow=2.13.1, onnx=1.16.2, tf2onnx=1.16.1/15c810
2024-08-14 16:39:03,539 - INFO - Using opset <onnx, 15>
2024-08-14 16:39:03,665 - INFO - Computed 0 values for constant folding
2024-08-14 16:39:03,934 - INFO - Optimizing ONNX model
2024-08-14 16:39:05,841 - INFO - After optimization: BatchNormalization -46 (46->0), Const -262 (406->144), GlobalAveragePool +8 (0->8), Identity -2 (2->0), ReduceMean -8 (8->0), Reshape +3 (15->18), Transpose -236 (237->1)
2024-08-14 16:39:06,010 - INFO -
2024-08-14 16:39:06,010 - INFO - Successfully converted TensorFlow model mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1 to ONNX
2024-08-14 16:39:06,010 - INFO - Model inputs: ['inputs']
2024-08-14 16:39:06,010 - INFO - Model outputs: ['feature_vector']
2024-08-14 16:39:06,010 - INFO - ONNX model is saved at mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1.onnx

推論することで、変換できていることを確認

import numpy as np
import onnxruntime

# Show which device and execution providers this onnxruntime build offers.
print( onnxruntime.get_device() )
print( onnxruntime.get_available_providers() )

provider = ['CUDAExecutionProvider','CPUExecutionProvider']

session = onnxruntime.InferenceSession(
    "./mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1.onnx",
    providers=provider)

print( session.get_providers() )

# Feed one all-zero and one all-one dummy batch through the model;
# getting a feature vector back confirms the converted graph runs.
for make_batch in (np.zeros, np.ones):
    dummy_input = make_batch((1, 224, 224, 3), dtype=np.float32)
    result = session.run(["feature_vector"], {"inputs": dummy_input})
    print( result )

↑こう書いて、↓こう表示されればOK

(cuda) C:\Users\end0t\tmp\similar_img>python foo1.py
GPU
['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
['CUDAExecutionProvider', 'CPUExecutionProvider']
[array([[-0.19626826, -0.3297085 ,  0.01850062, ...,  1.1618388 ,
        -0.3663718 , -0.33905375]], dtype=float32)]
[array([[ 0.46943867,  0.20897101,  0.30629852, ..., -0.36712584,
        -0.31481627, -0.33279896]], dtype=float32)]

2. 各画像ファイルの特徴量算出

import glob
import os
import numpy as np
import onnxruntime
import PIL.Image

# Directory layout: source images under ./png, one feature file per image
# mirrored under ./feature, batched feature files under ./merged_feature.
img_base_dir       = os.path.abspath( "./png" )
feature_base_dir   = os.path.abspath( "./feature" )
merged_feature_dir = os.path.abspath( "./merged_feature" )
merge_limit = 200  # number of feature vectors packed into one merged .npy file
onnx_model_path = \
    "./mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1.onnx"
#provider = ['CUDAExecutionProvider','CPUExecutionProvider']
# CPU alone is fast enough for this workload
provider = ['CPUExecutionProvider']

def main():
    """Compute a MobileNet v3 feature vector for every PNG under
    img_base_dir, save each vector under the mirrored path in
    feature_base_dir, then pack the vectors into batches of
    merge_limit files under merged_feature_dir.
    """
    # Load the ONNX model once; every image reuses this session.
    onnx_session = onnxruntime.InferenceSession( onnx_model_path,
                                                 providers=provider )

    # --- per-image feature extraction ---------------------------------
    for img_path in glob.glob(img_base_dir + "/**/*.png", recursive=True):
        feature_path = img_path.replace(img_base_dir, feature_base_dir)
        # Mirror the image's subdirectory under feature_base_dir;
        # np.save does not create missing directories.
        os.makedirs(os.path.dirname(feature_path), exist_ok=True)
        feature = calc_feature(onnx_session, img_path)
        # np.save appends ".npy", so the file on disk is "<name>.png.npy".
        np.save(feature_path, feature)

    # --- merge the individual feature files into batches --------------
    features  = []
    img_paths = []
    batch_no  = 0
    for feature_path in glob.glob(feature_base_dir+"/**/*.npy",recursive=True):
        features.append( np.load( feature_path ) )
        # Map the feature file back to its source image path, dropping
        # the ".npy" suffix that np.save appended (the original code
        # stored paths ending in ".png.npy").
        img_path = feature_path.replace(feature_base_dir, img_base_dir)
        if img_path.endswith(".npy"):
            img_path = img_path[:-len(".npy")]
        img_paths.append(img_path)

        if len(features) >= merge_limit:
            _save_batch(features, img_paths, batch_no)
            features, img_paths = [], []
            batch_no += 1

    # The original loop dropped one file on every flush and never wrote
    # the final partial batch; flush the remainder so nothing is lost.
    if features:
        _save_batch(features, img_paths, batch_no)

def _save_batch(features, img_paths, batch_no):
    """Write one batch of feature vectors and their image paths
    to merged_feature_dir as features_NNN.npy / img_paths_NNN.npy."""
    os.makedirs(merged_feature_dir, exist_ok=True)
    features_path = os.path.join(merged_feature_dir,
                                 "features_{:03d}.npy".format(batch_no) )
    np.save(features_path, features)
    img_paths_path = os.path.join(merged_feature_dir,
                                  "img_paths_{:03d}.npy".format(batch_no) )
    np.save(img_paths_path, img_paths)
        
def calc_feature(onnx_session, img_path):
    """Return the MobileNet v3 feature vector (1-D float32 array) for one image.

    onnx_session: onnxruntime.InferenceSession of the feature-vector model.
    img_path:     path to the image file to embed.
    """
    image = PIL.Image.open( img_path )
    # Normalize any source mode (L, P, RGBA, ...) to 3-channel RGB.
    # The original np.stack([image] * 3, axis=-1) assumed single-channel
    # input and produced a (224,224,3,3) array — a crash — for RGB files;
    # for grayscale input, RGB conversion replicates the channel, which
    # is exactly what the stack did.
    image = image.convert("RGB")
    image = image.resize((224, 224))  # model input is fixed at 224*224
    image = np.array(image, dtype=np.float32)
    image = image / 255  # scale pixel values to [0, 1]

    feature = onnx_session.run(["feature_vector"],
                               {"inputs": np.expand_dims(image, 0)})[0][0]
    return feature

if __name__ == "__main__":
    main()

3. 類似画像検索

import os
import sys
import glob
import numpy as np
import onnxruntime
import PIL.Image

# Batched feature files produced by the extraction script live here.
merged_feature_dir = os.path.abspath( "./merged_feature" )
merge_limit = 200  # batch size used when the features were merged
onnx_model_path = \
    "./mobilenet-v3-tensorflow2-large-100-224-feature-vector-v1.onnx"
#provider = ['CUDAExecutionProvider','CPUExecutionProvider']
# CPU alone is fast enough for this workload
provider = ['CPUExecutionProvider']

def main():
    """Find the images most similar to a fixed query image by L2 distance
    between MobileNet v3 feature vectors, and print the top matches."""
    # Load the ONNX model once for the query-image embedding.
    onnx_session = onnxruntime.InferenceSession( onnx_model_path,
                                                 providers=provider )

    img_path = "./png/CVAD45-06-000.60.png"
    query_feature = calc_feature(onnx_session, img_path)

    features  = np.array( load_merged_features() )
    img_paths = load_img_paths()

    # Broadcasting subtracts the query from every stored vector; the
    # np.tile copy the original made is unnecessary.
    distances = np.linalg.norm(features - query_feature, axis=1)

    # Output the nearest images, best match first.
    find_limit = 10
    distance_idxs = np.argsort(distances)[:find_limit]
    print( distance_idxs )
    for idx in distance_idxs:
        print( img_paths[idx], distances[idx] )

    
def load_merged_features():
    """Load every features_*.npy batch and return one flat list of vectors.

    sorted() fixes the batch order: glob returns files in an arbitrary,
    OS-dependent order, and the features must line up index-for-index
    with the paths returned by load_img_paths().
    """
    ret_datas = []
    for feature_path in sorted( glob.glob(
            merged_feature_dir+"/**/features_*.npy", recursive=True ) ):
        ret_datas += list( np.load( feature_path ) )
    return ret_datas

def load_img_paths():
    """Load every img_paths_*.npy batch and return one flat list of paths.

    sorted() keeps the batch order deterministic so the paths align
    index-for-index with the vectors from load_merged_features().
    """
    ret_datas = []
    for imgs_list in sorted( glob.glob(
            merged_feature_dir+"/**/img_paths_*.npy", recursive=True ) ):
        ret_datas += list( np.load( imgs_list ) )
    return ret_datas
    
def calc_feature(onnx_session, img_path):
    """Return the MobileNet v3 feature vector (1-D float32 array) for one image.

    onnx_session: onnxruntime.InferenceSession of the feature-vector model.
    img_path:     path to the image file to embed.
    """
    image = PIL.Image.open( img_path )
    # Normalize any source mode (L, P, RGBA, ...) to 3-channel RGB.
    # The original np.stack([image] * 3, axis=-1) assumed single-channel
    # input and produced a (224,224,3,3) array — a crash — for RGB files;
    # for grayscale input, RGB conversion replicates the channel, which
    # is exactly what the stack did.
    image = image.convert("RGB")
    image = image.resize((224, 224))  # model input is fixed at 224*224
    image = np.array(image, dtype=np.float32)
    image = image / 255  # scale pixel values to [0, 1]

    feature = onnx_session.run(["feature_vector"],
                               {"inputs": np.expand_dims(image, 0)})[0][0]
    return feature

if __name__ == "__main__":
    main()