import argparse
import os
from collections import OrderedDict
from typing import Optional

# Hide all GPUs before TensorFlow, PyTorch and ONNX Runtime are imported so the
# conversion and the comparison test run entirely on the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import numpy as np
import tensorflow as tf
import torch
import onnxruntime as ort
from audiodiffusion.audio_encoder import AudioEncoder
from keras.models import load_model
from torch import Tensor

if __name__ == "__main__":
    """
    Entry point for the tf_to_onnx script.

    Converts a TensorFlow MP3ToVec model to an ONNX MP3ToVec model.

    Args:
        --onnx_model_file (str): Path to the ONNX model file. Default is "models/speccy_model.onnx".
        --tf_model_file (str): Path to the TensorFlow model file. Default is "models/speccy_model".

    Returns:
        None
    """
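    # Example invocation (assuming this script is saved as tf_to_onnx.py):
    #   python tf_to_onnx.py --tf_model_file models/speccy_model --onnx_model_file models/speccy_model.onnx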
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--onnx_model_file",
        type=str,
        default="models/speccy_model.onnx",
        help="ONNX model path",
    )
    parser.add_argument(
        "--tf_model_file",
        type=str,
        default="models/speccy_model",
        help="TensorFlow model path",
    )
    args = parser.parse_args()

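    # Load the trained Keras model; the legacy cosine_proximity loss has to be
    # supplied as a custom object for the saved model to deserialise.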
    model: Optional[tf.keras.Model] = load_model(
        args.tf_model_file,
        custom_objects={"cosine_proximity": tf.compat.v1.keras.losses.cosine_proximity},
    )
    if model is None:
        raise ValueError("Model did not load correctly.")

    pytorch_model = AudioEncoder()
    new_state_dict = OrderedDict()
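    # Copy the Keras weights into a PyTorch state dict, layer by layer. Keras
    # SeparableConv2D stores its depthwise kernel as (H, W, in_channels, depth_multiplier)
    # and its pointwise kernel as (1, 1, in_channels * depth_multiplier, out_channels);
    # the permutes below rearrange both into the channels-first (out, in, H, W)
    # ordering that PyTorch convolution weights use.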
    for conv_block in range(3):
        new_state_dict[f"conv_blocks.{conv_block}.sep_conv.depthwise.weight"] = Tensor(
            model.get_layer(f"separable_conv2d_{conv_block + 1}").depthwise_kernel.numpy()
        ).permute(2, 3, 0, 1)
        new_state_dict[f"conv_blocks.{conv_block}.sep_conv.pointwise.weight"] = Tensor(
            model.get_layer(f"separable_conv2d_{conv_block + 1}").pointwise_kernel.numpy()
        ).permute(3, 2, 0, 1)
        new_state_dict[f"conv_blocks.{conv_block}.sep_conv.pointwise.bias"] = Tensor(
            model.get_layer(f"separable_conv2d_{conv_block + 1}").bias.numpy()
        )
        new_state_dict[f"conv_blocks.{conv_block}.batch_norm.weight"] = Tensor(
            model.get_layer(f"batch_normalization_{conv_block + 1}").gamma.numpy()
        )
        new_state_dict[f"conv_blocks.{conv_block}.batch_norm.running_mean"] = Tensor(
            model.get_layer(f"batch_normalization_{conv_block + 1}").moving_mean.numpy()
        )
        new_state_dict[f"conv_blocks.{conv_block}.batch_norm.running_var"] = Tensor(
            model.get_layer(f"batch_normalization_{conv_block + 1}").moving_variance.numpy()
        )
        new_state_dict[f"conv_blocks.{conv_block}.batch_norm.bias"] = Tensor(
            model.get_layer(f"batch_normalization_{conv_block + 1}").beta.numpy()
        )

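    # After the loop conv_block == 2, so conv_block + 2 addresses the final
    # batch-normalization layer (batch_normalization_4) in the dense block.
    # Keras Dense kernels are stored as (in_features, out_features); permute(1, 0)
    # transposes them to the (out_features, in_features) shape of torch.nn.Linear weights.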
    new_state_dict["dense_block.batch_norm.weight"] = Tensor(
        model.get_layer(f"batch_normalization_{conv_block + 2}").gamma.numpy()  # type: ignore
    )
    new_state_dict["dense_block.batch_norm.running_mean"] = Tensor(
        model.get_layer(f"batch_normalization_{conv_block + 2}").moving_mean.numpy()  # type: ignore
    )
    new_state_dict["dense_block.batch_norm.running_var"] = Tensor(
        model.get_layer(f"batch_normalization_{conv_block + 2}").moving_variance.numpy()  # type: ignore
    )
    new_state_dict["dense_block.batch_norm.bias"] = Tensor(
        model.get_layer(f"batch_normalization_{conv_block + 2}").beta.numpy()  # type: ignore
    )

    new_state_dict["dense_block.dense.weight"] = Tensor(
        model.get_layer("dense_1").kernel.numpy()
    ).permute(1, 0)
    new_state_dict["dense_block.dense.bias"] = Tensor(
        model.get_layer("dense_1").bias.numpy()
    )
    new_state_dict["embedding.weight"] = Tensor(
        model.get_layer("dense_2").kernel.numpy()
    ).permute(1, 0)
    new_state_dict["embedding.bias"] = Tensor(model.get_layer("dense_2").bias.numpy())

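    # Put the model in eval mode (fixed batch-norm statistics) and load the converted
    # weights; strict=False tolerates keys absent from the converted dict, such as the
    # BatchNorm num_batches_tracked buffers.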
    pytorch_model.eval()
    pytorch_model.load_state_dict(new_state_dict, strict=False)

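    # Export to ONNX by tracing the model on a dummy input of the expected
    # (batch, channels, height, width) shape; the batch dimension is declared dynamic
    # so the exported graph accepts any batch size at inference time.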
    dummy_input = torch.randn(1, 1, 96, 216)
    dynamic_axes = {
        "input": {0: "batch_size"},  # variable-length axes
        "output": {0: "batch_size"},  # map each dynamic axis to its name
    }
    torch.onnx.export(
        pytorch_model,
        dummy_input,
        args.onnx_model_file,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes=dynamic_axes,
    )

    # Test: verify that the exported ONNX model reproduces the original TensorFlow
    # model's output on a random input to within a small tolerance. Note the ONNX model
    # takes channels-first input while the TensorFlow model takes channels-last.
    np.random.seed(42)
    ort_session = ort.InferenceSession(
        args.onnx_model_file, providers=["CPUExecutionProvider"]
    )
    example = np.random.random_sample((1, 96, 216, 1))
    with torch.no_grad():
        assert (
            np.abs(
                ort_session.run(
                    None, {"input": Tensor(example).permute(0, 3, 1, 2).numpy()}
                )
                - model(example).numpy()
            ).max()
            < 2e-3
        )