Documentation Index
Fetch the complete documentation index at: https://mintlify.com/microsoft/onnxruntime/llms.txt
Use this file to discover all available pages before exploring further.
Converting TensorFlow Models to ONNX
TensorFlow models can be converted to ONNX format using the tf2onnx library, which provides robust conversion capabilities for both TensorFlow and Keras models.
Prerequisites
pip install tensorflow onnx tf2onnx onnxruntime
Basic Conversion
Converting a Keras Model
import tensorflow as tf
import tf2onnx
import onnx
# Create a simple Keras model
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(5, activation='softmax')
])
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy')
# Convert to ONNX
spec = (tf.TensorSpec((None, 10), tf.float32, name="input"),)
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=14)
# Save the model
onnx.save(onnx_model, "keras_model.onnx")
Converting from SavedModel
import tf2onnx
# Export TensorFlow SavedModel
model = YourTensorFlowModel()
tf.saved_model.save(model, "saved_model_dir")
# Convert SavedModel to ONNX (run the following command in a shell, not in Python)
python -m tf2onnx.convert \
--saved-model saved_model_dir \
--output model.onnx \
--opset 14
Example workflow for converting BERT models from TensorFlow:
import tensorflow as tf
import tf2onnx
from transformers import TFAutoModel, AutoTokenizer, AutoConfig
import numpy as np
# Load pre-trained TensorFlow model
model_name = "bert-base-uncased"
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModel.from_pretrained(model_name, config=config)
# Prepare example inputs
max_length = 128
example_text = "This is a sample input"
example_inputs = tokenizer(
example_text,
return_tensors="tf",
max_length=max_length,
padding="max_length",
truncation=True
)
# Create input specifications with dynamic axes
specs = []
for name, value in example_inputs.items():
dims = [None] * len(value.shape) # None for dynamic dimensions
specs.append(tf.TensorSpec(tuple(dims), value.dtype, name=name))
# Convert to ONNX
onnx_model, _ = tf2onnx.convert.from_keras(
model,
input_signature=tuple(specs),
opset=14,
output_path="bert_tf.onnx"
)
print(f"Model converted successfully to bert_tf.onnx")
Handling Encoder-Decoder Models
For sequence-to-sequence models like T5:
import tensorflow as tf
import tf2onnx
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)
# Disable cache for ONNX export
if hasattr(model.config, 'use_cache'):
model.config.use_cache = False
max_length = 128
example_inputs = tokenizer(
"translate English to German: Hello world",
return_tensors="tf",
max_length=max_length,
padding="max_length",
truncation=True
)
# Add decoder inputs
example_inputs["decoder_input_ids"] = tokenizer(
"Hallo Welt",
return_tensors="tf",
max_length=max_length,
padding="max_length",
truncation=True
).input_ids
# Create specs with dynamic dimensions
specs = []
for name, value in example_inputs.items():
dims = [None] * len(value.shape)
specs.append(tf.TensorSpec(tuple(dims), value.dtype, name=name))
# Convert
onnx_model, _ = tf2onnx.convert.from_keras(
model,
input_signature=tuple(specs),
opset=14,
output_path="t5_model.onnx"
)
Large Model Conversion
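For models larger than 2GB (the protobuf size limit for a single ONNX file), use the large model format, which stores the weights as external data and writes the result as a zip archive: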
import tf2onnx
import zipfile
import os
# Convert with large_model flag
onnx_model, _ = tf2onnx.convert.from_keras(
model,
input_signature=tuple(specs),
opset=14,
large_model=True, # Enables external data storage
output_path="large_model.zip"
)
# Extract the zip file
with zipfile.ZipFile("large_model.zip", "r") as z:
z.extractall("model_output")
# Rename the extracted model
model_path = os.path.join("model_output", "__MODEL_PROTO.onnx")
if os.path.exists("large_model.onnx"):
os.remove("large_model.onnx")
os.rename(model_path, "large_model.onnx")
Command Line Conversion
From SavedModel
python -m tf2onnx.convert \
--saved-model saved_model_dir \
--output model.onnx \
--opset 14 \
--verbose
From Checkpoint
python -m tf2onnx.convert \
--checkpoint checkpoint.ckpt \
--output model.onnx \
--inputs input:0 \
--outputs output:0 \
--opset 14
From Frozen Graph
python -m tf2onnx.convert \
--input frozen_graph.pb \
--output model.onnx \
--inputs input:0 \
--outputs output:0 \
--opset 14
Validating TensorFlow to ONNX Conversion
import tensorflow as tf
import onnxruntime as ort
import numpy as np
# Prepare test input
test_input = np.random.randn(1, 128).astype(np.float32)
# Get TensorFlow output
tf_output = model(test_input, training=False)
# Get ONNX Runtime output
session = ort.InferenceSession("model.onnx")
onnx_inputs = {session.get_inputs()[0].name: test_input}
onnx_output = session.run(None, onnx_inputs)
# Compare outputs
if isinstance(tf_output, dict):
tf_output = tf_output['last_hidden_state']
rtol = 1e-3
atol = 1e-3
is_close = np.allclose(tf_output.numpy(), onnx_output[0], rtol=rtol, atol=atol)
if is_close:
print("✓ Conversion validated successfully")
print(f"Max difference: {np.max(np.abs(tf_output.numpy() - onnx_output[0]))}")
else:
print("✗ Validation failed - outputs differ significantly")
Handling Special Cases
Models with Custom Layers
For models with custom layers, you may need to register custom operators:
import tf2onnx
from tf2onnx import tf_loader
# Define custom op conversion
@tf2onnx.tfonnx.register_tensorflow_op("CustomOp")
class CustomOpConverter:
@classmethod
def version_1(cls, ctx, node, **kwargs):
# Implement custom conversion logic
pass
# Then proceed with conversion
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec)
Fixing Pad Token Issues
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Fix "Using pad_token, but it is not set yet" error
if tokenizer.pad_token is None:
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
model.resize_token_embeddings(len(tokenizer))
When loading TensorFlow models for export, you may want to hide the GPUs so that loading and conversion run on the CPU:
import tensorflow as tf
# Disable GPU for export
tf.config.set_visible_devices([], "GPU")
# Load and convert model
model = TFAutoModel.from_pretrained(model_name)
# ... conversion code ...
Best Practices
- Disable training mode: Set `training=False` when running the model
- Disable caching: Set `use_cache=False` for models that support it
- Use dynamic shapes: Specify `None` for batch and sequence dimensions
- Validate conversion: Always compare TensorFlow and ONNX outputs
- Handle special tokens: Configure tokenizer properly before conversion
- Set opset version: Use opset 14 or higher for better compatibility
- Test edge cases: Validate with various input sizes
Troubleshooting
Common Errors
“Op type not supported”: Update tf2onnx or use a different opset version
pip install --upgrade tf2onnx
Shape inference issues: Provide explicit input shapes in the spec
Memory errors or protobuf size-limit errors: Use large_model=True for models larger than 2GB
Next Steps