AIMET TensorFlow Quantization SIM API
User Guide Link
To learn more about Quantization Simulation, please see Quantization Sim
Top-level API
The following API can be used to compute encodings for the model:
The following API can be used to export the model to the target:
Encoding format is described in the Quantization Encoding Specification
Code Examples
Required imports
import numpy as np
import tensorflow as tf
from aimet_tensorflow.keras import quantsim
# Optional import only required for fine-tuning
from aimet_tensorflow.keras.quant_sim.qc_quantize_wrapper import QcQuantizeWrapper
Quantize with Fine-Tuning
def quantize_model():
    """Create a quantization-simulation model, compute encodings, and fine-tune.

    Wraps an untrained 10-class Keras ResNet50 in a ``QuantizationSimModel``,
    computes encodings using random dummy data, marks every wrapped
    BatchNormalization layer as trainable, and then calls ``fit`` on the
    simulated model for 10 epochs with the same dummy data.

    Returns:
        None. The example is run for its side effects only.
    """
    model = tf.keras.applications.resnet50.ResNet50(weights=None, classes=10)

    sim = quantsim.QuantizationSimModel(model)

    # Generate some dummy data: 10 random images and one-hot labels for 10 classes.
    dummy_x = np.random.randn(10, 224, 224, 3)
    dummy_y = np.random.randint(0, 10, size=(10,))
    dummy_y = tf.keras.utils.to_categorical(dummy_y, num_classes=10)

    # Compute encodings.
    # `learning_rate` is used instead of the deprecated `lr` alias, which newer
    # Keras releases warn about or reject.
    sim.model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    # NOTE(review): `evaluate` is not defined in this snippet; it is presumably
    # a user-supplied forward-pass callback that runs the model on the data
    # passed via `forward_pass_callback_args` -- confirm against the
    # compute_encodings API documentation.
    sim.compute_encodings(evaluate, forward_pass_callback_args=(dummy_x, dummy_y))

    # Do some fine-tuning.
    # Note: for GPU workloads, models with non-trainable BatchNorms are not
    # supported, so the user needs to explicitly set the BatchNorms to
    # trainable. The loop below does that for every wrapped BatchNorm layer.
    for layer in sim.model.layers:
        if isinstance(layer, QcQuantizeWrapper) and \
                isinstance(layer._layer_to_wrap, tf.keras.layers.BatchNormalization):
            layer._layer_to_wrap.trainable = True

    sim.model.fit(x=dummy_x, y=dummy_y, epochs=10)