AIMET TensorFlow Quantization SIM API

Top-level API


The following API can be used to compute encodings for the model
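
A minimal usage sketch (model is a tf.keras.Model; evaluate is the user-defined forward-pass callback shown in the code examples below, and calib_inputs/calib_labels are placeholder names for representative calibration data):

sim = quantsim.QuantizationSimModel(model)
sim.compute_encodings(evaluate, forward_pass_callback_args=(calib_inputs, calib_labels))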


The following API can be used to export the model to the target
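
A minimal usage sketch (the output path and filename prefix are illustrative placeholders):

sim.export(path='./output', filename_prefix='resnet50_after_quantization')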


The encoding format is described in the Quantization Encoding Specification


Code Examples

Required imports

import numpy as np
import tensorflow as tf

from aimet_tensorflow.keras import quantsim
# Optional import only required for fine-tuning
from aimet_tensorflow.keras.quant_sim.qc_quantize_wrapper import QcQuantizeWrapper
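
The quantize_model example below passes a forward-pass callback named evaluate to compute_encodings. A minimal sketch of such a callback (the name and body are illustrative; it only needs to run representative data through the model so AIMET can observe activation ranges):

def evaluate(model: tf.keras.Model, forward_pass_callback_args):
    # AIMET invokes this callback with the sim model and the user-supplied
    # forward_pass_callback_args tuple. Running data through the model is all
    # that is needed; the return value is not used when computing encodings.
    input_data, _ = forward_pass_callback_args
    model(input_data)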

Quantize with Fine-tuning

def quantize_model():
    model = tf.keras.applications.resnet50.ResNet50(weights=None, classes=10)
    sim = quantsim.QuantizationSimModel(model)

    # Generate some dummy data
    dummy_x = np.random.randn(10, 224, 224, 3)
    dummy_y = np.random.randint(0, 10, size=(10,))
    dummy_y = tf.keras.utils.to_categorical(dummy_y, num_classes=10)

    # Compute encodings
    sim.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    sim.compute_encodings(evaluate, forward_pass_callback_args=(dummy_x, dummy_y))

    # Do some fine-tuning
    # Note: Fine-tuning on GPU is not supported for models whose BatchNorm layers
    # are non-trainable, so the BatchNorm layers must be explicitly set to trainable.
    # The snippet below sets the wrapped BatchNorm layers to trainable.
    for layer in sim.model.layers:
        if isinstance(layer, QcQuantizeWrapper) and isinstance(layer._layer_to_wrap, tf.keras.layers.BatchNormalization):
            layer._layer_to_wrap.trainable = True

    sim.model.fit(x=dummy_x, y=dummy_y, epochs=10)