# Source: aimet_onnx.lite_mp

# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause

import math
from typing import Dict
from aimet_onnx import qtype, int16, float16, QuantizationSimModel


def flip_layers_to_higher_precision(
    sim: QuantizationSimModel,
    layer_sensitivity_dict: Dict[str, float],
    percent_to_flip: int = 10,
    override_precision: qtype = float16,
):
    """
    Given a sim object and a layer-sensitivity dictionary, flip a given
    percentage of the layers to higher precision.

    Layers are sorted ascending by their sensitivity metric, so the layers
    with the lowest metric values are flipped first.

    :param sim: QuantizationSimModel instance initialized with the base precision
    :param layer_sensitivity_dict: Dict of (layer_name: sqnr_metric) that is
        output from analyze_per_layer_sensitivity
    :param percent_to_flip: Percentage of layers to flip
    :param override_precision: Precision to set layers to. At present, either
        int16 (w16a16) or float16 are supported.
    :raises ValueError: If ``override_precision`` is neither int16 nor float16.
    """
    # Sanity check
    if override_precision not in (int16, float16):
        # NOTE: message now names the actual parameter (was "higher_precision").
        raise ValueError("override_precision must be int16 or float16")

    # Sort ascending by metric and keep the requested fraction (rounded up,
    # so percent_to_flip > 0 always flips at least one layer).
    sqnr_list = sorted(layer_sensitivity_dict.items(), key=lambda item: item[1])
    sqnr_list = sqnr_list[: math.ceil(len(sqnr_list) * percent_to_flip / 100)]

    cg_ops = sim.connected_graph.get_all_ops()
    for layer_name, _ in sqnr_list:
        op = cg_ops[layer_name]
        (
            input_quantizers,
            output_quantizers,
            param_quantizers,
        ) = sim.get_op_quantizers(op)

        # int16: widen every quantizer to 16 bits.
        # float16: disable the quantizers (presumably leaving the tensors in
        # fp16 — confirm against sim's default float behavior).
        for quantizer in input_quantizers + output_quantizers:
            if override_precision == int16:
                quantizer.set_bitwidth(16)
            else:
                quantizer.enabled = False
        for quantizer in param_quantizers.values():
            if override_precision == int16:
                quantizer.set_bitwidth(16)
            else:
                quantizer.enabled = False