Source code for aimet_torch.amp.quantizer_groups

# -*- mode: python -*-
# =============================================================================
#  @@-COPYRIGHT-START-@@
#
#  Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  1. Redistributions of source code must retain the above copyright notice,
#     this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions and the following disclaimer in the documentation
#     and/or other materials provided with the distribution.
#
#  3. Neither the name of the copyright holder nor the names of its contributors
#     may be used to endorse or promote products derived from this software
#     without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  SPDX-License-Identifier: BSD-3-Clause
#
#  @@-COPYRIGHT-END-@@
# =============================================================================

""" Find quantizer groups in a model """
import itertools
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
from dataclasses import dataclass, field
import torch

from aimet_common.connected_graph.connectedgraph_utils import CG_SPLIT
from aimet_common.connected_graph.operation import Op
from aimet_common.utils import AimetLogger
from aimet_common.amp.utils import CANDIDATE_WITH_DTYPE
from aimet_common.amp.quantizer_groups import QuantizerGroupBase, get_supported_candidates_for_quantizers, \
    compute_baseline_candidate_options

from aimet_torch.meta.connectedgraph import ConnectedGraph
from aimet_torch.v1.qc_quantize_op import QcQuantizeWrapper
from aimet_torch.v1.quantsim import QuantizationSimModel
from aimet_torch import onnx_utils
from aimet_torch.translation_mapping import aimet_op_to_backend_op_name_map

logger = AimetLogger.get_area_logger(AimetLogger.LogAreas.MixedPrecision)


@dataclass(frozen=True)
class QuantizerGroup(QuantizerGroupBase):
    """ Group of modules and quantizers """
    input_quantizers: Tuple[str, ...] = field(default_factory=tuple)
    output_quantizers: Tuple[str, ...] = field(default_factory=tuple)
    parameter_quantizers: Tuple[str, ...] = field(default_factory=tuple)
    supported_kernel_ops: Tuple[str, ...] = field(default_factory=tuple)

    def get_candidate(self, name_to_quantizer_dict: Dict) -> CANDIDATE_WITH_DTYPE:
        """
        Gets activation & parameter bitwidth

        :param name_to_quantizer_dict: Dict to get the module from the module name
        :return: Tuple of activation, parameter bitwidth and data type
        """
        activation_bw, parameter_bw = None, None
        activation_data_type, parameter_data_type = None, None

        for quantizer in self._get_input_quantizers(name_to_quantizer_dict) + \
                self._get_output_quantizers(name_to_quantizer_dict):
            activation_bw = quantizer.bitwidth
            activation_data_type = quantizer.data_type
            break

        for quantizer in self._get_param_quantizers(name_to_quantizer_dict):
            if quantizer.enabled:
                parameter_bw = quantizer.bitwidth
                parameter_data_type = quantizer.data_type
                break

        return (activation_bw, activation_data_type), (parameter_bw, parameter_data_type)

    def set_quantizers_to_candidate(self,
                                    name_to_quantizer_dict: Dict,
                                    candidate: CANDIDATE_WITH_DTYPE) -> None:
        """
        Sets a quantizer group to a given candidate bitwidth

        :param name_to_quantizer_dict: Dict to get the module from the module name
        :param candidate: candidate with act and param bw and data types
        """
        if len(candidate) == 1:
            (activation_bw, activation_data_type), = candidate
            param_bw, param_data_type = None, None
        else:
            (activation_bw, activation_data_type), (param_bw, param_data_type) = candidate

        for quantizer in self._get_input_quantizers(name_to_quantizer_dict) + \
                self._get_output_quantizers(name_to_quantizer_dict):
            quantizer.bitwidth = activation_bw
            quantizer.data_type = activation_data_type

        if param_bw is not None:
            for quantizer in self._get_param_quantizers(name_to_quantizer_dict):
                quantizer.bitwidth = param_bw
                quantizer.data_type = param_data_type

    def to_list(self) -> List[Tuple[str, str]]:
        """
        Converts quantizer group to a list

        :return: List containing input/output quantizers & weight quantizers
        """
        return list(itertools.chain(
            (("input", module_name) for module_name in self.input_quantizers),
            (("output", module_name) for module_name in self.output_quantizers),
            (("weight", module_name) for module_name in self.parameter_quantizers),
        ))

    def get_active_quantizers(self, name_to_quantizer_dict):
        """ Find all active tensor quantizers associated with this quantizer group """
        quantizers = self._get_input_quantizers(name_to_quantizer_dict) + \
                     self._get_output_quantizers(name_to_quantizer_dict) + \
                     self._get_param_quantizers(name_to_quantizer_dict)
        return [quantizer for quantizer in quantizers if quantizer.enabled]

    def _get_input_quantizers(self, name_to_quantizer_dict):
        result = []
        for quantizer_name in self.input_quantizers:
            out = quantizer_name.split("_input_quantizer_idx_")
            assert len(out) == 2
            module_name, quantizer_idx = out[0], int(out[1])
            module = name_to_quantizer_dict[module_name]
            result.append(module.input_quantizers[quantizer_idx])
        return result

    def _get_output_quantizers(self, name_to_quantizer_dict):
        result = []
        for module_name in self.output_quantizers:
            module = name_to_quantizer_dict[module_name]
            result += module.output_quantizers
        return result

    def _get_param_quantizers(self, name_to_quantizer_dict):
        result = []
        for module_name in self.parameter_quantizers:
            module = name_to_quantizer_dict[module_name]
            for _, param_quantizer in module.param_quantizers.items():
                result.append(param_quantizer)
        return result

    def get_input_quantizer_modules(self):
        """ Helper method to get the module names corresponding to input_quantizers """
        result = set()
        for quantizer_name in self.input_quantizers:
            out = quantizer_name.split("_input_quantizer_idx_")
            assert len(out) == 2
            result.add(out[0])
        return tuple(sorted(result))
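

# Illustrative usage (a minimal sketch, not part of the module): input quantizer
# names encode the owning module name plus the input index, while output and
# parameter entries are plain module names. "conv1" below is a hypothetical
# wrapped module name.
#
#     group = QuantizerGroup(
#         input_quantizers=("conv1_input_quantizer_idx_0",),
#         output_quantizers=("conv1",),
#         parameter_quantizers=("conv1",),
#     )
#     group.to_list()
#     # [('input', 'conv1_input_quantizer_idx_0'), ('output', 'conv1'), ('weight', 'conv1')]
#     group.get_input_quantizer_modules()  # ('conv1',)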


def find_wrapper_module(op_name: str, module_name_to_quantizer_dict: Dict) -> \
        Tuple[Optional[str], Optional[torch.nn.Module]]:
    """
    Finds the quantization wrapper module corresponding to an op's dotted name

    :param op_name: Dotted name of op as represented in connected graph
    :param module_name_to_quantizer_dict: Dict key: name of wrapped module value: quantization wrapper
    :return: Module name and the corresponding torch module in the sim
    """
    # pylint:disable = protected-access
    module_name = op_name[op_name.find('.') + 1:]
    if module_name in module_name_to_quantizer_dict:
        return module_name, module_name_to_quantizer_dict[module_name]
    # Else it is a functional op
    return None, None


def get_module_name_to_module_dict(sim: QuantizationSimModel) -> Dict:
    """
    Creates a dictionary mapping each wrapped module's name to its quantization wrapper

    :param sim: quantization sim
    :return: Dict key: name of wrapped module value: quantization wrapper
    """
    module_name_to_quantizer_dict = {}
    for name, ref_module in sim.model.named_modules():
        if isinstance(ref_module, QcQuantizeWrapper):
            module_name_to_quantizer_dict[name] = ref_module
    return module_name_to_quantizer_dict


ops_to_skip = ['view',
               'NumToTensor',
               'Split',
               CG_SPLIT,
               'PythonOp',
               'Tile',
               'transpose',
               'reshape',
               'flatten',
               'permute',
               'Permute',  # tensor.transpose() results in Permute. Name obtained after MPP
               'Reshape',  # Name obtained after MPP
               'ChannelShuffle',  # Obtained without going through MPP. torch.nn.ChannelShuffle fails MPP
               'TopK',  # Name obtained after MPP
               'PixelShuffle',  # Name obtained after MPP
               'Expand',  # Name obtained after MPP. Reproduce using tensor.expand
               'Pad',  # Name obtained after MPP
               'Slice',  # Name obtained after MPP
               'Gather',  # Name obtained after MPP
               'ScatterElements',  # Name obtained after MPP
               'ReduceMin',  # Name obtained after MPP
               'ReduceMax',  # Name obtained after MPP
               'Upsample',  # Name obtained after MPP
               'RoIPool',  # Name obtained after MPP
               'MaxPool',  # Name obtained after MPP
               'Transpose'  # Name obtained after MPP
               ]

ops_not_to_traverse = ['size']


def find_output_quantizer_groups(op: Op, parent_child_op_groups: Dict, map_for_skipped_ops: Dict):
    """
    Finds quantizer groups along the parent-to-child flow

    :param op: pytorch module
    :param parent_child_op_groups: parent child relationships in graph
    :param map_for_skipped_ops: map to find first skipped parents of skipped ops
    """
    output = op.output
    if output:
        consumers = output.consumers
        for consumer in consumers:
            if consumer.type in ops_not_to_traverse:
                continue
            dotted_name = op.dotted_name
            if op.dotted_name in map_for_skipped_ops:
                dotted_name = map_for_skipped_ops[op.dotted_name]

            if consumer.type in ops_to_skip:
                map_for_skipped_ops[consumer.dotted_name] = dotted_name
                find_output_quantizer_groups(consumer, parent_child_op_groups, map_for_skipped_ops)
            # If there is a one to one connection between quantizers
            else:
                parent_child_op_groups[dotted_name].append(consumer.dotted_name)
    else:
        if op.dotted_name in map_for_skipped_ops:
            parent_child_op_groups[map_for_skipped_ops[op.dotted_name]] = []
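

# Illustrative note (a sketch, not from the source): ops in ops_to_skip are mapped
# back to their first non-skipped parent while traversing. For a hypothetical chain
# Conv -> reshape -> Relu, 'reshape' is skipped, so the traversal records
# parent_child_op_groups['<conv dotted name>'] == ['<relu dotted name>'].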


def find_op_groups(graph: ConnectedGraph) -> Dict:
    """
    Finds parent-children relationships based on the following rules:

    1) If there is a direct connection between two ops, op1 and op2, then op1 is the parent of op2 and
       they form a group
    2) If the input to an op (op1) is shared with another op (op2), the op producing the input (op0) is
       the parent, and op1 and op2 are the children

    :param graph: connected graph
    :return: Dict of parent (key) and children (value) relationship
    """
    parent_child_op_groups = defaultdict(list)
    map_for_skipped_ops = {}

    for op in graph.ordered_ops:
        # Add 1st op as child
        if not op.input_ops:
            parent_child_op_groups['input_ops'].append(op.dotted_name)
        # Add output op as child to put output of model as a quantizer group
        if op.output is None:
            parent_child_op_groups['output_ops'].append(op.dotted_name)

    for op in graph.get_all_ops().values():
        if op.type in ops_to_skip or op.type in ops_not_to_traverse:
            continue
        find_output_quantizer_groups(op, parent_child_op_groups, map_for_skipped_ops)

    return parent_child_op_groups


# This code is not currently called anywhere, but it can be used to combine two ops that feed into an
# elementwise op
def find_input_quantizer_groups(graph, map_for_skipped_ops, parent_child_op_groups):
    """
    Combines two groups which share the same output

    :param graph: connected graph
    :param map_for_skipped_ops: map to find first skipped parents of skipped ops
    :param parent_child_op_groups: parent child relationships in graph
    """
    for op in graph.get_all_ops().values():
        inputs = op.input_ops
        if len(inputs) > 1:
            new_parents = set()
            new_children = set()
            for input_op in inputs:
                dotted_name = input_op.dotted_name
                if input_op.type in ops_to_skip:
                    dotted_name = map_for_skipped_ops[dotted_name]
                new_parents.add(dotted_name)

                if dotted_name in parent_child_op_groups:
                    for name in parent_child_op_groups[dotted_name]:
                        new_children.add(name)
                    del parent_child_op_groups[dotted_name]

            if len(new_parents) == 1:
                parent_child_op_groups[tuple(new_parents)[0]] = new_children
            else:
                parent_child_op_groups[tuple(new_parents)] = new_children


def get_input_and_param_quantizers(
        child: str, module_name_to_module_dict: Dict
) -> Tuple[Tuple[str, ...], Tuple[str, ...]]:
    """
    Collects a child's enabled input quantizer and param quantizer names for a quantizer group

    :param child: name of child
    :param module_name_to_module_dict: name to module ref dict
    :return: Tuple of input quantizer names and parameter quantizer names
    """
    input_quantizers = []
    parameter_quantizers = []
    module_name, module = find_wrapper_module(child, module_name_to_module_dict)
    if module_name is not None:
        for idx, input_quantizer in enumerate(module.input_quantizers):
            if input_quantizer.enabled:
                input_quantizers.append(module_name + '_input_quantizer_idx_' + str(idx))
        for _, param_quantizer in module.param_quantizers.items():
            if param_quantizer.enabled:
                parameter_quantizers.append(module_name)
    return tuple(input_quantizers), tuple(parameter_quantizers)
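

# Illustrative usage (a minimal sketch; "MyModel.fc" is a hypothetical dotted name
# whose wrapped Linear module "fc" has one enabled input quantizer and an enabled
# weight quantizer):
#
#     get_input_and_param_quantizers("MyModel.fc", module_name_to_module_dict)
#     # -> (('fc_input_quantizer_idx_0',), ('fc',))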


# pylint: disable=too-many-branches, too-many-locals
def find_quantizer_group(sim: QuantizationSimModel) -> Tuple[Dict, List[QuantizerGroup]]:
    """
    Finds quantizer groups in a quantization sim model

    :param sim: Quantization sim
    :return: Tuple of the module name to module dict and the list of quantizer groups
    """
    # Get connected graph from quantsim for model without wrappers
    connected_graph = sim.connected_graph

    if connected_graph is None:
        raise AssertionError('Aborting Auto Mixed Precision, connected graph needs to exist for Auto Mixed precision')

    quantizer_groups = []

    parent_child_op_groups = find_op_groups(connected_graph)

    module_name_to_module_dict = get_module_name_to_module_dict(sim)

    if 'input_ops' in parent_child_op_groups:
        for child in parent_child_op_groups['input_ops']:
            # Add one quantizer group for each input and its weight param
            input_quantizers, parameter_quantizers = get_input_and_param_quantizers(child, module_name_to_module_dict)
            if input_quantizers or parameter_quantizers:
                child_module_name, _ = find_wrapper_module(child, module_name_to_module_dict)
                supported_kernel_ops = []
                if child_module_name is not None:
                    supported_kernel_ops.append(child_module_name)
                quantizer_group = QuantizerGroup(
                    input_quantizers=input_quantizers,
                    parameter_quantizers=parameter_quantizers,
                    supported_kernel_ops=tuple(supported_kernel_ops)
                )
                quantizer_groups.append(quantizer_group)
                logger.debug('\n Quantizer Group added: %s', quantizer_group)

    # Based on which quantizers are enabled, create a list of quantizer_groups
    for parents, children in parent_child_op_groups.items():
        input_quantizers = ()
        output_quantizers = ()
        parameter_quantizers = ()

        if parents in ['input_ops', 'output_ops']:
            continue

        if not isinstance(parents, tuple):
            parents = [parents]

        for parent in parents:
            module_name, module = find_wrapper_module(parent, module_name_to_module_dict)
            if module is not None:
                for output_quantizer in module.output_quantizers:
                    if output_quantizer.enabled:
                        output_quantizers += (module_name,)

        supported_kernel_ops = []
        for child in children:
            input_q, param_q = get_input_and_param_quantizers(child, module_name_to_module_dict)
            input_quantizers += input_q
            parameter_quantizers += param_q
            child_module_name, _ = find_wrapper_module(child, module_name_to_module_dict)
            if child_module_name is not None:
                supported_kernel_ops.append(child_module_name)

        # Don't add quantizer group if it is empty
        if input_quantizers or output_quantizers or parameter_quantizers:
            quantizer_group = QuantizerGroup(
                input_quantizers=input_quantizers,
                output_quantizers=output_quantizers,
                parameter_quantizers=parameter_quantizers,
                supported_kernel_ops=tuple(supported_kernel_ops)
            )
            quantizer_groups.append(quantizer_group)
            logger.debug('\n Quantizer Group added: %s', quantizer_group)

    if 'output_ops' in parent_child_op_groups:
        for parent in parent_child_op_groups['output_ops']:
            # Add one quantizer group for each model output
            module_name, module = find_wrapper_module(parent, module_name_to_module_dict)
            if module is not None:
                for output_quantizer in module.output_quantizers:
                    if output_quantizer.enabled:
                        # Using empty supported kernel ops so that model output quantizers are able to consider all
                        # default candidates
                        quantizer_group = QuantizerGroup(
                            output_quantizers=(module_name,),
                            supported_kernel_ops=tuple()
                        )
                        quantizer_groups.append(quantizer_group)
                        logger.debug('\n Quantizer Group added: %s', quantizer_group)

    return module_name_to_module_dict, quantizer_groups
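

# Illustrative usage (a minimal sketch; assumes `sim` is an existing
# QuantizationSimModel built for the model under analysis):
#
#     module_name_to_module_dict, quantizer_groups = find_quantizer_group(sim)
#     for group in quantizer_groups:
#         print(group, group.get_candidate(module_name_to_module_dict))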
""" quantizers_with_supported_candidates = defaultdict(list) # pylint: disable=too-many-nested-blocks # pylint: disable=protected-access for quantizer_group in quantizer_groups: quantizers = sorted(set(itertools.chain(quantizer_group.get_input_quantizer_modules(), quantizer_group.output_quantizers, quantizer_group.parameter_quantizers))) # quantizers are now unique ops present in the given quantizer_group onnx_ops = defaultdict(list) supported_kernel_types = set() for supported_kernel_op in quantizer_group.supported_kernel_ops: module = module_name_to_module_dict[supported_kernel_op]._module_to_wrap try: backend_type = aimet_op_to_backend_op_name_map[module.__class__] except KeyError: backend_type = aimet_op_to_backend_op_name_map.get(module.__class__.__name__) if backend_type in supported_kernels: supported_kernel_types.add(backend_type) else: onnx_types = onnx_utils.map_torch_types_to_onnx.get( type(module_name_to_module_dict[supported_kernel_op]._module_to_wrap), []) if not onnx_types: logger.warning("No mapping found for %s in the torch to onnx op type mapping dictionary.", str(type(module_name_to_module_dict[supported_kernel_op]._module_to_wrap))) supported_kernel_types.update(onnx_types) for onnx_type in onnx_types: if onnx_type not in supported_kernels.keys(): if module in supported_kernels: supported_kernels[onnx_type] = supported_kernels[module] for quantizer in quantizers: onnx_ops[quantizer] = list(supported_kernel_types) supported_kernels_for_quantizers = get_supported_candidates_for_quantizers(quantizers, onnx_ops, supported_kernels, amp_candidates, use_all_amp_candidates) quantizers_with_supported_candidates[quantizer_group] = supported_kernels_for_quantizers.copy() max_candidate_options = compute_baseline_candidate_options(quantizers_with_supported_candidates, amp_candidates, use_all_amp_candidates) return quantizers_with_supported_candidates, max_candidate_options