Source code for QEfficient.diffusers.pipelines.pipeline_module
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------
from typing import Dict, List, Tuple
import torch
import torch.nn as nn
from diffusers.models.transformers.transformer_wan import WanTransformerBlock
from QEfficient.base.modeling_qeff import QEFFBaseModel
from QEfficient.base.onnx_transforms import FP16ClipTransform, SplitTensorsTransform
from QEfficient.diffusers.models.pytorch_transforms import (
AttentionTransform,
CustomOpsTransform,
NormalizationTransform,
)
from QEfficient.diffusers.models.transformers.transformer_flux import (
QEffFluxSingleTransformerBlock,
QEffFluxTransformerBlock,
)
from QEfficient.transformers.models.pytorch_transforms import (
T5ModelTransform,
)
from QEfficient.utils import constants
[docs]class QEffTextEncoder(QEFFBaseModel):
"""
Wrapper for text encoder models with ONNX export and QAIC compilation capabilities.
This class handles text encoder models (CLIP, T5) with specific transformations and
optimizations for efficient inference on Qualcomm AI hardware. It applies custom
PyTorch and ONNX transformations to prepare models for deployment.
Attributes:
model (nn.Module): The wrapped text encoder model (deep copy of original)
_pytorch_transforms (List): PyTorch transformations applied before ONNX export
_onnx_transforms (List): ONNX transformations applied after export
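Example:
A minimal usage sketch; the checkpoint name and export directory below are illustrative placeholders, not defaults of this class:
>>> from transformers import CLIPTextModel
>>> text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
>>> qeff_text_encoder = QEffTextEncoder(text_encoder)
>>> inputs, dynamic_axes, output_names = qeff_text_encoder.get_onnx_params()
>>> onnx_path = qeff_text_encoder.export(inputs, output_names, dynamic_axes, export_dir="./onnx_cache")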
"""
_pytorch_transforms = [CustomOpsTransform, T5ModelTransform]
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
@property
def get_model_config(self) -> Dict:
"""
Get the model configuration as a dictionary.
Returns:
Dict: The configuration dictionary of the underlying text encoder model
"""
return self.model.config.__dict__
def __init__(self, model: nn.Module) -> None:
"""
Initialize the text encoder wrapper.
Args:
model (nn.Module): The text encoder model to wrap (CLIP or T5)
"""
super().__init__(model)
self.model = model
[docs] def get_onnx_params(self) -> Tuple[Dict, Dict, List[str]]:
"""
Generate ONNX export configuration for the text encoder.
Creates example inputs, dynamic axes specifications, and output names
tailored to the specific text encoder type (CLIP vs T5).
Returns:
Tuple containing:
- example_inputs (Dict): Sample inputs for ONNX export
- dynamic_axes (Dict): Specification of dynamic dimensions
- output_names (List[str]): Names of model outputs
"""
bs = constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE
# Create example input with max sequence length
example_inputs = {
"input_ids": torch.zeros((bs, self.model.config.max_position_embeddings), dtype=torch.int64),
}
# Define which dimensions can vary at runtime
dynamic_axes = {"input_ids": {0: "batch_size", 1: "seq_len"}}
# T5 only outputs hidden states, CLIP outputs both hidden states and pooled output
if self.model.__class__.__name__ == "T5EncoderModel":
output_names = ["last_hidden_state"]
else:
output_names = ["last_hidden_state", "pooler_output"]
example_inputs["output_hidden_states"] = False
return example_inputs, dynamic_axes, output_names
[docs] def export(
self,
inputs: Dict,
output_names: List[str],
dynamic_axes: Dict,
export_dir: str = None,
export_kwargs: Dict = {},
) -> str:
"""
Export the text encoder model to ONNX format.
Args:
inputs (Dict): Example inputs for ONNX export
output_names (List[str]): Names of model outputs
dynamic_axes (Dict): Specification of dynamic dimensions
export_dir (str, optional): Directory to save ONNX model
export_kwargs (Dict, optional): Additional export arguments
Returns:
str: Path to the exported ONNX model
"""
return self._export(
example_inputs=inputs,
output_names=output_names,
dynamic_axes=dynamic_axes,
export_dir=export_dir,
**export_kwargs,
)
[docs] def compile(self, specializations: List[Dict], **compiler_options) -> None:
"""
Compile the ONNX model for Qualcomm AI hardware.
Args:
specializations (List[Dict]): Model specialization configurations
**compiler_options: Additional compiler options (e.g., num_cores, aic_num_of_activations)
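Example:
An illustrative call; the specialization keys mirror the dynamic axes from ``get_onnx_params`` and the values shown are assumptions for sketch purposes, not required settings:
>>> qeff_text_encoder.compile(
...     specializations=[{"batch_size": 1, "seq_len": 77}],
...     num_cores=16,
... )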
"""
self._compile(specializations=specializations, **compiler_options)
[docs]class QEffUNet(QEFFBaseModel):
"""
Wrapper for UNet models with ONNX export and QAIC compilation capabilities.
This class handles UNet models with specific transformations and optimizations
for efficient inference on Qualcomm AI hardware. UNet is commonly used in
diffusion models for image generation tasks.
Attributes:
model (nn.Module): The wrapped UNet model
_pytorch_transforms (List): PyTorch transformations applied before ONNX export
_onnx_transforms (List): ONNX transformations applied after export
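Example:
A minimal sketch; note that the constructor expects the full pipeline object and pulls out its ``unet`` attribute (the checkpoint path is a placeholder):
>>> from diffusers import StableDiffusionPipeline
>>> pipe = StableDiffusionPipeline.from_pretrained("<path-or-repo-of-a-unet-based-pipeline>")
>>> qeff_unet = QEffUNet(pipe)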
"""
_pytorch_transforms = [CustomOpsTransform]
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
@property
def get_model_config(self) -> Dict:
"""
Get the model configuration as a dictionary.
Returns:
Dict: The configuration dictionary of the underlying UNet model
"""
return self.model.config.__dict__
def __init__(self, model: nn.Module) -> None:
"""
Initialize the UNet wrapper.
Args:
model (nn.Module): The pipeline model containing the UNet
"""
super().__init__(model.unet)
self.model = model.unet
[docs] def export(
self,
inputs: Dict,
output_names: List[str],
dynamic_axes: Dict,
export_dir: str = None,
export_kwargs: Dict = {},
) -> str:
"""
Export the UNet model to ONNX format.
Args:
inputs (Dict): Example inputs for ONNX export
output_names (List[str]): Names of model outputs
dynamic_axes (Dict): Specification of dynamic dimensions
export_dir (str, optional): Directory to save ONNX model
export_kwargs (Dict, optional): Additional export arguments
Returns:
str: Path to the exported ONNX model
"""
return self._export(
example_inputs=inputs,
output_names=output_names,
dynamic_axes=dynamic_axes,
export_dir=export_dir,
**export_kwargs,
)
[docs] def compile(self, specializations: List[Dict], **compiler_options) -> None:
"""
Compile the ONNX model for Qualcomm AI hardware.
Args:
specializations (List[Dict]): Model specialization configurations
**compiler_options: Additional compiler options
"""
self._compile(specializations=specializations, **compiler_options)
[docs]class QEffVAE(QEFFBaseModel):
"""
Wrapper for Variational Autoencoder (VAE) models with ONNX export and QAIC compilation.
This class handles VAE models with specific transformations and optimizations
for efficient inference on Qualcomm AI hardware. VAE models are used in diffusion
pipelines for encoding images to latent space and decoding latents back to images.
Attributes:
model (nn.Module): The wrapped VAE model (deep copy of original)
type (str): VAE operation type ("encoder" or "decoder")
_pytorch_transforms (List): PyTorch transformations applied before ONNX export
_onnx_transforms (List): ONNX transformations applied after export
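Example:
A minimal sketch for the decoder path (``pipe`` is an assumed, already-loaded diffusion pipeline exposing a ``vae`` attribute):
>>> vae_decoder = QEffVAE(pipe.vae, "decoder")
>>> inputs, dynamic_axes, output_names = vae_decoder.get_onnx_params(latent_height=64, latent_width=64)
>>> onnx_path = vae_decoder.export(inputs, output_names, dynamic_axes)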
"""
_pytorch_transforms = [CustomOpsTransform, AttentionTransform]
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
@property
def get_model_config(self) -> Dict:
"""
Get the model configuration as a dictionary.
Returns:
Dict: The configuration dictionary of the underlying VAE model
"""
return self.model.config.__dict__
def __init__(self, model: nn.Module, type: str) -> None:
"""
Initialize the VAE wrapper.
Args:
model (nn.Module): The VAE model to wrap
type (str): VAE operation type ("encoder" or "decoder")
"""
super().__init__(model)
self.model = model
# Tag the config so encoder and decoder instances produce different export hashes
self.model.config["type"] = type
[docs] def get_onnx_params(self, latent_height: int = 32, latent_width: int = 32) -> Tuple[Dict, Dict, List[str]]:
"""
Generate ONNX export configuration for the VAE decoder.
Args:
latent_height (int): Height of latent representation (default: 32)
latent_width (int): Width of latent representation (default: 32)
Returns:
Tuple containing:
- example_inputs (Dict): Sample inputs for ONNX export
- dynamic_axes (Dict): Specification of dynamic dimensions
- output_names (List[str]): Names of model outputs
"""
bs = constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE
# VAE decoder takes latent representation as input
example_inputs = {
"latent_sample": torch.randn(bs, 16, latent_height, latent_width),
"return_dict": False,
}
output_names = ["sample"]
# Batch, channel, and spatial dimensions can all vary at runtime
dynamic_axes = {
"latent_sample": {0: "batch_size", 1: "channels", 2: "latent_height", 3: "latent_width"},
}
return example_inputs, dynamic_axes, output_names
[docs] def get_video_onnx_params(self) -> Tuple[Dict, Dict, List[str]]:
"""
Generate ONNX export configuration for the video (Wan) VAE decoder.
The latent frame count, height, and width are taken from the WAN_ONNX_EXPORT_* constants,
so this method takes no arguments.
Returns:
Tuple containing:
- example_inputs (Dict): Sample inputs for ONNX export
- dynamic_axes (Dict): Specification of dynamic dimensions
- output_names (List[str]): Names of model outputs
"""
bs = constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE
latent_frames = constants.WAN_ONNX_EXPORT_LATENT_FRAMES
latent_height = constants.WAN_ONNX_EXPORT_LATENT_HEIGHT_180P
latent_width = constants.WAN_ONNX_EXPORT_LATENT_WIDTH_180P
# VAE decoder takes latent representation as input
example_inputs = {
"latent_sample": torch.randn(bs, 16, latent_frames, latent_height, latent_width),
"return_dict": False,
}
output_names = ["sample"]
# All dimensions except channels can be dynamic
dynamic_axes = {
"latent_sample": {0: "batch_size", 2: "latent_frames", 3: "latent_height", 4: "latent_width"},
}
return example_inputs, dynamic_axes, output_names
[docs] def export(
self,
inputs: Dict,
output_names: List[str],
dynamic_axes: Dict,
export_dir: str = None,
export_kwargs: Dict = {},
) -> str:
"""
Export the VAE model to ONNX format.
Args:
inputs (Dict): Example inputs for ONNX export
output_names (List[str]): Names of model outputs
dynamic_axes (Dict): Specification of dynamic dimensions
export_dir (str, optional): Directory to save ONNX model
export_kwargs (Dict, optional): Additional export arguments
Returns:
str: Path to the exported ONNX model
"""
if hasattr(self.model.config, "_use_default_values"):
self.model.config["_use_default_values"].sort()
return self._export(
example_inputs=inputs,
output_names=output_names,
dynamic_axes=dynamic_axes,
export_dir=export_dir,
**export_kwargs,
)
[docs] def compile(self, specializations: List[Dict], **compiler_options) -> None:
"""
Compile the ONNX model for Qualcomm AI hardware.
Args:
specializations (List[Dict]): Model specialization configurations
**compiler_options: Additional compiler options
"""
self._compile(specializations=specializations, **compiler_options)
[docs]class QEffFluxTransformerModel(QEFFBaseModel):
"""
Wrapper for Flux Transformer2D models with ONNX export and QAIC compilation capabilities.
This class handles Flux Transformer2D models with specific transformations and optimizations
for efficient inference on Qualcomm AI hardware. Flux uses a transformer-based diffusion
architecture instead of traditional UNet, with dual transformer blocks and adaptive layer
normalization (AdaLN) for conditioning.
Attributes:
model (nn.Module): The wrapped Flux transformer model
_pytorch_transforms (List): PyTorch transformations applied before ONNX export
_onnx_transforms (List): ONNX transformations applied after export
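Example:
A minimal sketch, assuming a diffusers ``FluxTransformer2DModel`` has already been loaded as ``flux_transformer`` (checkpoint handling is omitted):
>>> qeff_transformer = QEffFluxTransformerModel(flux_transformer)
>>> inputs, dynamic_axes, output_names = qeff_transformer.get_onnx_params()
>>> onnx_path = qeff_transformer.export(inputs, output_names, dynamic_axes, use_onnx_subfunctions=True)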
"""
_pytorch_transforms = [AttentionTransform, NormalizationTransform, CustomOpsTransform]
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
@property
def get_model_config(self) -> Dict:
"""
Get the model configuration as a dictionary.
Returns:
Dict: The configuration dictionary of the underlying Flux transformer model
"""
return self.model.config.__dict__
def __init__(self, model: nn.Module) -> None:
"""
Initialize the Flux transformer wrapper.
Args:
model (nn.Module): The Flux transformer model to wrap
"""
super().__init__(model)
[docs] def get_onnx_params(
self,
batch_size: int = constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE,
seq_length: int = constants.FLUX_ONNX_EXPORT_SEQ_LENGTH,
cl: int = constants.FLUX_ONNX_EXPORT_COMPRESSED_LATENT_DIM,
) -> Tuple[Dict, Dict, List[str]]:
"""
Generate ONNX export configuration for the Flux transformer.
Creates example inputs for all Flux-specific inputs including hidden states,
text embeddings, timestep conditioning, and AdaLN embeddings.
Args:
batch_size (int): Batch size for example inputs (default: ONNX_EXPORT_EXAMPLE_BATCH_SIZE)
seq_length (int): Text sequence length (default: FLUX_ONNX_EXPORT_SEQ_LENGTH)
cl (int): Compressed latent dimension (default: FLUX_ONNX_EXPORT_COMPRESSED_LATENT_DIM)
Returns:
Tuple containing:
- example_inputs (Dict): Sample inputs for ONNX export
- dynamic_axes (Dict): Specification of dynamic dimensions
- output_names (List[str]): Names of model outputs
"""
example_inputs = {
# Latent representation of the image
"hidden_states": torch.randn(batch_size, cl, self.model.config.in_channels, dtype=torch.float32),
"encoder_hidden_states": torch.randn(
batch_size, seq_length, self.model.config.joint_attention_dim, dtype=torch.float32
),
"pooled_projections": torch.randn(batch_size, self.model.config.pooled_projection_dim, dtype=torch.float32),
"timestep": torch.tensor([1.0], dtype=torch.float32),
"img_ids": torch.randn(cl, 3, dtype=torch.float32),
"txt_ids": torch.randn(seq_length, 3, dtype=torch.float32),
# AdaLN embeddings for dual transformer blocks
# Shape: [num_layers, FLUX_ADALN_DUAL_BLOCK_CHUNKS, FLUX_ADALN_HIDDEN_DIM]
"adaln_emb": torch.randn(
self.model.config["num_layers"],
constants.FLUX_ADALN_DUAL_BLOCK_CHUNKS,
constants.FLUX_ADALN_HIDDEN_DIM,
dtype=torch.float32,
),
# AdaLN embeddings for single transformer blocks
# Shape: [num_single_layers, FLUX_ADALN_SINGLE_BLOCK_CHUNKS, FLUX_ADALN_HIDDEN_DIM]
"adaln_single_emb": torch.randn(
self.model.config["num_single_layers"],
constants.FLUX_ADALN_SINGLE_BLOCK_CHUNKS,
constants.FLUX_ADALN_HIDDEN_DIM,
dtype=torch.float32,
),
# Output AdaLN embedding
# Shape: [batch_size, FLUX_ADALN_OUTPUT_DIM] for final projection
"adaln_out": torch.randn(batch_size, constants.FLUX_ADALN_OUTPUT_DIM, dtype=torch.float32),
}
output_names = ["output"]
# Define dynamic dimensions for runtime flexibility
dynamic_axes = {
"hidden_states": {0: "batch_size", 1: "cl"},
"encoder_hidden_states": {0: "batch_size", 1: "seq_len"},
"pooled_projections": {0: "batch_size"},
"timestep": {0: "steps"},
"img_ids": {0: "cl"},
}
return example_inputs, dynamic_axes, output_names
[docs] def export(
self,
inputs: Dict,
output_names: List[str],
dynamic_axes: Dict,
export_dir: str = None,
export_kwargs: Dict = {},
use_onnx_subfunctions: bool = False,
) -> str:
"""
Export the Flux transformer model to ONNX format.
Args:
inputs (Dict): Example inputs for ONNX export
output_names (List[str]): Names of model outputs
dynamic_axes (Dict): Specification of dynamic dimensions
export_dir (str, optional): Directory to save ONNX model
export_kwargs (Dict, optional): Additional export arguments (e.g., export_modules_as_functions)
use_onnx_subfunctions (bool): Whether to export transformer blocks as ONNX functions
for better modularity and potential optimization
Returns:
str: Path to the exported ONNX model
"""
if use_onnx_subfunctions:
export_kwargs = {
"export_modules_as_functions": {QEffFluxTransformerBlock, QEffFluxSingleTransformerBlock},
"use_onnx_subfunctions": True,
}
# Sort _use_default_values in config to ensure consistent hash generation during export
self.model.config["_use_default_values"].sort()
return self._export(
example_inputs=inputs,
output_names=output_names,
dynamic_axes=dynamic_axes,
export_dir=export_dir,
offload_pt_weights=False,  # Keep PyTorch weights in memory; they are still needed after the AdaLN changes
**export_kwargs,
)
[docs] def compile(self, specializations: List[Dict], **compiler_options) -> None:
"""
Compile the ONNX model for Qualcomm AI hardware.
Args:
specializations (List[Dict]): Model specialization configurations
**compiler_options: Additional compiler options (e.g., num_cores, aic_num_of_activations)
"""
self._compile(specializations=specializations, **compiler_options)
[docs]class QEffWanUnifiedTransformer(QEFFBaseModel):
"""
Wrapper for WAN Unified Transformer with ONNX export and QAIC compilation capabilities.
This class handles the unified WAN transformer model that combines high and low noise transformers
into a single model for efficient deployment. Based on the timestep shape, the model dynamically
selects between high and low noise transformers during inference.
The wrapper applies specific transformations and optimizations for efficient inference on
Qualcomm AI hardware, particularly for video diffusion models.
Attributes:
model (nn.Module): The QEffWanUnifiedWrapper model that combines high/low noise transformers
_pytorch_transforms (List): PyTorch transformations applied before ONNX export
_onnx_transforms (List): ONNX transformations applied after export
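Example:
A minimal sketch, assuming a ``QEffWanUnifiedWrapper`` instance combining the pipeline's high and low noise transformers has already been built as ``unified_transformer`` (a placeholder name):
>>> qeff_wan = QEffWanUnifiedTransformer(unified_transformer)
>>> inputs, dynamic_axes, output_names = qeff_wan.get_onnx_params()
>>> onnx_path = qeff_wan.export(inputs, output_names, dynamic_axes, use_onnx_subfunctions=True)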
"""
_pytorch_transforms = [AttentionTransform, CustomOpsTransform, NormalizationTransform]
_onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
def __init__(self, unified_transformer):
"""
Initialize the Wan unified transformer wrapper.
Args:
unified_transformer (nn.Module): The unified Wan transformer model combining the high and low noise transformers
"""
super().__init__(unified_transformer)
self.model = unified_transformer
@property
def get_model_config(self) -> Dict:
"""
Get the model configuration as a dictionary.
Returns:
Dict: The configuration dictionary of the underlying Wan transformer model
"""
return self.model.config.__dict__
[docs] def get_onnx_params(self):
"""
Generate ONNX export configuration for the Wan transformer.
Creates example inputs for all Wan-specific inputs, including hidden states,
text embeddings, rotary position embeddings, and timestep conditioning.
Returns:
Tuple containing:
- example_inputs (Dict): Sample inputs for ONNX export
- dynamic_axes (Dict): Specification of dynamic dimensions
- output_names (List[str]): Names of model outputs
"""
batch_size = constants.WAN_ONNX_EXPORT_BATCH_SIZE
example_inputs = {
# hidden_states = [ bs, in_channels, frames, latent_height, latent_width]
"hidden_states": torch.randn(
batch_size,
self.model.config.in_channels,
constants.WAN_ONNX_EXPORT_LATENT_FRAMES,
constants.WAN_ONNX_EXPORT_LATENT_HEIGHT_180P,
constants.WAN_ONNX_EXPORT_LATENT_WIDTH_180P,
dtype=torch.float32,
),
# encoder_hidden_states = [BS, seq len , text dim]
"encoder_hidden_states": torch.randn(
batch_size, constants.WAN_ONNX_EXPORT_SEQ_LEN, constants.WAN_TEXT_EMBED_DIM, dtype=torch.float32
),
# Rotary position embeddings: [2, context_length, 1, rotary_dim]; the leading 2 stacks the cos and sin frequency tables
"rotary_emb": torch.randn(
2, constants.WAN_ONNX_EXPORT_CL_180P, 1, constants.WAN_ONNX_EXPORT_ROTARY_DIM, dtype=torch.float32
),
# Timestep embeddings: [batch_size=1, embedding_dim]
"temb": torch.randn(batch_size, constants.WAN_TEXT_EMBED_DIM, dtype=torch.float32),
# Projected timestep embeddings: [batch_size=1, projection_dim, embedding_dim]
"timestep_proj": torch.randn(
batch_size,
constants.WAN_PROJECTION_DIM,
constants.WAN_TEXT_EMBED_DIM,
dtype=torch.float32,
),
# Timestep parameter: Controls high/low noise transformer selection based on shape
"tsp": torch.ones(1, dtype=torch.int64),
}
output_names = ["output"]
dynamic_axes = {
"hidden_states": {
0: "batch_size",
1: "num_channels",
2: "latent_frames",
3: "latent_height",
4: "latent_width",
},
"timestep": {0: "steps"},
"encoder_hidden_states": {0: "batch_size", 1: "sequence_length"},
"rotary_emb": {1: "cl"},
"tsp": {0: "model_type"},
}
return example_inputs, dynamic_axes, output_names
[docs] def export(
self,
inputs: Dict,
output_names: List[str],
dynamic_axes: Dict,
export_dir: str = None,
export_kwargs: Dict = {},
use_onnx_subfunctions: bool = False,
) -> str:
"""Export the Wan transformer model to ONNX format.
Args:
inputs (Dict): Example inputs for ONNX export
output_names (List[str]): Names of model outputs
dynamic_axes (Dict): Specification of dynamic dimensions
export_dir (str, optional): Directory to save ONNX model
export_kwargs (Dict, optional): Additional export arguments (e.g., export_modules_as_functions)
use_onnx_subfunctions (bool): Whether to export transformer blocks as ONNX functions
for better modularity and potential optimization
Returns:
str: Path to the exported ONNX model
"""
if use_onnx_subfunctions:
export_kwargs = {"export_modules_as_functions": {WanTransformerBlock}, "use_onnx_subfunctions": True}
return self._export(
example_inputs=inputs,
output_names=output_names,
dynamic_axes=dynamic_axes,
export_dir=export_dir,
offload_pt_weights=True,
**export_kwargs,
)
[docs] def compile(self, specializations, **compiler_options) -> None:
"""
Compile the ONNX model for Qualcomm AI hardware.
Args:
specializations (List[Dict]): Model specialization configurations
**compiler_options: Additional compiler options (e.g., num_cores, aic_num_of_activations)
"""
self._compile(specializations=specializations, **compiler_options)