From 930c2fd1424e273ebd4c4cc31dfbab8d23df8006 Mon Sep 17 00:00:00 2001
From: Giuseppe Franco
Date: Mon, 12 Feb 2024 11:40:39 +0000
Subject: [PATCH] Fix (examples/ptq): fix execution device

---
 .../imagenet_classification/ptq/ptq_common.py | 28 ++++++++++++++-----
 .../ptq/ptq_evaluate.py                       | 14 ++++++----
 2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py b/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
index 541f0085f..62752f1d0 100644
--- a/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
+++ b/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
@@ -146,7 +146,8 @@ def quantize_model(
         weight_quant_type='sym',
         act_quant_granularity='per_tensor',
         uint_sym_act_for_unsigned_values=True,
-        dtype=torch.float32):
+        dtype=torch.float32,
+        device='cpu'):
     # Define what quantize function to use and, based on the given configuration, its arguments
     quantize_fn = QUANTIZE_MAP[backend]
     weight_scale_type = scale_factor_type
@@ -222,6 +223,7 @@ def layerwise_bit_width_fn_weight(module):
 
 
     quant_layer_map, quant_layerwise_layer_map, quant_act_map, quant_identity_map = create_quant_maps(dtype=dtype,
+        device=device,
         uint_sym_act_for_unsigned_values=uint_sym_act_for_unsigned_values,
         bias_bit_width=bias_bit_width,
         weight_param_method=weight_param_method,
@@ -274,7 +276,8 @@ def create_quant_maps(
         act_param_method=None,
         act_quant_type=None,
         act_quant_granularity=None,
-        act_quant_percentile=None):
+        act_quant_percentile=None,
+        device='cpu'):
     """
     Starting from pre-defined quantizers, modify them to match the desired configuration
     """
@@ -323,17 +326,19 @@ def kwargs_prefix(prefix, weight_kwargs):
     if weight_quant_type == 'asym':
         weight_quant = weight_quant.let(zero_point_impl=ParameterFromStatsFromParameterZeroPoint)
     if act_quant is not None:
-        act_quant = act_quant.let(**{'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+        act_quant = act_quant.let(
+            **{
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
         if act_quant_type == 'asym' and act_quant_percentile is not None:
             act_quant = act_quant.let(**{'low_percentile_q': 100 - act_quant_percentile})
     if sym_act_quant is not None:
         sym_act_quant = sym_act_quant.let(
             **{
-                'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
     if per_tensor_act_quant is not None:
         per_tensor_act_quant = per_tensor_act_quant.let(
             **{
-                'high_percentile_q': act_quant_percentile, 'dtype': dtype})
+                'high_percentile_q': act_quant_percentile, 'dtype': dtype, 'device': device})
         if act_quant_type == 'asym' and act_quant_percentile is not None:
             per_tensor_act_quant = per_tensor_act_quant.let(
                 **{'low_percentile_q': 100 - act_quant_percentile})
@@ -341,7 +346,11 @@ def kwargs_prefix(prefix, weight_kwargs):
     weight_quant_dict = {'weight_quant': weight_quant}
 
     quant_wbiol_kwargs = {
-        **weight_quant_dict, 'dtype': dtype, 'return_quant_tensor': False, 'bias_quant': bias_quant}
+        **weight_quant_dict,
+        'dtype': dtype,
+        'device': device,
+        'return_quant_tensor': False,
+        'bias_quant': bias_quant}
 
     # yapf: disable
     quant_mha_kwargs = {
@@ -361,6 +370,7 @@ def kwargs_prefix(prefix, weight_kwargs):
         # since it supports only self-attention
         'packed_in_proj': True,
         'dtype': dtype,
+        'device': device,
         'return_quant_tensor': False}
     # yapf: enable
 
@@ -451,8 +461,12 @@ def apply_act_equalization(model, calib_loader, layerwise):
     model.eval()
     dtype = next(model.parameters()).dtype
    device = next(model.parameters()).device
+    add_mul_node = layerwise
     with torch.no_grad():
-        with activation_equalization_mode(model, alpha=0.5, layerwise=layerwise):
+        with activation_equalization_mode(model,
+                                          alpha=0.5,
+                                          layerwise=layerwise,
+                                          add_mul_node=add_mul_node):
             for i, (images, target) in enumerate(tqdm(calib_loader)):
                 images = images.to(device)
                 images = images.to(dtype)
diff --git a/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py b/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py
index 740a207ac..0938b69f2 100644
--- a/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py
+++ b/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py
@@ -362,14 +362,21 @@ def main():
     else:
         raise RuntimeError(f"{args.target_backend} backend not supported.")
 
+    # If available, use the selected GPU
+    if args.gpu is not None:
+        torch.cuda.set_device(args.gpu)
+        model = model.cuda(args.gpu)
+        cudnn.benchmark = False
+
     if args.act_equalization is not None:
         print("Applying activation equalization:")
         apply_act_equalization(model, calib_loader, layerwise=args.act_equalization == 'layerwise')
-
+    device = next(iter(model.parameters())).device
     # Define the quantized model
     quant_model = quantize_model(
         model,
         dtype=dtype,
+        device=device,
         backend=args.target_backend,
         scale_factor_type=args.scale_factor_type,
         bias_bit_width=args.bias_bit_width,
@@ -390,11 +397,6 @@ def main():
         weight_exponent_bit_width=args.weight_exponent_bit_width,
         act_mantissa_bit_width=args.act_mantissa_bit_width,
         act_exponent_bit_width=args.act_exponent_bit_width)
-    # If available, use the selected GPU
-    if args.gpu is not None:
-        torch.cuda.set_device(args.gpu)
-        quant_model = quant_model.cuda(args.gpu)
-        cudnn.benchmark = False
 
     # Calibrate the quant_model on the calibration dataloader
     print("Starting activation calibration:")
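
Reviewer note: the sketch below shows the call pattern this patch enables. It is a minimal, hypothetical example: the torchvision model and the GPU index are illustrative and not part of the patch; only the dtype/device-derivation idiom and the new device keyword of quantize_model come from the diff above, and the remaining quantization arguments are left out because this commit does not change them.

    import torch
    from torchvision.models import resnet18

    # Illustrative float model; any nn.Module works here.
    model = resnet18().eval()

    # Place the *float* model on the target device before quantizing,
    # mirroring the reordering in ptq_evaluate.py above.
    if torch.cuda.is_available():
        torch.cuda.set_device(0)
        model = model.cuda(0)

    # Derive dtype and device from the model itself instead of assuming
    # float32/CPU, then forward both so the quantizers are instantiated
    # on the same device as the weights they quantize.
    dtype = next(iter(model.parameters())).dtype
    device = next(iter(model.parameters())).device
    # quant_model = quantize_model(model, dtype=dtype, device=device, ...)
    # (remaining keyword arguments as in the ptq_evaluate.py call above)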
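On the ptq_common.py side, the device value reaches the quantizers through their .let(...) override mechanism, as the act_quant/sym_act_quant/per_tensor_act_quant hunks above show. A minimal sketch of that idiom, assuming brevitas is installed (Int8ActPerTensorFloat is one of the stock quantizers; the override values here are illustrative):

    import torch
    from brevitas.quant import Int8ActPerTensorFloat

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # .let() returns a new quantizer with the given injector attributes
    # overridden, so layers built from it allocate their quantization
    # parameters directly on `device` rather than defaulting to CPU.
    act_quant = Int8ActPerTensorFloat.let(**{'dtype': torch.float32, 'device': device})

This mirrors what create_quant_maps now does for each of the pre-defined activation quantizers.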