From cd7d62298999bdcfd54621251aad7e0e82ddb3ae Mon Sep 17 00:00:00 2001
From: David Fan
Date: Thu, 30 Jan 2025 02:21:30 +0000
Subject: [PATCH] Exclude nodes for quantization

Signed-off-by: David Fan
---
 neural_compressor/adaptor/ox_utils/weight_only.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/neural_compressor/adaptor/ox_utils/weight_only.py b/neural_compressor/adaptor/ox_utils/weight_only.py
index f6e575fd9f3..1fd09dd7471 100644
--- a/neural_compressor/adaptor/ox_utils/weight_only.py
+++ b/neural_compressor/adaptor/ox_utils/weight_only.py
@@ -299,6 +299,7 @@ def rtn_quantize(
     ratios={},
     accuracy_level=0,
     providers=["CPUExecutionProvider"],
+    nodes_to_exclude=None,
 ):
     """Quant the model with round to nearst method.
 
@@ -323,6 +324,7 @@
             2 (fp16 compute type of jblas kernel), 3 (bf16 compute type of jblas kernel),
             4 (int8 compute type of jblas kernel)
         providers (list): providers to use
+        nodes_to_exclude (list, optional): names of nodes to exclude from quantization. Defaults to None.
 
     Returns:
         model: fake quantized ONNXModel
@@ -334,6 +336,9 @@
+    nodes_to_exclude = nodes_to_exclude or []
     total_num = len([i for i in model.nodes() if i.op_type in ["MatMul"]])
     curr_id = 0
     for node in model.nodes():
+        if node.name in nodes_to_exclude:
+            continue
         if node.op_type in ["MatMul"]:
             curr_id += 1
             simple_progress_bar(total_num, curr_id)