From cd7d62298999bdcfd54621251aad7e0e82ddb3ae Mon Sep 17 00:00:00 2001
From: David Fan
Date: Thu, 30 Jan 2025 02:21:30 +0000
Subject: [PATCH] Exclude nodes for quantization

Signed-off-by: David Fan
---
 neural_compressor/adaptor/ox_utils/weight_only.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/neural_compressor/adaptor/ox_utils/weight_only.py b/neural_compressor/adaptor/ox_utils/weight_only.py
index f6e575fd9f3..1fd09dd7471 100644
--- a/neural_compressor/adaptor/ox_utils/weight_only.py
+++ b/neural_compressor/adaptor/ox_utils/weight_only.py
@@ -299,6 +299,7 @@ def rtn_quantize(
     ratios={},
     accuracy_level=0,
     providers=["CPUExecutionProvider"],
+    nodes_to_exclude=None,
 ):
     """Quant the model with round to nearst method.
 
@@ -323,6 +324,7 @@
             2 (fp16 compute type of jblas kernel), 3 (bf16 compute type of jblas kernel),
             4 (int8 compute type of jblas kernel)
         providers (list): providers to use
+        nodes_to_exclude (list, optional): names of nodes to exclude from quantization. Defaults to None.
 
     Returns:
         model: fake quantized ONNXModel
@@ -334,6 +336,9 @@
+    nodes_to_exclude = nodes_to_exclude or []
     total_num = len([i for i in model.nodes() if i.op_type in ["MatMul"]])
     curr_id = 0
     for node in model.nodes():
+        if node.name in nodes_to_exclude:
+            continue
         if node.op_type in ["MatMul"]:
             curr_id += 1
             simple_progress_bar(total_num, curr_id)