Skip to content

Commit 8852b12

Browse files
committed
address reviews
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
1 parent 2c2f6a3 commit 8852b12

File tree

1 file changed

+15
-11
lines changed

1 file changed

+15
-11
lines changed

modelopt/torch/export/convert_hf_config.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
"""Convert modelopt quantization export config to align with llm-compressor config format."""
1717

18+
import warnings
1819
from collections import defaultdict
1920
from typing import Any
2021

@@ -51,23 +52,21 @@ def _quant_algo_to_group_config(quant_algo: str, group_size: int | None = None)
5152
},
5253
"weights": {"dynamic": False, "num_bits": 4, "type": "float", "group_size": gs},
5354
}
54-
elif quant_algo in ("NVFP4_AWQ", "W4A16_AWQ", "W4A8_AWQ"):
55+
elif quant_algo == "W4A16_AWQ":
56+
gs = group_size or 128
57+
return {
58+
"weights": {"dynamic": False, "num_bits": 4, "type": "int", "group_size": gs},
59+
}
60+
elif quant_algo in ("NVFP4_AWQ", "W4A8_AWQ"):
5561
gs = group_size or 128
56-
weight_bits = 4
57-
act_bits = 8 if "A8" in quant_algo else 4
5862
return {
5963
"input_activations": {
6064
"dynamic": False,
61-
"num_bits": act_bits,
62-
"type": "float",
63-
"group_size": gs,
64-
},
65-
"weights": {
66-
"dynamic": False,
67-
"num_bits": weight_bits,
65+
"num_bits": 8,
6866
"type": "float",
6967
"group_size": gs,
7068
},
69+
"weights": {"dynamic": False, "num_bits": 4, "type": "float", "group_size": gs},
7170
}
7271
elif quant_algo == "W8A16":
7372
return {
@@ -101,7 +100,12 @@ def _quant_algo_to_group_config(quant_algo: str, group_size: int | None = None)
101100
"weights": {"dynamic": False, "num_bits": 8, "type": "float", "group_size": gs},
102101
}
103102
else:
104-
# Fallback: store the raw algo name so downstream consumers can still inspect it.
103+
warnings.warn(
104+
f"Unsupported quantization algorithm '{quant_algo}' in "
105+
f"_quant_algo_to_group_config. The resulting config group will not contain "
106+
f"'input_activations' or 'weights' keys and may not be compatible with "
107+
f"compressed-tensors consumers. Please add explicit support for this algorithm."
108+
)
105109
return {"quant_algo": quant_algo}
106110

107111

0 commit comments

Comments (0)