|
15 | 15 |
|
16 | 16 | """Convert modelopt quantization export config to align with llm-compressor config format.""" |
17 | 17 |
|
| 18 | +import warnings |
18 | 19 | from collections import defaultdict |
19 | 20 | from typing import Any |
20 | 21 |
|
@@ -51,23 +52,21 @@ def _quant_algo_to_group_config(quant_algo: str, group_size: int | None = None) |
51 | 52 | }, |
52 | 53 | "weights": {"dynamic": False, "num_bits": 4, "type": "float", "group_size": gs}, |
53 | 54 | } |
54 | | - elif quant_algo in ("NVFP4_AWQ", "W4A16_AWQ", "W4A8_AWQ"): |
| 55 | + elif quant_algo == "W4A16_AWQ": |
| 56 | + gs = group_size or 128 |
| 57 | + return { |
| 58 | + "weights": {"dynamic": False, "num_bits": 4, "type": "int", "group_size": gs}, |
| 59 | + } |
| 60 | + elif quant_algo in ("NVFP4_AWQ", "W4A8_AWQ"): |
55 | 61 | gs = group_size or 128 |
56 | | - weight_bits = 4 |
57 | | - act_bits = 8 if "A8" in quant_algo else 4 |
58 | 62 | return { |
59 | 63 | "input_activations": { |
60 | 64 | "dynamic": False, |
61 | | - "num_bits": act_bits, |
62 | | - "type": "float", |
63 | | - "group_size": gs, |
64 | | - }, |
65 | | - "weights": { |
66 | | - "dynamic": False, |
67 | | - "num_bits": weight_bits, |
| 65 | + "num_bits": 8, |
68 | 66 | "type": "float", |
69 | 67 | "group_size": gs, |
70 | 68 | }, |
| 69 | + "weights": {"dynamic": False, "num_bits": 4, "type": "float", "group_size": gs}, |
71 | 70 | } |
72 | 71 | elif quant_algo == "W8A16": |
73 | 72 | return { |
@@ -101,7 +100,12 @@ def _quant_algo_to_group_config(quant_algo: str, group_size: int | None = None) |
101 | 100 | "weights": {"dynamic": False, "num_bits": 8, "type": "float", "group_size": gs}, |
102 | 101 | } |
103 | 102 | else: |
104 | | - # Fallback: store the raw algo name so downstream consumers can still inspect it. |
| 103 | + warnings.warn( |
| 104 | + f"Unsupported quantization algorithm '{quant_algo}' in " |
| 105 | + f"_quant_algo_to_group_config. The resulting config group will not contain " |
| 106 | + f"'input_activations' or 'weights' keys and may not be compatible with " |
| 107 | + f"compressed-tensors consumers. Please add explicit support for this algorithm." |
| 108 | + ) |
105 | 109 | return {"quant_algo": quant_algo} |
106 | 110 |
|
107 | 111 |
|
|
0 commit comments