Merge branch 'main' into transformers-v5-pr

sayakpaul · web-flow · commit a768ad1c5cce · 2026-02-20T10:49:30.000+05:30
diff --git a/.github/workflows/pr_modular_tests.yml b/.github/workflows/pr_modular_tests.yml
@@ -117,7 +117,7 @@ jobs:
 
     - name: Install dependencies
       run: |
-        uv pip install -e ".[quality,test]"
+        uv pip install -e ".[quality]"
         #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
         uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
         uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml
@@ -115,7 +115,7 @@ jobs:
 
     - name: Install dependencies
       run: |
-        uv pip install -e ".[quality,test]"
+        uv pip install -e ".[quality]"
         #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
         uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
         uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
@@ -192,7 +192,7 @@ jobs:
 
     - name: Install dependencies
       run: |
-        uv pip install -e ".[quality,test]"
+        uv pip install -e ".[quality]"
 
     - name: Environment
       run: |
@@ -245,7 +245,7 @@ jobs:
 
     - name: Install dependencies
       run: |
-        uv pip install -e ".[quality,test]"
+        uv pip install -e ".[quality]"
         # TODO (sayakpaul, DN6): revisit `--no-deps`
         uv pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
         uv pip install -U tokenizers
diff --git a/.github/workflows/push_tests_mps.yml b/.github/workflows/push_tests_mps.yml
@@ -41,7 +41,7 @@ jobs:
       shell: arch -arch arm64 bash {0}
       run: |
         ${CONDA_RUN} python -m pip install --upgrade pip uv
-        ${CONDA_RUN} python -m uv pip install -e ".[quality,test]"
+        ${CONDA_RUN} python -m uv pip install -e ".[quality]"
         ${CONDA_RUN} python -m uv pip install torch torchvision torchaudio
         ${CONDA_RUN} python -m uv pip install accelerate@git+https://github.com/huggingface/accelerate.git
         ${CONDA_RUN} python -m uv pip install transformers --upgrade
diff --git a/src/diffusers/quantizers/gguf/utils.py b/src/diffusers/quantizers/gguf/utils.py
@@ -516,6 +516,9 @@ def dequantize_gguf_tensor(tensor):
 
     block_size, type_size = GGML_QUANT_SIZES[quant_type]
 
+    # Convert to plain tensor to avoid unnecessary __torch_function__ overhead.
+    tensor = tensor.as_tensor()
+
     tensor = tensor.view(torch.uint8)
     shape = _quant_shape_from_byte_shape(tensor.shape, type_size, block_size)
 
@@ -525,7 +528,7 @@ def dequantize_gguf_tensor(tensor):
     dequant = dequant_fn(blocks, block_size, type_size)
     dequant = dequant.reshape(shape)
 
-    return dequant.as_tensor()
+    return dequant
 
 
 class GGUFParameter(torch.nn.Parameter):
diff --git a/tests/models/testing_utils/lora.py b/tests/models/testing_utils/lora.py
@@ -375,7 +375,7 @@ def _check_model_hotswap(
             # additionally check if dynamic compilation works.
             if different_shapes is not None:
                 for height, width in different_shapes:
-                    new_inputs_dict = self.prepare_dummy_input(height=height, width=width)
+                    new_inputs_dict = self.get_dummy_inputs(height=height, width=width)
                     _ = model(**new_inputs_dict)
             else:
                 output0_after = model(**inputs_dict)["sample"]
@@ -390,7 +390,7 @@ def _check_model_hotswap(
         with torch.inference_mode():
             if different_shapes is not None:
                 for height, width in different_shapes:
-                    new_inputs_dict = self.prepare_dummy_input(height=height, width=width)
+                    new_inputs_dict = self.get_dummy_inputs(height=height, width=width)
                     _ = model(**new_inputs_dict)
             else:
                 output1_after = model(**inputs_dict)["sample"]