pandas-dev · jorisvandenbossche · Apr 20, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1957,8 +1957,17 @@ def _consolidate_check(self) -> None:
             self._is_consolidated = True
             self._known_consolidated = True
             return
-        dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate]
-        self._is_consolidated = len(dtypes) == len(set(dtypes))
+        # Exit early on first duplicate dtype rather than collecting all dtypes
+        dtypes: set[DtypeObj] = set()
+        for blk in self.blocks:
+            if blk._can_consolidate:
+                dtype = blk.dtype
+                if dtype in dtypes:
+                    self._is_consolidated = False
+                    self._known_consolidated = True
+                    return
+                dtypes.add(dtype)
+        self._is_consolidated = True
         self._known_consolidated = True
 
     def _consolidate_inplace(self) -> None:
@@ -2374,17 +2383,18 @@ def raise_construction_error(
 # -----------------------------------------------------------------------
 
 
-def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, DtypeObj]:
+def _grouping_key(tup: tuple[int, ArrayLike]) -> Hashable:
+    """Return the dict key ``_form_blocks`` uses to group an ``(int, array)`` pair."""
     dtype = tup[1].dtype
 
-    if is_1d_only_ea_dtype(dtype):
-        # We know these won't be consolidated, so don't need to group these.
-        # This avoids expensive comparisons of CategoricalDtype objects
-        sep = id(dtype)
+    if isinstance(dtype, np.dtype):
+        # Key on the dtype itself: ``dtype.name`` omits byte order and struct
+        # fields, so distinct dtypes (">i8" vs "<i8") would share one group.
+        return dtype
     else:
-        sep = 0
-
-    return sep, dtype
+        # Extension arrays each get their own block regardless of grouping;
+        # id() avoids a costly __hash__ (CategoricalDtype hashes categories).
+        return id(dtype)
 
 
 def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list[Block]:
@@ -2396,11 +2406,18 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list
     # when consolidating, we can ignore refs (either stacking always copies,
     # or the EA is already copied in the calling dict_to_mgr)
 
-    # group by dtype
-    grouper = itertools.groupby(tuples, _grouping_func)
+    # group by dtype using a dict, which is faster than itertools.groupby
+    groups: dict[Hashable, list[tuple[int, ArrayLike]]] = {}
+    for tup in tuples:
+        key = _grouping_key(tup)
+        try:
+            groups[key].append(tup)
+        except KeyError:
+            groups[key] = [tup]
 
     nbs: list[Block] = []
-    for (_, dtype), tup_block in grouper:
+    for tup_block in groups.values():
+        dtype = tup_block[0][1].dtype
         block_type = get_block_type(dtype)
 
         if isinstance(dtype, np.dtype):
@@ -2487,19 +2504,19 @@ def _merge_blocks(
         new_values: ArrayLike
 
         if isinstance(blocks[0].dtype, np.dtype):
-            # error: List comprehension has incompatible type List[Union[ndarray,
-            # ExtensionArray]]; expected List[Union[complex, generic,
-            # Sequence[Union[int, float, complex, str, bytes, generic]],
-            # Sequence[Sequence[Any]], SupportsArray]]
-            new_values = np.vstack([b.values for b in blocks])  # type: ignore[misc]
+            # Use np.concatenate directly instead of np.vstack to avoid the
+            # overhead of atleast_2d calls (block values are always 2D)
+            new_values = np.concatenate([b.values for b in blocks], axis=0)
         else:
             bvals = [blk.values for blk in blocks]
             bvals2 = cast("Sequence[NDArrayBackedExtensionArray]", bvals)
             new_values = bvals2[0]._concat_same_type(bvals2, axis=0)
 
-        argsort = np.argsort(new_mgr_locs)
-        new_values = new_values[argsort]
-        new_mgr_locs = new_mgr_locs[argsort]
+        # Only sort if locations are not already in order
+        if not libalgos.is_monotonic(new_mgr_locs, False)[0]:
+            argsort = np.argsort(new_mgr_locs)
+            new_values = new_values[argsort]
+            new_mgr_locs = new_mgr_locs[argsort]
 
         bp = BlockPlacement(new_mgr_locs)
         return [new_block_2d(new_values, placement=bp)], True

diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py
@@ -15,14 +15,18 @@ def test_dataframe_not_equal(self):
         df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]})
         assert df1.equals(df2) is False
 
-    def test_equals_different_blocks(self, using_infer_string):
+    def test_equals_different_blocks(self):
         # GH#9330
-        df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
-        df1 = df0.reset_index()[["A", "B", "C"]]
-        if not using_infer_string:
-            # this assert verifies that the above operations have
-            # induced a block rearrangement
-            assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype
+        df0 = DataFrame(
+            {"A": [1.0, 2.0], "B": np.array([1, 2], dtype=np.int64), "C": [3.0, 4.0]}
+        )
+        # build df1 via sequential __setitem__ so the float columns end up
+        # in separate blocks instead of being consolidated upfront
+        df1 = DataFrame({"A": [1.0, 2.0]})
+        df1["B"] = np.array([1, 2], dtype=np.int64)
+        df1["C"] = np.array([3.0, 4.0])
+        assert len(df0._mgr.blocks) == 2
+        assert len(df1._mgr.blocks) == 3
 
         # do the real tests
         tm.assert_frame_equal(df0, df1)