3434except ImportError : # pragma: NO COVER
3535 pyarrow = None
3636
37+ try :
38+ import db_dtypes # type: ignore
39+ except ImportError : # pragma: NO COVER
40+ db_dtypes = None
41+
3742try :
3843 import geopandas # type: ignore
3944except ImportError :
8893
8994_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'
9095
96+ _NO_SUPPORTED_DTYPE = (
97+ "The dtype cannot to be converted to a pandas ExtensionArray "
98+ "because the necessary `__from_arrow__` attribute is missing."
99+ )
100+
91101
92102def _reference_getter (table ):
93103 """A :class:`~google.cloud.bigquery.table.TableReference` pointing to
@@ -1920,6 +1930,10 @@ def to_dataframe(
19201930 progress_bar_type : str = None ,
19211931 create_bqstorage_client : bool = True ,
19221932 geography_as_object : bool = False ,
1933+ bool_dtype : Union [Any , None ] = pandas .BooleanDtype (),
1934+ int_dtype : Union [Any , None ] = pandas .Int64Dtype (),
1935+ float_dtype : Union [Any , None ] = None ,
1936+ string_dtype : Union [Any , None ] = None ,
19231937 ) -> "pandas.DataFrame" :
19241938 """Create a pandas DataFrame by loading all pages of a query.
19251939
@@ -1958,6 +1972,7 @@ def to_dataframe(
19581972 progress bar as a graphical dialog box.
19591973
19601974 .. versionadded:: 1.11.0
1975+
19611976 create_bqstorage_client (Optional[bool]):
19621977 If ``True`` (default), create a BigQuery Storage API client
19631978 using the default API settings. The BigQuery Storage API
@@ -1975,6 +1990,46 @@ def to_dataframe(
19751990
19761991 .. versionadded:: 2.24.0
19771992
1993+ bool_dtype (Optional[pandas.Series.dtype]):
1994+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
1995+ to convert BigQuery Boolean type, instead of relying on the default
1996+ ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
1997+ then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
1998+ type can be found at:
1999+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
2000+
2001+ .. versionadded:: 3.7.1
2002+
2003+ int_dtype (Optional[pandas.Series.dtype]):
2004+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
2005+ to convert BigQuery Integer types, instead of relying on the default
2006+ ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
2007+ then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
2008+ Integer types can be found at:
2009+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
2010+
2011+ .. versionadded:: 3.7.1
2012+
2013+ float_dtype (Optional[pandas.Series.dtype]):
2014+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
2015+ to convert BigQuery Float type, instead of relying on the default
2016+ ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
2017+ then the data type will be ``numpy.dtype("float64")``. BigQuery Float
2018+ type can be found at:
2019+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
2020+
2021+ .. versionadded:: 3.7.1
2022+
2023+ string_dtype (Optional[pandas.Series.dtype]):
2024+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
2025+ convert BigQuery String type, instead of relying on the default
2026+ ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
2027+ then the data type will be ``numpy.dtype("object")``. BigQuery String
2028+ type can be found at:
2029+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
2030+
2031+ .. versionadded:: 3.7.1
2032+
19782033 Returns:
19792034 pandas.DataFrame:
19802035 A :class:`~pandas.DataFrame` populated with row data and column
@@ -1987,14 +2042,28 @@ def to_dataframe(
19872042 the :mod:`google.cloud.bigquery_storage_v1` module is
19882043 required but cannot be imported. Also if
19892044 `geography_as_object` is `True`, but the
1990- :mod:`shapely` library cannot be imported.
2045+ :mod:`shapely` library cannot be imported. Also if
2046+ `bool_dtype`, `int_dtype`, `float_dtype`, or `string_dtype`
2047+ is set to a dtype that is not supported.
19912048
19922049 """
19932050 _pandas_helpers .verify_pandas_imports ()
19942051
19952052 if geography_as_object and shapely is None :
19962053 raise ValueError (_NO_SHAPELY_ERROR )
19972054
2055+ if bool_dtype is not None and not hasattr (bool_dtype , "__from_arrow__" ):
2056+ raise ValueError ("bool_dtype" , _NO_SUPPORTED_DTYPE )
2057+
2058+ if int_dtype is not None and not hasattr (int_dtype , "__from_arrow__" ):
2059+ raise ValueError ("int_dtype" , _NO_SUPPORTED_DTYPE )
2060+
2061+ if float_dtype is not None and not hasattr (float_dtype , "__from_arrow__" ):
2062+ raise ValueError ("float_dtype" , _NO_SUPPORTED_DTYPE )
2063+
2064+ if string_dtype is not None and not hasattr (string_dtype , "__from_arrow__" ):
2065+ raise ValueError ("string_dtype" , _NO_SUPPORTED_DTYPE )
2066+
19982067 if dtypes is None :
19992068 dtypes = {}
20002069
@@ -2019,15 +2088,15 @@ def to_dataframe(
20192088 for col in record_batch
20202089 # Type can be date32 or date64 (plus units).
20212090 # See: https://arrow.apache.org/docs/python/api/datatypes.html
2022- if str (col .type ). startswith ( "date" )
2091+ if pyarrow . types . is_date (col .type )
20232092 )
20242093
20252094 timestamp_as_object = not all (
20262095 self .__can_cast_timestamp_ns (col )
20272096 for col in record_batch
2028- # Type can be timestamp (plus units and time zone).
2097+ # Type can be datetime and timestamp (plus units and time zone).
20292098 # See: https://arrow.apache.org/docs/python/api/datatypes.html
2030- if str (col .type ). startswith ( "timestamp" )
2099+ if pyarrow . types . is_timestamp (col .type )
20312100 )
20322101
20332102 if len (record_batch ) > 0 :
@@ -2036,7 +2105,11 @@ def to_dataframe(
20362105 timestamp_as_object = timestamp_as_object ,
20372106 integer_object_nulls = True ,
20382107 types_mapper = _pandas_helpers .default_types_mapper (
2039- date_as_object = date_as_object
2108+ date_as_object = date_as_object ,
2109+ bool_dtype = bool_dtype ,
2110+ int_dtype = int_dtype ,
2111+ float_dtype = float_dtype ,
2112+ string_dtype = string_dtype ,
20402113 ),
20412114 )
20422115 else :
@@ -2233,6 +2306,10 @@ def to_dataframe(
22332306 progress_bar_type = None ,
22342307 create_bqstorage_client = True ,
22352308 geography_as_object = False ,
2309+ bool_dtype = None ,
2310+ int_dtype = None ,
2311+ float_dtype = None ,
2312+ string_dtype = None ,
22362313 ) -> "pandas.DataFrame" :
22372314 """Create an empty dataframe.
22382315
@@ -2241,6 +2318,11 @@ def to_dataframe(
22412318 dtypes (Any): Ignored. Added for compatibility with RowIterator.
22422319 progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
22432320 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
2321+ geography_as_object (bool): Ignored. Added for compatibility with RowIterator.
2322+ bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
2323+ int_dtype (Any): Ignored. Added for compatibility with RowIterator.
2324+ float_dtype (Any): Ignored. Added for compatibility with RowIterator.
2325+ string_dtype (Any): Ignored. Added for compatibility with RowIterator.
22442326
22452327 Returns:
22462328 pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
0 commit comments