From 584b04f9ac710297a7d40a8b119bec97923f90e4 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Tue, 14 Dec 2021 16:46:35 -0500 Subject: [PATCH 1/3] remove pandas-gbq from testing --- ci/environment-3.7.yaml | 1 - ci/environment-3.8.yaml | 1 - ci/environment-3.9.yaml | 1 - dask_bigquery/tests/test_core.py | 31 +++++++++++++++++++++++-------- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/ci/environment-3.7.yaml b/ci/environment-3.7.yaml index e660f12..a2211bd 100644 --- a/ci/environment-3.7.yaml +++ b/ci/environment-3.7.yaml @@ -12,6 +12,5 @@ dependencies: # Including grpcio-status as a temporary workaround for # https://github.com/googleapis/python-api-core/issues/301 - grpcio-status - - pandas-gbq<=0.15 - google-cloud-bigquery>=2.11.0 - google-cloud-bigquery-storage \ No newline at end of file diff --git a/ci/environment-3.8.yaml b/ci/environment-3.8.yaml index a5756cd..6b69756 100644 --- a/ci/environment-3.8.yaml +++ b/ci/environment-3.8.yaml @@ -12,6 +12,5 @@ dependencies: # Including grpcio-status as a temporary workaround for # https://github.com/googleapis/python-api-core/issues/301 - grpcio-status - - pandas-gbq<=0.15 - google-cloud-bigquery>=2.11.0 - google-cloud-bigquery-storage \ No newline at end of file diff --git a/ci/environment-3.9.yaml b/ci/environment-3.9.yaml index 77a6ce3..801c67c 100644 --- a/ci/environment-3.9.yaml +++ b/ci/environment-3.9.yaml @@ -12,6 +12,5 @@ dependencies: # Including grpcio-status as a temporary workaround for # https://github.com/googleapis/python-api-core/issues/301 - grpcio-status - - pandas-gbq<=0.15 - google-cloud-bigquery>=2.11.0 - google-cloud-bigquery-storage \ No newline at end of file diff --git a/dask_bigquery/tests/test_core.py b/dask_bigquery/tests/test_core.py index 82b922b..831485e 100644 --- a/dask_bigquery/tests/test_core.py +++ b/dask_bigquery/tests/test_core.py @@ -1,6 +1,7 @@ import os import random import uuid +from datetime import datetime, timedelta import google.auth import pandas as pd @@ -19,6 +20,7 @@ def df(): { "name": random.choice(["fred", "wilma", "barney", "betty"]), "number": random.randint(0, 100), + "timestamp": datetime.now() - timedelta(days=i % 2), "idx": i, } for i in range(10) @@ -35,13 +37,26 @@ def dataset(df): dataset_id = uuid.uuid4().hex table_id = "table_test" # push data to gbq - pd.DataFrame.to_gbq( - df, - destination_table=f"{dataset_id}.{table_id}", - project_id=project_id, - chunksize=5, - if_exists="append", + + time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="timestamp", + ) # field to use for partitioning + + job_config = bigquery.LoadJobConfig( + write_disposition="WRITE_TRUNCATE", time_partitioning=time_partitioning ) + + with bigquery.Client() as bq_client: + dataset = bigquery.Dataset(f"{project_id}.{dataset_id}") + bq_client.create_dataset(dataset) + job = bq_client.load_table_from_dataframe( + df, + destination=f"{project_id}.{dataset_id}.{table_id}", + job_config=job_config, + ) # Make an API request. + job.result() + yield (project_id, dataset_id, table_id) with bigquery.Client() as bq_client: @@ -55,7 +70,7 @@ def test_read_gbq(df, dataset, client): project_id, dataset_id, table_id = dataset ddf = read_gbq(project_id=project_id, dataset_id=dataset_id, table_id=table_id) - assert list(ddf.columns) == ["name", "number", "idx"] + assert list(ddf.columns) == ["name", "number", "timestamp", "idx"] assert ddf.npartitions == 2 assert assert_eq(ddf.set_index("idx"), df.set_index("idx")) @@ -69,7 +84,7 @@ def test_read_row_filter(df, dataset, client): row_filter="idx < 5", ) - assert list(ddf.columns) == ["name", "number", "idx"] + assert list(ddf.columns) == ["name", "number", "timestamp", "idx"] assert ddf.npartitions == 2 assert assert_eq(ddf.set_index("idx").loc[:4], df.set_index("idx").loc[:4]) From 8615837e1a5eb28a096489b85d1d102d33395f2e Mon Sep 17 00:00:00 2001 From: ncclementi Date: Tue, 14 Dec 2021 16:59:55 -0500 Subject: [PATCH 2/3] make local df utc aware --- dask_bigquery/tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dask_bigquery/tests/test_core.py b/dask_bigquery/tests/test_core.py index 831485e..5155cc6 100644 --- a/dask_bigquery/tests/test_core.py +++ b/dask_bigquery/tests/test_core.py @@ -1,7 +1,7 @@ import os import random import uuid -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone import google.auth import pandas as pd @@ -20,7 +20,7 @@ def df(): { "name": random.choice(["fred", "wilma", "barney", "betty"]), "number": random.randint(0, 100), - "timestamp": datetime.now() - timedelta(days=i % 2), + "timestamp": datetime.now(timezone.utc) - timedelta(days=i % 2), "idx": i, } for i in range(10) From 0940a63b1496a9761b5ce9fb4e5b99053e03c5b8 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Wed, 15 Dec 2021 13:22:31 -0600 Subject: [PATCH 3/3] Trigger CI