Skip to content

Commit 2cd7c45

Browse files
author
Jesse Whitehouse
committed
Re-implement fix from #326 behind an environment variable flag
Signed-off-by: Jesse Whitehouse <jesse.whitehouse@databricks.com>
1 parent 3a254ef commit 2cd7c45

3 files changed

Lines changed: 46 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
## dbt-databricks 1.6.x (Release TBD)
22

3+
### Features
4+
5+
- Follow up: re-implement fix for issue where the show tables extended command is limited to 2048 characters. ([#326](https://github.com/databricks/dbt-databricks/pull/326)). Set `DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS` to `true` to enable this behaviour.
6+
37
## dbt-databricks 1.6.1 (August 2, 2023)
48

59
### Fixes

dbt/adapters/databricks/impl.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from contextlib import contextmanager
33
from itertools import chain
44
from dataclasses import dataclass
5+
import os
56
import re
67
from typing import (
78
Any,
@@ -79,6 +80,22 @@ def check_not_found_error(errmsg: str) -> bool:
7980
return new_error or old_error is not None
8081

8182

83+
def get_identifier_list_string(table_names: set[str]) -> str:
    """Build the identifier string for a collection of table names.

    Returns "|".join(table_names) by default.

    Returns "*" if the environment variable DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS
    is set to "true" (compared case-insensitively, ignoring surrounding
    whitespace) and the joined string is 2048 characters or longer.

    This is for AWS Glue Catalog users. See issue #325.
    """
    max_identifier_length = 2048

    identifier = "|".join(table_names)

    # Normalise the flag so that "True", "TRUE" or " true " also enable the
    # bypass instead of being silently treated as disabled.
    raw_flag = os.environ.get("DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS", "false")
    bypass_enabled = raw_flag.strip().lower() == "true"

    if bypass_enabled and len(identifier) >= max_identifier_length:
        # Fall back to the match-everything pattern rather than sending an
        # over-long list of names.
        return "*"
    return identifier
97+
98+
8299
@undefined_proof
83100
class DatabricksAdapter(SparkAdapter):
84101
Relation = DatabricksRelation
@@ -448,11 +465,12 @@ def _get_one_catalog(
448465
table_names.add(relation.identifier)
449466

450467
columns: List[Dict[str, Any]] = []
468+
451469
if len(table_names) > 0:
452470
schema_relation = self.Relation.create(
453471
database=database,
454472
schema=schema,
455-
identifier="|".join(table_names),
473+
identifier=get_identifier_list_string(table_names),
456474
quote_policy=self.config.quoting,
457475
)
458476
for relation, information in self._list_relations_with_information(schema_relation):

tests/unit/test_adapter.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from dbt.adapters.databricks import __version__
99
from dbt.adapters.databricks import DatabricksAdapter, DatabricksRelation
1010
from dbt.adapters.databricks.impl import check_not_found_error
11+
from dbt.adapters.databricks.impl import get_identifier_list_string
1112
from dbt.adapters.databricks.connections import (
1213
CATALOG_KEY_IN_SESSION_PROPERTIES,
1314
DBT_DATABRICKS_INVOCATION_ENV,
@@ -947,6 +948,28 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
947948
},
948949
)
949950

951+
def test_describe_table_extended_2048_char_limit(self):
    """GIVEN a set of table names whose joined length exceeds 2048 characters
    WHEN the environment variable DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS is "true"
    THEN the identifier list is replaced with "*"
    """
    env_var = "DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS"
    table_names = {f"customers_{i}" for i in range(200)}
    few_table_names = set(list(table_names)[:5])

    # By default, don't limit the number of characters. The environment is
    # cleared for this check so that a value already exported in the shell
    # running the tests cannot change the outcome.
    with mock.patch.dict("os.environ", clear=True):
        self.assertEqual(get_identifier_list_string(table_names), "|".join(table_names))

    # If environment variable is set, then limit the number of characters
    with mock.patch.dict("os.environ", {env_var: "true"}):
        # Long list of table names is capped
        self.assertEqual(get_identifier_list_string(table_names), "*")

        # Short list of table names is not capped
        self.assertEqual(
            get_identifier_list_string(few_table_names), "|".join(few_table_names)
        )
972+
950973

951974
class TestCheckNotFound(unittest.TestCase):
952975
def test_prefix(self):

0 commit comments

Comments
 (0)