|
2 | 2 | from contextlib import contextmanager |
3 | 3 | from itertools import chain |
4 | 4 | from dataclasses import dataclass |
| 5 | +import os |
5 | 6 | import re |
6 | 7 | from typing import ( |
7 | 8 | Any, |
@@ -79,6 +80,22 @@ def check_not_found_error(errmsg: str) -> bool: |
79 | 80 | return new_error or old_error is not None |
80 | 81 |
|
81 | 82 |
|
def get_identifier_list_string(table_names: set[str]) -> str:
    """Return the identifier pattern that matches every name in *table_names*.

    By default the names are joined with "|" (in sorted order, so the result
    is the same on every run regardless of set iteration order).

    When the environment variable DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS is set
    to "true" (case-insensitive, surrounding whitespace ignored) and the
    joined string is 2048 characters or longer, "*" is returned instead so
    that the pattern stays short.

    This is for AWS Glue Catalog users. See issue #325.

    Args:
        table_names: Table identifiers to combine; may be empty.

    Returns:
        The "|"-joined names, or "*" when the bypass is enabled and the
        joined string has reached the 2048-character threshold.
    """
    # Sorting costs little and keeps the generated pattern reproducible.
    identifier = "|".join(sorted(table_names))

    flag = os.environ.get("DBT_DESCRIBE_TABLE_2048_CHAR_BYPASS", "false")
    # Accept "True", "TRUE", " true " etc. so a differently-cased value does
    # not silently leave the bypass disabled.
    bypass_enabled = flag.strip().lower() == "true"

    # Only strings strictly shorter than 2048 characters are passed through
    # when the bypass is on; anything longer falls back to the wildcard.
    if bypass_enabled and len(identifier) >= 2048:
        return "*"
    return identifier
| 97 | + |
| 98 | + |
82 | 99 | @undefined_proof |
83 | 100 | class DatabricksAdapter(SparkAdapter): |
84 | 101 | Relation = DatabricksRelation |
@@ -448,11 +465,12 @@ def _get_one_catalog( |
448 | 465 | table_names.add(relation.identifier) |
449 | 466 |
|
450 | 467 | columns: List[Dict[str, Any]] = [] |
| 468 | + |
451 | 469 | if len(table_names) > 0: |
452 | 470 | schema_relation = self.Relation.create( |
453 | 471 | database=database, |
454 | 472 | schema=schema, |
455 | | - identifier="|".join(table_names), |
| 473 | + identifier=get_identifier_list_string(table_names), |
456 | 474 | quote_policy=self.config.quoting, |
457 | 475 | ) |
458 | 476 | for relation, information in self._list_relations_with_information(schema_relation): |
|
0 commit comments