Skip to content

Commit f1b7acd

Browse files
branch-2.1: [fix](tvf) support compressed json file for tvf and refactor code (#51983) (#53063)
Backport of #51983.
1 parent 2f7c15f commit f1b7acd

4 files changed

Lines changed: 27 additions & 1 deletion

File tree

be/src/vec/exec/format/json/new_json_reader.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,9 @@ Status NewJsonReader::get_columns(std::unordered_map<std::string, TypeDescriptor
248248
Status NewJsonReader::get_parsed_schema(std::vector<std::string>* col_names,
249249
std::vector<TypeDescriptor>* col_types) {
250250
RETURN_IF_ERROR(_get_range_params());
251-
251+
// create decompressor.
252+
// _decompressor may be nullptr if this is not a compressed file
253+
RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type, &_decompressor));
252254
RETURN_IF_ERROR(_open_file_reader(true));
253255
if (_read_json_by_line) {
254256
RETURN_IF_ERROR(_open_line_reader());

regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,20 @@
286286
8 chengdu 2345678
287287
9 xian 2345679
288288

289+
-- !json_compressed --
290+
1 beijing 2345671
291+
10 hefei 23456710
292+
11 \N 23456711
293+
12 hefei \N
294+
2 shanghai 2345672
295+
3 guangzhou 2345673
296+
4 shenzhen 2345674
297+
5 hangzhou 2345675
298+
6 nanjing 2345676
299+
7 wuhan 2345677
300+
8 chengdu 2345678
301+
9 xian 2345679
302+
289303
-- !json_limit1 --
290304
1 beijing 2345671
291305
10 hefei 23456710

regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,16 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") {
143143
"strip_outer_array" = "false",
144144
"read_json_by_line" = "true") order by id; """
145145

146+
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json.gz"
147+
format = "json"
148+
qt_json_compressed """ select * from HDFS(
149+
"uri" = "${uri}",
150+
"hadoop.username" = "${hdfsUserName}",
151+
"format" = "${format}",
152+
"compress_type" = "GZ",
153+
"strip_outer_array" = "false",
154+
"read_json_by_line" = "true") order by id; """
155+
146156

147157
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
148158
format = "json"

0 commit comments

Comments
 (0)