1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ #include < glog/logging.h>
1819#include < rapidjson/allocators.h>
1920#include < rapidjson/document.h>
2021#include < rapidjson/encodings.h>
@@ -144,45 +145,7 @@ rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths, rapidjs
144145 const std::string& col = parsed_paths[i].key ;
145146 int index = parsed_paths[i].idx ;
146147 if (LIKELY (!col.empty ())) {
147- if (root->IsArray ()) {
148- array_obj = static_cast <rapidjson::Value*>(
149- mem_allocator.Malloc (sizeof (rapidjson::Value)));
150- array_obj->SetArray ();
151- bool is_null = true ;
152-
153- // if array ,loop the array,find out all Objects,then find the results from the objects
154- for (int j = 0 ; j < root->Size (); j++) {
155- rapidjson::Value* json_elem = &((*root)[j]);
156-
157- if (json_elem->IsArray () || json_elem->IsNull ()) {
158- continue ;
159- } else {
160- if (!json_elem->IsObject ()) {
161- continue ;
162- }
163- if (!json_elem->HasMember (col.c_str ())) {
164- if (is_insert_null) { // not found item, then insert a null object.
165- is_null = false ;
166- rapidjson::Value nullObject (rapidjson::kNullType );
167- array_obj->PushBack (nullObject, mem_allocator);
168- }
169- continue ;
170- }
171- rapidjson::Value* obj = &((*json_elem)[col.c_str ()]);
172- if (obj->IsArray ()) {
173- is_null = false ;
174- for (int k = 0 ; k < obj->Size (); k++) {
175- array_obj->PushBack ((*obj)[k], mem_allocator);
176- }
177- } else if (!obj->IsNull ()) {
178- is_null = false ;
179- array_obj->PushBack (*obj, mem_allocator);
180- }
181- }
182- }
183-
184- root = is_null ? &(array_obj->SetNull ()) : array_obj;
185- } else if (root->IsObject ()) {
148+ if (root->IsObject ()) {
186149 if (!root->HasMember (col.c_str ())) {
187150 return nullptr ;
188151 } else {
@@ -233,8 +196,17 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view
233196
234197 // Cannot use '\' as the last character, return NULL
235198 if (path_string.back () == ' \\ ' ) {
236- document->SetNull ();
237- return document;
199+ return nullptr ;
200+ }
201+
202+ std::string fixed_string;
203+ if (path_string.size () >= 2 && path_string[0 ] == ' $' && path_string[1 ] != ' .' ) {
204+ // Boost tokenizer requires explicit "." after "$" to correctly extract JSON path tokens.
205+ // Without this, expressions like "$[0].key" cannot be properly split.
206+ // A "." is therefore inserted after "$" so that token parsing stays consistent.
207+ fixed_string = " $." ;
208+ fixed_string += path_string.substr (1 );
209+ path_string = fixed_string;
238210 }
239211
240212 try {
@@ -251,13 +223,13 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view
251223 }
252224 } catch (boost::escaped_list_error&) {
253225 // Encountered an unknown escape sequence, e.g. '$.name\k'
254- return document ;
226+ return nullptr ;
255227 }
256228
257229 parsed_paths = &tmp_parsed_paths;
258230
259231 if (!(*parsed_paths)[0 ].is_valid ) {
260- return document ;
232+ return nullptr ;
261233 }
262234
263235 if (UNLIKELY ((*parsed_paths).size () == 1 )) {
@@ -272,8 +244,7 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view
272244 if (UNLIKELY (document->HasParseError ())) {
273245 // VLOG_CRITICAL << "Error at offset " << document->GetErrorOffset() << ": "
274246 // << GetParseError_En(document->GetParseError());
275- document->SetNull ();
276- return document;
247+ return nullptr ;
277248 }
278249
279250 return match_value (*parsed_paths, document, document->GetAllocator ());
@@ -858,9 +829,10 @@ template <typename Name, bool remove_quotes>
858829struct FunctionJsonExtractImpl {
859830 static constexpr auto name = Name::name;
860831
861- static rapidjson::Value parse_json (const ColumnString* json_col, const ColumnString* path_col,
862- rapidjson::Document::AllocatorType& allocator, const int row,
863- const int col, std::vector<bool >& column_is_consts) {
832+ static std::pair<bool , rapidjson::Value> parse_json (
833+ const ColumnString* json_col, const ColumnString* path_col,
834+ rapidjson::Document::AllocatorType& allocator, const int row, const int col,
835+ std::vector<bool >& column_is_consts) {
864836 rapidjson::Value value;
865837 rapidjson::Document document;
866838
@@ -869,10 +841,13 @@ struct FunctionJsonExtractImpl {
869841 const auto path = path_col->get_data_at (index_check_const (row, column_is_consts[col]));
870842 std::string_view path_string (path.data , path.size );
871843 auto * root = get_json_object<JSON_FUN_STRING>(json_string, path_string, &document);
844+ bool found = false ;
872845 if (root != nullptr ) {
846+ found = true ;
873847 value.CopyFrom (*root, allocator);
874848 }
875- return value;
849+
850+ return {found, std::move (value)};
876851 }
877852
878853 static rapidjson::Value* get_document (const ColumnString* path_col,
@@ -913,8 +888,9 @@ struct FunctionJsonExtractImpl {
913888 rapidjson::StringBuffer buf;
914889 rapidjson::Writer<rapidjson::StringBuffer> writer (buf);
915890 const auto * json_col = data_columns[0 ];
916- auto insert_result_lambda = [&](rapidjson::Value& value, int row) {
917- if (value.IsNull ()) {
891+
892+ auto insert_result_lambda = [&](rapidjson::Value& value, bool is_null, int row) {
893+ if (is_null) {
918894 null_map[row] = 1 ;
919895 result_column.insert_default ();
920896 } else {
@@ -935,12 +911,13 @@ struct FunctionJsonExtractImpl {
935911 }
936912 };
937913 if (data_columns.size () == 2 ) {
938- rapidjson::Value value;
939914 if (column_is_consts[1 ]) {
940915 std::vector<JsonPath> parsed_paths;
941916 auto * root = get_document (data_columns[1 ], &document, parsed_paths, 0 ,
942917 column_is_consts[1 ]);
943918 for (size_t row = 0 ; row < input_rows_count; row++) {
919+ bool is_null = false ;
920+ rapidjson::Value value;
944921 if (root != nullptr ) {
945922 const auto & obj = json_col->get_data_at (row);
946923 std::string_view json_string (obj.data , obj.size );
@@ -957,17 +934,18 @@ struct FunctionJsonExtractImpl {
957934 if (root_val != nullptr ) {
958935 value.CopyFrom (*root_val, allocator);
959936 } else {
960- rapidjson::Value tmp;
961- value.Swap (tmp);
937+ is_null = true ;
962938 }
939+ } else {
940+ is_null = true ;
963941 }
964- insert_result_lambda (value, row);
942+ insert_result_lambda (value, is_null, row);
965943 }
966944 } else {
967945 for (size_t row = 0 ; row < input_rows_count; row++) {
968- value = parse_json (json_col, data_columns[1 ], allocator, row, 1 ,
969- column_is_consts);
970- insert_result_lambda (value , row);
946+ auto result = parse_json (json_col, data_columns[1 ], allocator, row, 1 ,
947+ column_is_consts);
948+ insert_result_lambda (result. second , !result. first , row);
971949 }
972950 }
973951
@@ -977,12 +955,16 @@ struct FunctionJsonExtractImpl {
977955 value.Reserve (data_columns.size () - 1 , allocator);
978956 for (size_t row = 0 ; row < input_rows_count; row++) {
979957 value.Clear ();
958+ bool found_any = false ;
980959 for (size_t col = 1 ; col < data_columns.size (); ++col) {
981- value.PushBack (parse_json (json_col, data_columns[col], allocator, row, col,
982- column_is_consts),
983- allocator);
960+ auto result = parse_json (json_col, data_columns[col], allocator, row, col,
961+ column_is_consts);
962+ if (result.first ) {
963+ found_any = true ;
964+ value.PushBack (std::move (result.second ), allocator);
965+ }
984966 }
985- insert_result_lambda (value, row);
967+ insert_result_lambda (value, !found_any, row);
986968 }
987969 }
988970 }
0 commit comments