1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ #include < glog/logging.h>
1819#include < rapidjson/allocators.h>
1920#include < rapidjson/document.h>
2021#include < rapidjson/encodings.h>
@@ -140,45 +141,7 @@ rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths, rapidjs
140141 const std::string& col = parsed_paths[i].key ;
141142 int index = parsed_paths[i].idx ;
142143 if (LIKELY (!col.empty ())) {
143- if (root->IsArray ()) {
144- array_obj = static_cast <rapidjson::Value*>(
145- mem_allocator.Malloc (sizeof (rapidjson::Value)));
146- array_obj->SetArray ();
147- bool is_null = true ;
148-
149- // if array ,loop the array,find out all Objects,then find the results from the objects
150- for (int j = 0 ; j < root->Size (); j++) {
151- rapidjson::Value* json_elem = &((*root)[j]);
152-
153- if (json_elem->IsArray () || json_elem->IsNull ()) {
154- continue ;
155- } else {
156- if (!json_elem->IsObject ()) {
157- continue ;
158- }
159- if (!json_elem->HasMember (col.c_str ())) {
160- if (is_insert_null) { // not found item, then insert a null object.
161- is_null = false ;
162- rapidjson::Value nullObject (rapidjson::kNullType );
163- array_obj->PushBack (nullObject, mem_allocator);
164- }
165- continue ;
166- }
167- rapidjson::Value* obj = &((*json_elem)[col.c_str ()]);
168- if (obj->IsArray ()) {
169- is_null = false ;
170- for (int k = 0 ; k < obj->Size (); k++) {
171- array_obj->PushBack ((*obj)[k], mem_allocator);
172- }
173- } else if (!obj->IsNull ()) {
174- is_null = false ;
175- array_obj->PushBack (*obj, mem_allocator);
176- }
177- }
178- }
179-
180- root = is_null ? &(array_obj->SetNull ()) : array_obj;
181- } else if (root->IsObject ()) {
144+ if (root->IsObject ()) {
182145 if (!root->HasMember (col.c_str ())) {
183146 return nullptr ;
184147 } else {
@@ -229,8 +192,17 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view
229192
230193 // Cannot use '\' as the last character, return NULL
231194 if (path_string.back () == ' \\ ' ) {
232- document->SetNull ();
233- return document;
195+ return nullptr ;
196+ }
197+
198+ std::string fixed_string;
199+ if (path_string.size () >= 2 && path_string[0 ] == ' $' && path_string[1 ] != ' .' ) {
200+ // Boost tokenizer requires explicit "." after "$" to correctly extract JSON path tokens.
201+ // Without this, expressions like "$[0].key" cannot be properly split.
202+ // To keep token parsing consistent, a "." is therefore inserted automatically after "$".
203+ fixed_string = " $." ;
204+ fixed_string += path_string.substr (1 );
205+ path_string = fixed_string;
234206 }
235207
236208 try {
@@ -247,13 +219,13 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view
247219 }
248220 } catch (boost::escaped_list_error&) {
249221 // meet unknown escape sequence, example '$.name\k'
250- return document ;
222+ return nullptr ;
251223 }
252224
253225 parsed_paths = &tmp_parsed_paths;
254226
255227 if (!(*parsed_paths)[0 ].is_valid ) {
256- return document ;
228+ return nullptr ;
257229 }
258230
259231 if (UNLIKELY ((*parsed_paths).size () == 1 )) {
@@ -270,8 +242,7 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view
270242 if (UNLIKELY (document->HasParseError ())) {
271243 // VLOG_CRITICAL << "Error at offset " << document->GetErrorOffset() << ": "
272244 // << GetParseError_En(document->GetParseError());
273- document->SetNull ();
274- return document;
245+ return nullptr ;
275246 }
276247
277248 return match_value (*parsed_paths, document, document->GetAllocator ());
@@ -856,10 +827,10 @@ template <typename Name, bool remove_quotes>
856827struct FunctionJsonExtractImpl {
857828 static constexpr auto name = Name::name;
858829
859- static rapidjson::Value parse_json (const ColumnString* json_col, const ColumnString* path_col,
860- rapidjson::Document::AllocatorType& allocator ,
861- const size_t row, const size_t col,
862- std::vector<bool >& column_is_consts) {
830+ static std::pair< bool , rapidjson::Value> parse_json (
831+ const ColumnString* json_col, const ColumnString* path_col ,
832+ rapidjson::Document::AllocatorType& allocator, const size_t row, const size_t col,
833+ std::vector<bool >& column_is_consts) {
863834 rapidjson::Value value;
864835 rapidjson::Document document;
865836
@@ -868,10 +839,13 @@ struct FunctionJsonExtractImpl {
868839 const auto path = path_col->get_data_at (index_check_const (row, column_is_consts[col]));
869840 std::string_view path_string (path.data , path.size );
870841 auto * root = get_json_object<JSON_FUN_STRING>(json_string, path_string, &document);
842+ bool found = false ;
871843 if (root != nullptr ) {
844+ found = true ;
872845 value.CopyFrom (*root, allocator);
873846 }
874- return value;
847+
848+ return {found, std::move (value)};
875849 }
876850
877851 static rapidjson::Value* get_document (const ColumnString* path_col,
@@ -912,8 +886,8 @@ struct FunctionJsonExtractImpl {
912886 rapidjson::StringBuffer buf;
913887 rapidjson::Writer<rapidjson::StringBuffer> writer (buf);
914888 const auto * json_col = data_columns[0 ];
915- auto insert_result_lambda = [&](rapidjson::Value& value, size_t row) {
916- if (value. IsNull () ) {
889+ auto insert_result_lambda = [&](rapidjson::Value& value, bool is_null, size_t row) {
890+ if (is_null ) {
917891 null_map[row] = 1 ;
918892 result_column.insert_default ();
919893 } else {
@@ -934,12 +908,13 @@ struct FunctionJsonExtractImpl {
934908 }
935909 };
936910 if (data_columns.size () == 2 ) {
937- rapidjson::Value value;
938911 if (column_is_consts[1 ]) {
939912 std::vector<JsonPath> parsed_paths;
940913 auto * root = get_document (data_columns[1 ], &document, parsed_paths, 0 ,
941914 column_is_consts[1 ]);
942915 for (size_t row = 0 ; row < input_rows_count; row++) {
916+ bool is_null = false ;
917+ rapidjson::Value value;
943918 if (root != nullptr ) {
944919 const auto & obj = json_col->get_data_at (row);
945920 std::string_view json_string (obj.data , obj.size );
@@ -958,17 +933,18 @@ struct FunctionJsonExtractImpl {
958933 if (root_val != nullptr ) {
959934 value.CopyFrom (*root_val, allocator);
960935 } else {
961- rapidjson::Value tmp;
962- value.Swap (tmp);
936+ is_null = true ;
963937 }
938+ } else {
939+ is_null = true ;
964940 }
965- insert_result_lambda (value, row);
941+ insert_result_lambda (value, is_null, row);
966942 }
967943 } else {
968944 for (size_t row = 0 ; row < input_rows_count; row++) {
969- value = parse_json (json_col, data_columns[1 ], allocator, row, 1 ,
970- column_is_consts);
971- insert_result_lambda (value , row);
945+ auto result = parse_json (json_col, data_columns[1 ], allocator, row, 1 ,
946+ column_is_consts);
947+ insert_result_lambda (result. second , !result. first , row);
972948 }
973949 }
974950
@@ -978,12 +954,16 @@ struct FunctionJsonExtractImpl {
978954 value.Reserve (cast_set<rapidjson::SizeType>(data_columns.size () - 1 ), allocator);
979955 for (size_t row = 0 ; row < input_rows_count; row++) {
980956 value.Clear ();
957+ bool found_any = false ;
981958 for (size_t col = 1 ; col < data_columns.size (); ++col) {
982- value.PushBack (parse_json (json_col, data_columns[col], allocator, row, col,
983- column_is_consts),
984- allocator);
959+ auto result = parse_json (json_col, data_columns[col], allocator, row, col,
960+ column_is_consts);
961+ if (result.first ) {
962+ found_any = true ;
963+ value.PushBack (std::move (result.second ), allocator);
964+ }
985965 }
986- insert_result_lambda (value, row);
966+ insert_result_lambda (value, !found_any, row);
987967 }
988968 }
989969 }
0 commit comments