Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
//{tipb::ScalarFuncSig::JsonReplaceSig, "cast"},
//{tipb::ScalarFuncSig::JsonRemoveSig, "cast"},
//{tipb::ScalarFuncSig::JsonMergeSig, "cast"},
//{tipb::ScalarFuncSig::JsonObjectSig, "cast"},
{tipb::ScalarFuncSig::JsonObjectSig, "json_object"},
{tipb::ScalarFuncSig::JsonArraySig, "json_array"},
{tipb::ScalarFuncSig::JsonValidJsonSig, "json_valid_json"},
{tipb::ScalarFuncSig::JsonValidOthersSig, "json_valid_others"},
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Functions/FunctionsJson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ void registerFunctionsJson(FunctionFactory & factory)
factory.registerFunction<FunctionCastJsonAsString>();
factory.registerFunction<FunctionJsonLength>();
factory.registerFunction<FunctionJsonArray>();
factory.registerFunction<FunctionJsonObject>();
factory.registerFunction<FunctionCastJsonAsJson>();
factory.registerFunction<FunctionCastRealAsJson>();
factory.registerFunction<FunctionCastDecimalAsJson>();
Expand Down
206 changes: 206 additions & 0 deletions dbms/src/Functions/FunctionsJson.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
#include <simdjson.h>
#include <tipb/expression.pb.h>

#include <algorithm>
#include <ext/range.h>
#include <limits>
#include <magic_enum.hpp>
#include <string_view>
#include <type_traits>
Expand All @@ -63,6 +65,8 @@ namespace DB

namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int UNKNOWN_TYPE;
} // namespace ErrorCodes
Expand Down Expand Up @@ -976,6 +980,208 @@ class FunctionJsonArray : public IFunction
};


class FunctionJsonObject : public IFunction
{
public:
static constexpr auto name = "json_object";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionJsonObject>(); }

String getName() const override { return name; }

size_t getNumberOfArguments() const override { return 0; }

bool isVariadic() const override { return true; }

bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (unlikely(arguments.size() % 2 != 0))
{
throw Exception(
fmt::format("Incorrect parameter count in the call to native function '{}'", getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
for (const auto arg_idx : ext::range(0, arguments.size()))
{
if (arg_idx % 2 == 0 && arguments[arg_idx]->onlyNull())
throw Exception("JSON documents may not contain NULL member names.", ErrorCodes::BAD_ARGUMENTS);

if (!arguments[arg_idx]->onlyNull())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When arg_idx % 2 == 0 and arguments[arg_idx]->onlyNull() == true, I think we can throw exception here as JSON documents may not contain NULL member names.

{
const auto * arg = removeNullable(arguments[arg_idx]).get();
if (!arg->isStringOrFixedString())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument {} of function {}",
arg->getName(),
arg_idx + 1,
getName());
}
}
return std::make_shared<DataTypeString>();
}

void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
if (arguments.empty())
{
// clang-format off
const UInt8 empty_object_json_value[] = {
JsonBinary::TYPE_CODE_OBJECT, // object_type
0x0, 0x0, 0x0, 0x0, // element_count
0x8, 0x0, 0x0, 0x0}; // total_size
// clang-format on
auto empty_object_json = ColumnString::create();
empty_object_json->insertData(
reinterpret_cast<const char *>(empty_object_json_value),
sizeof(empty_object_json_value) / sizeof(UInt8));
block.getByPosition(result).column = ColumnConst::create(std::move(empty_object_json), block.rows());
return;
}

auto nested_block = createBlockWithNestedColumns(block, arguments);
StringSources sources;
for (auto column_number : arguments)
{
sources.push_back(
block.getByPosition(column_number).column->onlyNull()
? nullptr
: createDynamicStringSource(*nested_block.getByPosition(column_number).column));
}

auto rows = block.rows();
auto col_to = ColumnString::create();
auto & data_to = col_to->getChars();
auto & offsets_to = col_to->getOffsets();
offsets_to.resize(rows);

std::vector<const NullMap *> nullmaps;
nullmaps.reserve(sources.size());
bool is_input_nullable = false;
for (auto column_number : arguments)
{
const auto & col = block.getByPosition(column_number).column;
if (col->isColumnNullable())
{
const auto & column_nullable = static_cast<const ColumnNullable &>(*col);
nullmaps.push_back(&(column_nullable.getNullMapData()));
is_input_nullable = true;
}
else
{
nullmaps.push_back(nullptr);
}
}

if (is_input_nullable)
doExecuteImpl<true>(sources, rows, data_to, offsets_to, nullmaps);
else
doExecuteImpl<false>(sources, rows, data_to, offsets_to, nullmaps);

block.getByPosition(result).column = std::move(col_to);
}

private:
template <bool is_input_nullable>
static void doExecuteImpl(
StringSources & sources,
size_t rows,
ColumnString::Chars_t & data_to,
ColumnString::Offsets & offsets_to,
const std::vector<const NullMap *> & nullmaps)
{
struct JsonObjectEntry
{
StringRef key;
JsonBinary value;
size_t input_order;
};

const size_t pair_count = sources.size() / 2;
size_t reserve_size = rows * (1 + pair_count * 16);
for (const auto & source : sources)
reserve_size += source ? source->getSizeForReserve() : rows;
JsonBinary::JsonBinaryWriteBuffer write_buffer(data_to, reserve_size);

std::vector<JsonObjectEntry> entries;
std::vector<StringRef> keys;
std::vector<JsonBinary> values;
entries.reserve(pair_count);
keys.reserve(pair_count);
values.reserve(pair_count);

for (size_t i = 0; i < rows; ++i)
{
entries.clear();
for (size_t col = 0; col < sources.size(); col += 2)
{
if constexpr (is_input_nullable)
{
const auto * key_nullmap = nullmaps[col];
if (!sources[col] || (key_nullmap && (*key_nullmap)[i]))
throw Exception("JSON documents may not contain NULL member names.", ErrorCodes::BAD_ARGUMENTS);
}

assert(sources[col]);
const auto & key_from = sources[col]->getWhole();
if (unlikely(key_from.size > std::numeric_limits<UInt16>::max()))
throw Exception(
"TiDB/TiFlash does not yet support JSON objects with the key length >= 65536",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
StringRef key{key_from.data, key_from.size};

JsonBinary value(JsonBinary::TYPE_CODE_LITERAL, StringRef(&JsonBinary::LITERAL_NIL, 1));
if constexpr (is_input_nullable)
{
const auto * value_nullmap = nullmaps[col + 1];
if (sources[col + 1] && !(value_nullmap && (*value_nullmap)[i]))
{
const auto & data_from = sources[col + 1]->getWhole();
value = JsonBinary(data_from.data[0], StringRef(&data_from.data[1], data_from.size - 1));
}
}
else
{
assert(sources[col + 1]);
const auto & data_from = sources[col + 1]->getWhole();
value = JsonBinary(data_from.data[0], StringRef(&data_from.data[1], data_from.size - 1));
}

entries.push_back({key, value, col >> 1});
}

std::sort(entries.begin(), entries.end(), [](const auto & lhs, const auto & rhs) {
return lhs.key == rhs.key ? lhs.input_order < rhs.input_order : lhs.key < rhs.key;
});

keys.clear();
values.clear();
for (size_t entry_idx = 0; entry_idx < entries.size();)
{
size_t last_idx = entry_idx;
while (last_idx + 1 < entries.size() && entries[last_idx + 1].key == entries[entry_idx].key)
++last_idx;

keys.push_back(entries[last_idx].key);
values.push_back(entries[last_idx].value);
entry_idx = last_idx + 1;
}

JsonBinary::buildBinaryJsonObjectInBuffer(keys, values, write_buffer);
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();

for (const auto & source : sources)
{
if (source)
source->next();
}
}
}
};


class FunctionCastJsonAsJson : public IFunction
{
public:
Expand Down
131 changes: 131 additions & 0 deletions dbms/src/Functions/tests/gtest_json_object.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright 2026 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <TestUtils/FunctionTestUtils.h>
#include <TestUtils/TiFlashTestBasic.h>
#include <TiDB/Schema/TiDBTypes.h>
#include <gtest/gtest.h>

#include <limits>

namespace DB::ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int BAD_ARGUMENTS;
} // namespace DB::ErrorCodes

namespace DB::tests
{
class TestJsonObject : public DB::tests::FunctionTest
{
public:
ColumnWithTypeAndName castStringToJson(const ColumnWithTypeAndName & column)
{
assert(removeNullable(column.type)->isString());
ColumnsWithTypeAndName inputs{column};
return executeFunction("cast_string_as_json", inputs, nullptr, true);
}

ColumnWithTypeAndName executeFunctionWithCast(
const ColumnNumbers & argument_column_numbers,
const ColumnsWithTypeAndName & columns)
{
auto json_column = executeFunction("json_object", argument_column_numbers, columns);
tipb::FieldType field_type;
field_type.set_flen(-1);
field_type.set_collate(TiDB::ITiDBCollator::BINARY);
field_type.set_tp(TiDB::TypeString);
return executeCastJsonAsStringFunction(json_column, field_type);
}
};

template <typename Fn>
void assertThrowsCode(Fn && fn, int expected_code)
{
try
{
fn();
FAIL() << "Expected DB::Exception to be thrown";
}
catch (const Exception & e)
{
ASSERT_EQ(expected_code, e.code()) << e.displayText();
}
}

TEST_F(TestJsonObject, TestBasicSemantics)
try
{
constexpr size_t rows_count = 2;

{
ColumnsWithTypeAndName inputs{createColumn<String>({"placeholder", "placeholder"})};
auto res = executeFunctionWithCast({}, inputs);
ASSERT_COLUMN_EQ(createConstColumn<Nullable<String>>(rows_count, "{}"), res);
}

{
ColumnsWithTypeAndName inputs{
createColumn<String>({"b", "b"}),
castStringToJson(createColumn<String>({"1", "1"})),
createColumn<String>({"a", "a"}),
castStringToJson(createColumn<Nullable<String>>({{}, "\"x\""})),
};
auto res = executeFunctionWithCast({0, 1, 2, 3}, inputs);
auto expect = createColumn<Nullable<String>>({R"({"a": null, "b": 1})", R"({"a": "x", "b": 1})"});
ASSERT_COLUMN_EQ(expect, res);
}

{
ColumnsWithTypeAndName inputs{
createConstColumn<String>(rows_count, "dup"),
castStringToJson(createConstColumn<String>(rows_count, "1")),
createConstColumn<String>(rows_count, "dup"),
castStringToJson(createColumn<String>({"2", "3"})),
};
auto res = executeFunctionWithCast({0, 1, 2, 3}, inputs);
auto expect = createColumn<Nullable<String>>({R"({"dup": 2})", R"({"dup": 3})"});
ASSERT_COLUMN_EQ(expect, res);
}
}
CATCH

TEST_F(TestJsonObject, TestErrors)
try
{
ASSERT_THROW(executeFunction("json_object", {createColumn<String>({"a"})}), Exception);

auto value = castStringToJson(createColumn<String>({"1"}));
assertThrowsCode(
[&] {
executeFunction("json_object", {createOnlyNullColumn(1), value});
},
ErrorCodes::BAD_ARGUMENTS);

assertThrowsCode(
[&] {
executeFunction("json_object", {createColumn<Nullable<String>>({{}}), value});
},
ErrorCodes::BAD_ARGUMENTS);

String too_long_key(std::numeric_limits<UInt16>::max() + 1, 'a');
assertThrowsCode(
[&] {
executeFunction("json_object", {createColumn<String>({too_long_key}), value});
},
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
CATCH

} // namespace DB::tests
Loading