Skip to content

Commit a247dff

Browse files
committed
Optimize multipart content provider to coalesce small writes and reduce TCP packet fragmentation (Fix #2410)
1 parent 6532464 commit a247dff

2 files changed

Lines changed: 140 additions & 10 deletions

File tree

httplib.h

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,6 +1571,13 @@ ssize_t write_headers(Stream &strm, const Headers &headers);
15711571
bool set_socket_opt_time(socket_t sock, int level, int optname, time_t sec,
15721572
time_t usec);
15731573

1574+
size_t get_multipart_content_length(const UploadFormDataItems &items,
1575+
const std::string &boundary);
1576+
1577+
ContentProvider
1578+
make_multipart_content_provider(const UploadFormDataItems &items,
1579+
const std::string &boundary);
1580+
15741581
} // namespace detail
15751582

15761583
class Server {
@@ -8239,9 +8246,12 @@ make_multipart_content_provider(const UploadFormDataItems &items,
82398246
}
82408247
segs.push_back({owned.back().data(), owned.back().size()});
82418248

8249+
constexpr size_t kBufSize = 65536;
8250+
82428251
struct MultipartState {
82438252
std::vector<std::string> owned;
82448253
std::vector<MultipartSegment> segs;
8254+
std::vector<char> buf = std::vector<char>(kBufSize);
82458255
};
82468256
auto state = std::make_shared<MultipartState>();
82478257
state->owned = std::move(owned);
@@ -8250,19 +8260,48 @@ make_multipart_content_provider(const UploadFormDataItems &items,
82508260
state->segs = std::move(segs);
82518261

82528262
return [state](size_t offset, size_t length, DataSink &sink) -> bool {
8263+
// Buffer multiple small segments into fewer, larger writes to avoid
8264+
// excessive TCP packets when there are many form data items (#2410)
8265+
auto &buf = state->buf;
8266+
size_t buf_len = 0;
8267+
size_t remaining = length;
8268+
8269+
// Find the first segment containing 'offset'
82538270
size_t pos = 0;
8254-
for (const auto &seg : state->segs) {
8255-
// Loop invariant: pos <= offset (proven by advancing pos only when
8256-
// offset - pos >= seg.size, i.e., the segment doesn't contain offset)
8257-
if (seg.size > 0 && offset - pos < seg.size) {
8258-
size_t seg_offset = offset - pos;
8259-
size_t available = seg.size - seg_offset;
8260-
size_t to_write = (std::min)(available, length);
8261-
return sink.write(seg.data + seg_offset, to_write);
8262-
}
8271+
size_t seg_idx = 0;
8272+
for (; seg_idx < state->segs.size(); seg_idx++) {
8273+
const auto &seg = state->segs[seg_idx];
8274+
if (seg.size > 0 && offset - pos < seg.size) { break; }
82638275
pos += seg.size;
82648276
}
8265-
return true; // past end (shouldn't be reached when content_length is exact)
8277+
8278+
size_t seg_offset = (seg_idx < state->segs.size()) ? offset - pos : 0;
8279+
8280+
for (; seg_idx < state->segs.size() && remaining > 0; seg_idx++) {
8281+
const auto &seg = state->segs[seg_idx];
8282+
size_t available = seg.size - seg_offset;
8283+
size_t to_copy = (std::min)(available, remaining);
8284+
const char *src = seg.data + seg_offset;
8285+
seg_offset = 0; // only the first segment has a non-zero offset
8286+
8287+
while (to_copy > 0) {
8288+
size_t space = kBufSize - buf_len;
8289+
size_t chunk = (std::min)(to_copy, space);
8290+
std::memcpy(buf.data() + buf_len, src, chunk);
8291+
buf_len += chunk;
8292+
src += chunk;
8293+
to_copy -= chunk;
8294+
remaining -= chunk;
8295+
8296+
if (buf_len == kBufSize) {
8297+
if (!sink.write(buf.data(), buf_len)) { return false; }
8298+
buf_len = 0;
8299+
}
8300+
}
8301+
}
8302+
8303+
if (buf_len > 0) { return sink.write(buf.data(), buf_len); }
8304+
return true;
82668305
};
82678306
}
82688307

test/test.cc

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12035,6 +12035,97 @@ TEST(MultipartFormDataTest, UploadItemsHasContentLength) {
1203512035
EXPECT_EQ(StatusCode::OK_200, res->status);
1203612036
}
1203712037

12038+
TEST(MultipartFormDataTest, ContentProviderCoalescesWrites) {
12039+
// Verify that make_multipart_content_provider coalesces many small segments
12040+
// into fewer sink.write() calls to avoid TCP packet fragmentation (#2410).
12041+
constexpr size_t kItemCount = 1000;
12042+
12043+
UploadFormDataItems items;
12044+
items.reserve(kItemCount);
12045+
for (size_t i = 0; i < kItemCount; i++) {
12046+
items.push_back(
12047+
{"field" + std::to_string(i), "value" + std::to_string(i), "", ""});
12048+
}
12049+
12050+
const std::string boundary = "----test-boundary";
12051+
auto content_length = detail::get_multipart_content_length(items, boundary);
12052+
auto provider = detail::make_multipart_content_provider(items, boundary);
12053+
12054+
// Drive the provider the same way write_content_with_progress does
12055+
size_t write_count = 0;
12056+
size_t total_bytes = 0;
12057+
12058+
DataSink sink;
12059+
size_t offset = 0;
12060+
sink.write = [&](const char *d, size_t l) -> bool {
12061+
(void)d;
12062+
write_count++;
12063+
total_bytes += l;
12064+
offset += l;
12065+
return true;
12066+
};
12067+
sink.is_writable = []() -> bool { return true; };
12068+
12069+
while (offset < content_length) {
12070+
ASSERT_TRUE(provider(offset, content_length - offset, sink));
12071+
}
12072+
12073+
EXPECT_EQ(content_length, total_bytes);
12074+
12075+
// The total number of segments is 3 * kItemCount + 1 = 3001.
12076+
// With buffering into 64KB blocks, write_count should be much smaller.
12077+
auto segment_count = 3 * kItemCount + 1;
12078+
EXPECT_LT(write_count, segment_count / 10);
12079+
}
12080+
12081+
TEST(MultipartFormDataTest, ManyItemsEndToEnd) {
12082+
// Integration test: send many UploadFormDataItems and verify the server
12083+
// receives all of them correctly (#2410).
12084+
constexpr size_t kItemCount = 500;
12085+
12086+
auto handled = false;
12087+
12088+
Server svr;
12089+
svr.Post("/upload", [&](const Request &req, Response &res) {
12090+
EXPECT_EQ(kItemCount, req.form.fields.size());
12091+
for (size_t i = 0; i < kItemCount; i++) {
12092+
auto key = "field" + std::to_string(i);
12093+
auto val = "value" + std::to_string(i);
12094+
auto it = req.form.fields.find(key);
12095+
if (it != req.form.fields.end()) {
12096+
EXPECT_EQ(val, it->second.content);
12097+
} else {
12098+
ADD_FAILURE() << "Missing field: " << key;
12099+
}
12100+
}
12101+
res.set_content("ok", "text/plain");
12102+
handled = true;
12103+
});
12104+
12105+
auto port = svr.bind_to_any_port(HOST);
12106+
auto t = thread([&] { svr.listen_after_bind(); });
12107+
auto se = detail::scope_exit([&] {
12108+
svr.stop();
12109+
t.join();
12110+
ASSERT_FALSE(svr.is_running());
12111+
ASSERT_TRUE(handled);
12112+
});
12113+
12114+
svr.wait_until_ready();
12115+
12116+
UploadFormDataItems items;
12117+
items.reserve(kItemCount);
12118+
for (size_t i = 0; i < kItemCount; i++) {
12119+
items.push_back(
12120+
{"field" + std::to_string(i), "value" + std::to_string(i), "", ""});
12121+
}
12122+
12123+
Client cli(HOST, port);
12124+
auto res = cli.Post("/upload", items);
12125+
ASSERT_TRUE(res);
12126+
EXPECT_EQ(StatusCode::OK_200, res->status);
12127+
}
12128+
1203812129
TEST(MultipartFormDataTest, MakeFileProvider) {
1203912130
// Verify make_file_provider sends a file's contents correctly.
1204012131
const std::string file_content(4096, 'Z');

0 commit comments

Comments
 (0)