Skip to content

Commit f64ec58

Browse files
committed
feat: Support multi-threaded asynchronous data upload to object storage
1 parent d81f3d6 commit f64ec58

8 files changed

Lines changed: 386 additions & 111 deletions

File tree

bolt/connectors/hive/storage_adapters/s3fs/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,7 @@ if(BOLT_ENABLE_S3)
3939
if(${BOLT_BUILD_TESTING})
4040
add_subdirectory(tests)
4141
endif()
42+
if(${BOLT_BUILD_BENCHMARKS})
43+
add_subdirectory(benchmark)
44+
endif()
4245
endif()

bolt/connectors/hive/storage_adapters/s3fs/S3Config.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ class S3Config {
9191
kRetryMode,
9292
kUseProxyFromEnv,
9393
kCredentialsProvider,
94+
kPartUploadAsync,
95+
kPartUploadSize,
96+
kMaxConcurrentUploadNum,
97+
kUploadThreads,
9498
kEnd
9599
};
96100

@@ -129,6 +133,13 @@ class S3Config {
129133
std::make_pair("use-proxy-from-env", "false")},
130134
{Keys::kCredentialsProvider,
131135
std::make_pair("aws-credentials-provider", std::nullopt)},
136+
{Keys::kPartUploadAsync,
137+
std::make_pair("part-upload-async", "false")},
138+
{Keys::kPartUploadSize,
139+
std::make_pair("part-upload-size", "10485760")},
140+
{Keys::kMaxConcurrentUploadNum,
141+
std::make_pair("max-concurrent-upload-num", "4")},
142+
{Keys::kUploadThreads, std::make_pair("upload-threads", "16")},
132143
};
133144
return config;
134145
}
@@ -258,6 +269,33 @@ class S3Config {
258269
return config_.find(Keys::kCredentialsProvider)->second;
259270
}
260271

272+
/// If true, enables asynchronous upload of parts for S3 multipart uploads,
273+
/// false otherwise.
274+
bool partUploadAsync() const {
275+
auto value = config_.find(Keys::kPartUploadAsync)->second.value();
276+
return folly::to<bool>(value);
277+
}
278+
279+
/// Return the size (in bytes) of each part for S3 multipart uploads.
280+
int32_t partUploadSize() const {
281+
auto value = config_.find(Keys::kPartUploadSize)->second.value();
282+
return folly::to<uint32_t>(value);
283+
}
284+
285+
/// Return the maximum number of concurrent uploads for S3 multipart uploads,
286+
/// applicable only when asynchronous uploads are enabled.
287+
int32_t maxConcurrentUploadNum() const {
288+
auto value = config_.find(Keys::kMaxConcurrentUploadNum)->second.value();
289+
return folly::to<uint32_t>(value);
290+
}
291+
292+
/// Return the number of threads to use for S3 multipart uploads,
293+
/// applicable only when asynchronous uploads are enabled.
294+
int32_t uploadThreads() const {
295+
auto value = config_.find(Keys::kUploadThreads)->second.value();
296+
return folly::to<uint32_t>(value);
297+
}
298+
261299
private:
262300
std::unordered_map<Keys, std::optional<std::string>> config_;
263301
std::string payloadSigningPolicy_;

bolt/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ class S3FileSystem::Impl {
316316

317317
client_ = std::make_shared<Aws::S3::S3Client>(
318318
credentialsProvider, nullptr /* endpointProvider */, clientConfig);
319+
s3Config_ = std::make_shared<S3Config>(s3Config);
319320
++fileSystemCount;
320321
}
321322

@@ -453,6 +454,10 @@ class S3FileSystem::Impl {
453454
return client_.get();
454455
}
455456

457+
S3Config* s3Config() const {
458+
return s3Config_.get();
459+
}
460+
456461
std::string getLogLevelName() const {
457462
return getAwsInstance()->getLogLevelName();
458463
}
@@ -463,6 +468,7 @@ class S3FileSystem::Impl {
463468

464469
private:
465470
std::shared_ptr<Aws::S3::S3Client> client_;
471+
std::shared_ptr<S3Config> s3Config_;
466472
};
467473

468474
S3FileSystem::S3FileSystem(
@@ -494,8 +500,8 @@ std::unique_ptr<WriteFile> S3FileSystem::openFileForWrite(
494500
std::string_view s3Path,
495501
const FileOptions& options) {
496502
const auto path = getPath(s3Path);
497-
auto s3file =
498-
std::make_unique<S3WriteFile>(path, impl_->s3Client(), options.pool);
503+
auto s3file = std::make_unique<S3WriteFile>(
504+
path, impl_->s3Client(), options.pool, impl_->s3Config());
499505
return s3file;
500506
}
501507

bolt/connectors/hive/storage_adapters/s3fs/S3WriteFile.cpp

Lines changed: 126 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
*/
3030

3131
#include "bolt/connectors/hive/storage_adapters/s3fs/S3WriteFile.h"
32+
#include <folly/executors/CPUThreadPoolExecutor.h>
33+
#include <folly/executors/thread_factory/NamedThreadFactory.h>
34+
#include <folly/synchronization/ThrottledLifoSem.h>
3235
#include "bolt/common/base/StatsReporter.h"
3336
#include "bolt/connectors/hive/storage_adapters/s3fs/S3Counters.h"
3437
#include "bolt/connectors/hive/storage_adapters/s3fs/S3Util.h"
@@ -52,14 +55,29 @@ class S3WriteFile::Impl {
5255
explicit Impl(
5356
std::string_view path,
5457
Aws::S3::S3Client* client,
55-
memory::MemoryPool* pool)
58+
memory::MemoryPool* pool,
59+
S3Config* s3Config)
5660
: client_(client), pool_(pool) {
5761
BOLT_CHECK_NOT_NULL(client);
5862
BOLT_CHECK_NOT_NULL(pool);
63+
BOLT_CHECK_NOT_NULL(s3Config);
64+
partUploadSize_ = s3Config->partUploadSize();
65+
if (s3Config->partUploadAsync()) {
66+
maxConcurrentUploadNum_ = std::make_unique<folly::ThrottledLifoSem>(
67+
s3Config->maxConcurrentUploadNum());
68+
if (!uploadThreadPool_) {
69+
uploadThreadPool_ = std::make_shared<folly::CPUThreadPoolExecutor>(
70+
s3Config->uploadThreads(),
71+
std::make_shared<folly::NamedThreadFactory>("upload-thread"));
72+
}
73+
} else {
74+
uploadThreadPool_ = nullptr;
75+
}
76+
5977
getBucketAndKeyFromPath(path, bucket_, key_);
6078
currentPart_ = std::make_unique<dwio::common::DataBuffer<char>>(*pool_);
61-
currentPart_->reserve(kPartUploadSize);
6279
// Check that the object doesn't exist, if it does throw an error.
80+
currentPart_->reserve(partUploadSize_);
6381
{
6482
Aws::S3::Model::HeadObjectRequest request;
6583
request.SetBucket(awsString(bucket_));
@@ -103,6 +121,7 @@ class S3WriteFile::Impl {
103121
/// (https://github.com/apache/arrow/issues/11934). So we instead default
104122
/// to application/octet-stream which is less misleading.
105123
request.SetContentType(kApplicationOctetStream);
124+
request.SetChecksumAlgorithm(Aws::S3::Model::ChecksumAlgorithm::CRC32);
106125
auto outcome = client_->CreateMultipartUpload(request);
107126
BOLT_CHECK_AWS_OUTCOME(
108127
outcome, "Failed initiating multiple part upload", bucket_, key_);
@@ -115,7 +134,7 @@ class S3WriteFile::Impl {
115134
// Appends data to the end of the file.
116135
void append(std::string_view data) {
117136
BOLT_CHECK(!closed(), "File is closed");
118-
if (data.size() + currentPart_->size() >= kPartUploadSize) {
137+
if (data.size() + currentPart_->size() >= partUploadSize_) {
119138
upload(data);
120139
} else {
121140
// Append to current part.
@@ -129,7 +148,7 @@ class S3WriteFile::Impl {
129148
BOLT_CHECK(!closed(), "File is closed");
130149
/// currentPartSize must be less than kPartUploadSize since
131150
/// append() would have already flushed after reaching kUploadPartSize.
132-
BOLT_CHECK_LT(currentPart_->size(), kPartUploadSize);
151+
BOLT_CHECK_LT(currentPart_->size(), partUploadSize_);
133152
}
134153

135154
// Complete the multipart upload and close the file.
@@ -139,6 +158,20 @@ class S3WriteFile::Impl {
139158
}
140159
RECORD_METRIC_VALUE(kMetricS3StartedUploads);
141160
uploadPart({currentPart_->data(), currentPart_->size()}, true);
161+
if (uploadThreadPool_) {
162+
if (!futures_.empty()) {
163+
folly::collectAll(std::move(futures_)).get();
164+
}
165+
// The list of parts should be in ascending order.
166+
std::sort(
167+
uploadState_.completedParts.begin(),
168+
uploadState_.completedParts.end(),
169+
[](const Aws::S3::Model::CompletedPart& a,
170+
const Aws::S3::Model::CompletedPart& b) {
171+
return a.GetPartNumber() < b.GetPartNumber();
172+
});
173+
}
174+
142175
BOLT_CHECK_EQ(uploadState_.partNumber, uploadState_.completedParts.size());
143176
// Complete the multipart upload.
144177
{
@@ -172,7 +205,6 @@ class S3WriteFile::Impl {
172205
}
173206

174207
private:
175-
static constexpr int64_t kPartUploadSize = 10 * 1024 * 1024;
176208
static constexpr const char* kApplicationOctetStream =
177209
"application/octet-stream";
178210

@@ -186,10 +218,9 @@ class S3WriteFile::Impl {
186218
int64_t partNumber = 0;
187219
Aws::String id;
188220
};
189-
UploadState uploadState_;
190221

191-
// Data can be smaller or larger than the kPartUploadSize.
192-
// Complete the currentPart_ and upload kPartUploadSize chunks of data.
222+
// Data can be smaller or larger than the partUploadSize_.
223+
// Complete the currentPart_ and upload partUploadSize_ chunks of data.
193224
// Save the remaining into currentPart_.
194225
void upload(const std::string_view data) {
195226
auto dataPtr = data.data();
@@ -200,44 +231,90 @@ class S3WriteFile::Impl {
200231
uploadPart({currentPart_->data(), currentPart_->size()});
201232
dataPtr += remainingBufferSize;
202233
dataSize -= remainingBufferSize;
203-
while (dataSize > kPartUploadSize) {
204-
uploadPart({dataPtr, kPartUploadSize});
205-
dataPtr += kPartUploadSize;
206-
dataSize -= kPartUploadSize;
234+
while (dataSize > partUploadSize_) {
235+
uploadPart({dataPtr, partUploadSize_});
236+
dataPtr += partUploadSize_;
237+
dataSize -= partUploadSize_;
207238
}
208239
// Stash the remaining at the beginning of currentPart.
209240
currentPart_->unsafeAppend(0, dataPtr, dataSize);
210241
}
211242

212243
void uploadPart(const std::string_view part, bool isLast = false) {
213-
// Only the last part can be less than kPartUploadSize.
214-
BOLT_CHECK(isLast || (!isLast && (part.size() == kPartUploadSize)));
215-
// Upload the part.
216-
{
217-
Aws::S3::Model::UploadPartRequest request;
218-
request.SetBucket(bucket_);
219-
request.SetKey(key_);
220-
request.SetUploadId(uploadState_.id);
221-
request.SetPartNumber(++uploadState_.partNumber);
222-
request.SetContentLength(part.size());
223-
request.SetBody(
224-
std::make_shared<StringViewStream>(part.data(), part.size()));
225-
auto outcome = client_->UploadPart(request);
226-
BOLT_CHECK_AWS_OUTCOME(outcome, "Failed to upload", bucket_, key_);
227-
// Append ETag and part number for this uploaded part.
228-
// This will be needed for upload completion in Close().
229-
auto result = outcome.GetResult();
230-
Aws::S3::Model::CompletedPart part;
244+
// Only the last part can be less than partUploadSize_.
245+
BOLT_CHECK(isLast || part.size() == partUploadSize_);
246+
auto uploadPartSync = [&](const std::string_view partData) {
247+
Aws::S3::Model::CompletedPart completedPart =
248+
uploadPartSeq(uploadState_.id, ++uploadState_.partNumber, partData);
249+
uploadState_.completedParts.push_back(std::move(completedPart));
250+
};
251+
// If this is the last part and no parts have been uploaded yet,
252+
// use the synchronous upload method.
253+
bool useSyncUpload =
254+
!uploadThreadPool_ || (isLast && uploadState_.partNumber == 0);
255+
if (useSyncUpload) {
256+
uploadPartSync(part);
257+
} else {
258+
uploadPartAsync(part);
259+
}
260+
}
231261

232-
part.SetPartNumber(uploadState_.partNumber);
233-
part.SetETag(result.GetETag());
234-
// Don't add the checksum to the part if the checksum is empty.
235-
// Some filesystems such as IBM COS require this to be not set.
236-
if (!result.GetChecksumCRC32().empty()) {
237-
part.SetChecksumCRC32(result.GetChecksumCRC32());
238-
}
239-
uploadState_.completedParts.push_back(std::move(part));
262+
// Common logic for uploading a part.
263+
Aws::S3::Model::CompletedPart uploadPartSeq(
264+
const Aws::String& uploadId,
265+
const int64_t partNumber,
266+
const std::string_view part) {
267+
Aws::S3::Model::UploadPartRequest request;
268+
request.SetBucket(bucket_);
269+
request.SetKey(key_);
270+
request.SetUploadId(uploadId);
271+
request.SetPartNumber(partNumber);
272+
request.SetContentLength(part.size());
273+
request.SetBody(
274+
std::make_shared<StringViewStream>(part.data(), part.size()));
275+
// The default algorithm used is MD5. However, MD5 is not supported with
276+
// fips and can cause a SIGSEGV. Set CRC32 instead which is a standard for
277+
// checksum computation and is not restricted by fips.
278+
request.SetChecksumAlgorithm(Aws::S3::Model::ChecksumAlgorithm::CRC32);
279+
auto outcome = client_->UploadPart(request);
280+
BOLT_CHECK_AWS_OUTCOME(outcome, "Failed to upload", bucket_, key_);
281+
// Append ETag and part number for this uploaded part.
282+
// This will be needed for upload completion in Close().
283+
auto result = outcome.GetResult();
284+
Aws::S3::Model::CompletedPart completedPart;
285+
completedPart.SetPartNumber(partNumber);
286+
completedPart.SetETag(result.GetETag());
287+
// Don't add the checksum to the part if the checksum is empty.
288+
// Some filesystems such as IBM COS require this to be not set.
289+
if (!result.GetChecksumCRC32().empty()) {
290+
completedPart.SetChecksumCRC32(result.GetChecksumCRC32());
240291
}
292+
return completedPart;
293+
}
294+
295+
// Upload the part asynchronously.
296+
void uploadPartAsync(const std::string_view part) {
297+
// NOLINT(readability-convert-member-functions-to-static)
298+
maxConcurrentUploadNum_->wait();
299+
const int64_t partNumber = ++uploadState_.partNumber;
300+
std::shared_ptr<std::string> partStr =
301+
std::make_shared<std::string>(part.data(), part.size());
302+
futures_.emplace_back(
303+
folly::via(uploadThreadPool_.get(), [this, partNumber, partStr]() {
304+
SCOPE_EXIT {
305+
maxConcurrentUploadNum_->post();
306+
};
307+
try {
308+
Aws::S3::Model::CompletedPart completedPart =
309+
uploadPartSeq(uploadState_.id, partNumber, *partStr);
310+
std::lock_guard<std::mutex> lock(uploadStateMutex_);
311+
uploadState_.completedParts.push_back(std::move(completedPart));
312+
} catch (const std::exception& e) {
313+
LOG(ERROR) << "Exception during async upload: " << e.what();
314+
} catch (...) {
315+
LOG(ERROR) << "Unknown exception during async upload.";
316+
}
317+
}));
241318
}
242319

243320
Aws::S3::S3Client* client_;
@@ -246,13 +323,22 @@ class S3WriteFile::Impl {
246323
std::string bucket_;
247324
std::string key_;
248325
size_t fileSize_ = -1;
326+
UploadState uploadState_;
327+
std::mutex uploadStateMutex_;
328+
std::vector<folly::Future<folly::Unit>> futures_;
329+
size_t partUploadSize_;
330+
// maxConcurrentUploadNum_ controls the concurrency of asynchronous uploads to
331+
// S3 for each S3WriteFile, preventing excessive memory usage.
332+
std::unique_ptr<folly::ThrottledLifoSem> maxConcurrentUploadNum_;
333+
inline static std::shared_ptr<folly::CPUThreadPoolExecutor> uploadThreadPool_;
249334
};
250335

251336
S3WriteFile::S3WriteFile(
252337
std::string_view path,
253338
Aws::S3::S3Client* client,
254-
memory::MemoryPool* pool) {
255-
impl_ = std::make_shared<Impl>(path, client, pool);
339+
memory::MemoryPool* pool,
340+
S3Config* s3Config) {
341+
impl_ = std::make_shared<Impl>(path, client, pool, s3Config);
256342
}
257343

258344
void S3WriteFile::append(std::string_view data) {

bolt/connectors/hive/storage_adapters/s3fs/S3WriteFile.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#include "bolt/common/file/File.h"
3434
#include "bolt/common/memory/MemoryPool.h"
35+
#include "bolt/connectors/hive/storage_adapters/s3fs/S3Config.h"
3536

3637
namespace Aws::S3 {
3738
class S3Client;
@@ -64,7 +65,8 @@ class S3WriteFile : public WriteFile {
6465
S3WriteFile(
6566
std::string_view path,
6667
Aws::S3::S3Client* client,
67-
memory::MemoryPool* pool);
68+
memory::MemoryPool* pool,
69+
S3Config* s3Config);
6870

6971
/// Appends data to the end of the file.
7072
/// Uploads a part on reaching part size limit.

0 commit comments

Comments
 (0)