Skip to content

Commit d993365

Browse files
Insert software prefetches into merge functions. This improves performance when hardware prefetchers are disabled on AMD machines.
PiperOrigin-RevId: 671468969
1 parent 8aa0add commit d993365

3 files changed

Lines changed: 14 additions & 0 deletions

File tree

src/google/protobuf/extension_set.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,7 @@ size_t SizeOfUnion(ItX it_dest, ItX end_dest, ItY it_source, ItY end_source) {
971971

972972
void ExtensionSet::MergeFrom(const MessageLite* extendee,
973973
const ExtensionSet& other) {
974+
Prefetch5LinesFrom1Line(&other);
974975
if (PROTOBUF_PREDICT_TRUE(!is_large())) {
975976
if (PROTOBUF_PREDICT_TRUE(!other.is_large())) {
976977
GrowCapacity(SizeOfUnion(flat_begin(), flat_end(), other.flat_begin(),

src/google/protobuf/port.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,15 @@ inline PROTOBUF_ALWAYS_INLINE void Prefetch5LinesFrom7Lines(const void* ptr) {
306306
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 704);
307307
}
308308

309+
// Prefetch 5 64-byte cache lines starting from 1 cache-line ahead.
//
// Issues PROTOBUF_PREFETCH_WITH_OFFSET five times on `ptr`, at byte offsets
// 64, 128, 192, 256 and 320, i.e. one offset per 64-byte step beginning 64
// bytes past `ptr`. Offset 0 (the line holding `ptr` itself) is not requested
// here. `ptr` is only passed to the macro; this function does not read
// through it directly.
// NOTE(review): the macro is defined outside this view -- presumably a
// non-faulting prefetch hint, so addresses past the object are harmless;
// confirm against its definition in port.h.
310+
inline PROTOBUF_ALWAYS_INLINE void Prefetch5LinesFrom1Line(const void* ptr) {
311+
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 64);
312+
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 128);
313+
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 192);
314+
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 256);
315+
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 320);
316+
}
317+
309318
#if defined(NDEBUG) && ABSL_HAVE_BUILTIN(__builtin_unreachable)
310319
[[noreturn]] ABSL_ATTRIBUTE_COLD PROTOBUF_ALWAYS_INLINE inline void
311320
Unreachable() {

src/google/protobuf/repeated_ptr_field.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ memswap<ArenaOffsetHelper<RepeatedPtrFieldBase>::value>(
132132
template <>
133133
void RepeatedPtrFieldBase::MergeFrom<std::string>(
134134
const RepeatedPtrFieldBase& from) {
135+
Prefetch5LinesFrom1Line(&from);
135136
ABSL_DCHECK_NE(&from, this);
136137
int new_size = current_size_ + from.current_size_;
137138
auto dst = reinterpret_cast<std::string**>(InternalReserve(new_size));
@@ -159,6 +160,7 @@ void RepeatedPtrFieldBase::MergeFrom<std::string>(
159160

160161
int RepeatedPtrFieldBase::MergeIntoClearedMessages(
161162
const RepeatedPtrFieldBase& from) {
163+
Prefetch5LinesFrom1Line(&from);
162164
auto dst = reinterpret_cast<MessageLite**>(elements() + current_size_);
163165
auto src = reinterpret_cast<MessageLite* const*>(from.elements());
164166
int count = std::min(ClearedCount(), from.current_size_);
@@ -173,6 +175,7 @@ int RepeatedPtrFieldBase::MergeIntoClearedMessages(
173175

174176
void RepeatedPtrFieldBase::MergeFromConcreteMessage(
175177
const RepeatedPtrFieldBase& from, CopyFn copy_fn) {
178+
Prefetch5LinesFrom1Line(&from);
176179
ABSL_DCHECK_NE(&from, this);
177180
int new_size = current_size_ + from.current_size_;
178181
void** dst = InternalReserve(new_size);
@@ -196,6 +199,7 @@ void RepeatedPtrFieldBase::MergeFromConcreteMessage(
196199
template <>
197200
void RepeatedPtrFieldBase::MergeFrom<MessageLite>(
198201
const RepeatedPtrFieldBase& from) {
202+
Prefetch5LinesFrom1Line(&from);
199203
ABSL_DCHECK_NE(&from, this);
200204
ABSL_DCHECK(from.current_size_ > 0);
201205
int new_size = current_size_ + from.current_size_;

0 commit comments

Comments
 (0)