Skip to content

Commit 7834fc6

Browse files
committed
[Disco] Set worker CPU affinity with env variable
This PR enables setting the CPU affinity of disco workers in MLC, following the support in apache/tvm#16807. The purpose is to reduce the CPU core switching overhead incurred by disco workers, which may cause extra bubble time in the workers before or during tasks. We use the environment variable `MLC_DISCO_WORKER_CPU_BINDING` to specify the CPU affinities of workers. It is not used by default. To enable it, you can run a command like ```shell MLC_DISCO_WORKER_CPU_BINDING=64,65,66,67 python some_mlc_app.py ``` to specify the four CPU core ids for the four workers.
1 parent 96b8c33 commit 7834fc6

2 files changed

Lines changed: 55 additions & 0 deletions

File tree

cpp/serve/function_table.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,44 @@
1313
#include <tvm/runtime/packed_func.h>
1414
#include <tvm/runtime/registry.h>
1515

16+
#include <cstdlib>
1617
#include <filesystem>
1718
#include <string>
1819
#include <vector>
1920

2021
#include "../support/load_bytes_from_file.h"
22+
#include "../support/utils.h"
2123
#include "sampler/sampler.h"
2224

2325
namespace mlc {
2426
namespace llm {
2527
namespace serve {
2628

29+
/*!
 * \brief Read the CPU core ids for disco workers from the environment
 * variable MLC_DISCO_WORKER_CPU_BINDING.
 *
 * The variable is expected to hold a comma-separated list of integers,
 * e.g. "64,65,66,67". Whitespace around each integer is tolerated.
 *
 * \param num_workers The number of disco workers; at least this many CPU ids
 * must be listed in the variable.
 * \return NullOpt when the variable is not set, otherwise the parsed CPU ids
 * in the order they were given.
 * \note A malformed or out-of-range entry, or too few entries, is reported
 * through LOG(FATAL).
 */
Optional<IntTuple> GetDiscoWorkerCPUBinding(int num_workers) {
  const char* raw_cpu_binding = std::getenv("MLC_DISCO_WORKER_CPU_BINDING");
  if (raw_cpu_binding == nullptr) {
    return NullOpt;
  }

  std::string cpu_binding_str(raw_cpu_binding);
  std::vector<std::string> cpu_ids_str = Split(cpu_binding_str, ',');
  std::vector<int64_t> cpu_ids;
  cpu_ids.reserve(cpu_ids_str.size());
  for (const std::string& cpu_id_str : cpu_ids_str) {
    bool parsed = false;
    long cpu_id = 0;
    try {
      std::size_t num_consumed = 0;
      cpu_id = std::stol(cpu_id_str, &num_consumed);
      // std::stol stops at the first non-numeric character without complaining,
      // so "64abc" would otherwise be accepted as 64. Only trailing whitespace
      // is allowed after the number.
      parsed = cpu_id_str.find_first_not_of(" \t\r\n", num_consumed) == std::string::npos;
    } catch (const std::logic_error&) {
      // std::stol throws std::invalid_argument (not a number) or
      // std::out_of_range (does not fit in long); both derive from
      // std::logic_error and both mean the entry is unusable.
      parsed = false;
    }
    if (parsed) {
      cpu_ids.push_back(cpu_id);
    } else {
      LOG(FATAL) << "Invalid MLC_DISCO_WORKER_CPU_BINDING \"" << cpu_binding_str << "\"";
    }
  }
  if (static_cast<int>(cpu_ids.size()) < num_workers) {
    LOG(FATAL) << "Insufficient number of specified CPU ids in MLC_DISCO_WORKER_CPU_BINDING, "
                  "expecting at least "
               << num_workers << " CPU ids but only " << cpu_ids.size() << " are given.";
  }

  return IntTuple{cpu_ids};
}
53+
2754
PackedFunc FunctionTable::SessionFuncAsPackedFunc(Session sess, DRef sess_func, String name) {
2855
return PackedFunc([sess, func = std::move(sess_func), name = std::move(name)](
2956
TVMArgs args, TVMRetValue* rv) -> void {
@@ -100,6 +127,10 @@ void FunctionTable::Init(TVMArgValue reload_lib, Device device, picojson::object
100127
}
101128
return SessionFuncAsPackedFunc(sess, func, name);
102129
};
130+
if (Optional<IntTuple> cpu_ids = GetDiscoWorkerCPUBinding(/*num_workers=*/num_shards)) {
131+
IntTuple cpu_ids_value = cpu_ids.value();
132+
sess->CallPacked(sess->GetGlobalFunc("runtime.disco.bind_worker_to_cpu_core"), cpu_ids_value);
133+
}
103134
this->get_global_func = [this](const std::string& name) -> PackedFunc {
104135
return SessionFuncAsPackedFunc(sess, sess->GetGlobalFunc(name), name);
105136
};

cpp/support/utils.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*!
2+
* Copyright (c) 2023 by Contributors
3+
* \file utils.h
4+
* \brief Utility functions.
5+
*/
6+
#include <sstream>
7+
#include <string>
8+
#include <vector>
9+
10+
namespace mlc {
11+
namespace llm {
12+
13+
/*!
 * \brief Split a string into the fields separated by a delimiter character.
 *
 * Empty fields between consecutive delimiters (and before a leading
 * delimiter) are kept. An empty field after a trailing delimiter is not
 * produced, and an empty input yields an empty vector.
 *
 * \param str The string to split.
 * \param delim The delimiter character.
 * \return The fields of `str` in order of appearance.
 */
inline std::vector<std::string> Split(const std::string& str, char delim) {
  std::vector<std::string> fields;
  std::string::size_type field_begin = 0;
  // Stop as soon as the cursor reaches the end of the string: this is what
  // drops the empty field after a trailing delimiter.
  while (field_begin < str.size()) {
    const std::string::size_type field_end = str.find(delim, field_begin);
    if (field_end == std::string::npos) {
      // No more delimiters: the remainder is the last field.
      fields.push_back(str.substr(field_begin));
      break;
    }
    fields.push_back(str.substr(field_begin, field_end - field_begin));
    field_begin = field_end + 1;
  }
  return fields;
}
22+
23+
} // namespace llm
24+
} // namespace mlc

0 commit comments

Comments
 (0)