Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
aae7a05
almostly finished fixing envs overfitting
Ero-Sennin9 May 7, 2026
021e0aa
finished amends
Ero-Sennin9 May 8, 2026
009a1ed
added improvements
Ero-Sennin9 May 8, 2026
ce8a9a8
added little improves and also fixed ExtractIfNotExtracted
Ero-Sennin9 May 12, 2026
7d4636a
added final amends to env logic
Ero-Sennin9 May 12, 2026
5f2de32
added another fixes
Ero-Sennin9 May 12, 2026
7611f86
added final amends
Ero-Sennin9 May 12, 2026
13af7e1
added last improvements to env code
Ero-Sennin9 May 13, 2026
51b29fd
fixed some compilation erros in env code
Ero-Sennin9 May 13, 2026
fb7e537
added another fixes to env code
Ero-Sennin9 May 13, 2026
8c9cbb1
finished all compilation errors in env code
Ero-Sennin9 May 13, 2026
ea22eaa
added numerous fixes to env code
Ero-Sennin9 May 13, 2026
6883fe3
deleted unuseful friend in env code
Ero-Sennin9 May 14, 2026
e9ec593
added guard principle to the env code
Ero-Sennin9 May 14, 2026
ec75d46
fixed stub launcher after previous remarks
Ero-Sennin9 May 14, 2026
83de9f5
added environment proxy to isolate Environment in EnvironmentManager
Ero-Sennin9 May 14, 2026
0ac18c0
added numerous compile time fixes to env code
Ero-Sennin9 May 14, 2026
080deb4
added fixes to stub launcher code
Ero-Sennin9 May 14, 2026
f06faa7
added another fixes to stub launcher code
Ero-Sennin9 May 14, 2026
7bb6bcc
added another fixes to env code
Ero-Sennin9 May 14, 2026
57fee73
added another fixes
Ero-Sennin9 May 14, 2026
1e7a496
added last fixes to the env code
Ero-Sennin9 May 14, 2026
dc75339
added last fixes to the env code (i hope)
Ero-Sennin9 May 14, 2026
500a848
add last fixes to the env code
Ero-Sennin9 May 14, 2026
d416fb8
added i hope last fixes to env code
Ero-Sennin9 May 14, 2026
2ffb90f
fixed signatures in env code
Ero-Sennin9 May 14, 2026
2dc5619
added types fixes in the env code
Ero-Sennin9 May 14, 2026
79db276
fixed types in the env code
Ero-Sennin9 May 14, 2026
2739ddb
fixed EnvironmentProxy
Ero-Sennin9 May 14, 2026
d530ec2
added fixes to EnvironmentProxy
Ero-Sennin9 May 14, 2026
9b70ad2
added fixes to EnvironmentProxy
Ero-Sennin9 May 14, 2026
29a47d1
fixed logs in the env code
Ero-Sennin9 May 14, 2026
88c4433
added fixes to lgs in the env code
Ero-Sennin9 May 14, 2026
5bf81da
returned logs code in the nev code
Ero-Sennin9 May 14, 2026
549c197
fixed ref counting in the env code
Ero-Sennin9 May 14, 2026
fce8893
added some debug logs
Ero-Sennin9 May 15, 2026
19f4535
fixed bug with canonical env path
Ero-Sennin9 May 15, 2026
3aa7895
maybe fixed environment guard
Ero-Sennin9 May 15, 2026
31841d8
fixed environment guard
Ero-Sennin9 May 15, 2026
1493d81
added fixes with move semantics to the env code
Ero-Sennin9 May 15, 2026
28df6c4
removed debug logs from the env code
Ero-Sennin9 May 15, 2026
964044d
Small changes
aleksn7 May 26, 2026
5028129
Remove useless changes
aleksn7 May 26, 2026
514fcc0
Cleanup env for uninitialized stub
aleksn7 May 26, 2026
ef09594
Add sanity log into EnvironmentManager destructor
aleksn7 May 26, 2026
f1da0d5
Make comments more formal
aleksn7 May 26, 2026
8c6bcfa
Rename ref counter methods
aleksn7 May 26, 2026
2e21869
Remove useless variables
aleksn7 May 26, 2026
8cf2408
Change env_path to canonical_env_path
aleksn7 May 27, 2026
195874e
Fix drop environment mismatch bug
aleksn7 May 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 115 additions & 61 deletions src/pb_env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -240,95 +240,149 @@ EnvironmentManager::EnvironmentManager()
}

std::string
EnvironmentManager::ExtractIfNotExtracted(std::string env_path)
EnvironmentManager::ExtractIfNotExtracted(const std::string& env_path)
{
// Lock the mutex. Only a single thread should modify the map.
std::lock_guard<std::mutex> lk(mutex_);
char canonical_env_path[PATH_MAX + 1];

char* err = realpath(env_path.c_str(), canonical_env_path);
if (err == nullptr) {
throw PythonBackendException(
std::string("Failed to get the canonical path for ") + env_path + ".");
}

time_t last_modified_time;
LastModifiedTime(canonical_env_path, &last_modified_time);

bool env_extracted = false;
bool re_extraction = false;

// If the path is not a conda-packed file, then bypass the extraction process
struct stat info;
if (stat(canonical_env_path, &info) != 0) {
if (stat(env_path.c_str(), &info) != 0) {
throw PythonBackendException(
std::string("stat() of : ") + canonical_env_path + " returned error.");
"stat() of : " + env_path + " returned error.");
} else if (S_ISDIR(info.st_mode)) {
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("Returning canonical path since EXECUTION_ENV_PATH does "
"not contain compressed path. Path: ") +
canonical_env_path)
("Returning path since EXECUTION_ENV_PATH does "
"not contain compressed path. Path: " +
env_path)
.c_str());
return canonical_env_path;
return env_path;
}
const auto env_itr = env_map_.find(canonical_env_path);
if (env_itr != env_map_.end()) {

// Lock the mutex. Only a single thread should modify the map.
std::lock_guard<std::mutex> lk(mutex_);

time_t last_modified_time;
LastModifiedTime(env_path, &last_modified_time);

auto env_itr = env_map_.find(env_path);
// Extract only if the env has not been extracted yet.
if (env_itr == env_map_.end()) {
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("Extracting Python execution env " + env_path).c_str());

std::string dst_env_path =
std::string(base_path_) + "/" + std::to_string(env_path_counter_);
++env_path_counter_;

// Add the environment to the list of environments.
env_itr =
env_map_
.try_emplace(env_path, env_path, dst_env_path, last_modified_time)
.first;
} else {
Environment& env = env_itr->second;

// Check if the environment has been modified and would
// need to be extracted again.
if (env_itr->second.second == last_modified_time) {
env_extracted = true;
} else {
if (env.LastModifiedTime() != last_modified_time) {
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("Re-extracting Python execution env " + env_path).c_str());
// Environment file has been updated. Need to clear
// the previously extracted environment and extract
// the environment to the same destination directory.
RecursiveDirectoryDelete(env_itr->second.first.c_str());
re_extraction = true;
env.Update(last_modified_time);
}
}

// Extract only if the env has not been extracted yet.
if (!env_extracted) {
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("Extracting Python execution env ") + canonical_env_path)
.c_str());
std::string dst_env_path;
if (re_extraction) {
dst_env_path = env_map_[canonical_env_path].first;
} else {
dst_env_path =
std::string(base_path_) + "/" + std::to_string(env_map_.size());
}
Environment& env = env_itr->second;

std::string canonical_env_path_str(canonical_env_path);
// Reference counter must be incremented on each ExtractIfNotExtracted call.
env.IncrementRefCount();

int status =
mkdir(dst_env_path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
if (status == 0) {
ExtractTarFile(canonical_env_path_str, dst_env_path);
} else {
throw PythonBackendException(
std::string("Failed to create environment directory for '") +
dst_env_path.c_str() + "'.");
}
if (re_extraction) {
// Just update the last modified timestamp
env_map_[canonical_env_path].second = last_modified_time;
} else {
// Add the path to the list of environments
env_map_.insert({canonical_env_path, {dst_env_path, last_modified_time}});
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("Successfully extracted Python execution env " + env_path).c_str());

return env.Destination();
}

void
EnvironmentManager::DropEnvironment(const std::string& env_path)
{
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("Trying to drop Python execution env " + env_path).c_str());

std::lock_guard<std::mutex> lk(mutex_);

auto env_itr = env_map_.find(env_path);
if (env_itr != env_map_.end()) {
if (env_itr->second.DecrementRefCount() == 0) {
env_map_.erase(env_itr);
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
("Successfully dropped Python execution env " + env_path).c_str());
}
return dst_env_path;
} else {
return env_map_.find(canonical_env_path)->second.first;
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE, ("The environment with the key '" + env_path +
"' is not presented the env_map")
.c_str());
}
}

EnvironmentManager::~EnvironmentManager()
{
// All environments must be deleted before EnvironmentManager is called
if (!env_map_.empty()) {
LOG_MESSAGE(
TRITONSERVER_LOG_ERROR, "Environment mananager map is not empty");
}

RecursiveDirectoryDelete(base_path_);
}

EnvironmentManager::Environment::Environment(
const std::string& source, const std::string& destination,
const time_t& last_modified_time)
: source_(source), destination_(destination),
last_modified_time_(last_modified_time)
{
Extract();
}

void
EnvironmentManager::Environment::Extract()
{
int status =
mkdir(destination_.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
if (status != 0) {
throw PythonBackendException(
"Failed to create environment directory for '" + destination_ + "'.");
}
ExtractTarFile(source_, destination_);
}

void
EnvironmentManager::Environment::Update(const time_t& last_modified_time)
{
Delete();
Extract();
last_modified_time_ = last_modified_time;
}

void
EnvironmentManager::Environment::Delete()
{
RecursiveDirectoryDelete(destination_.c_str());
}

EnvironmentManager::Environment::~Environment()
{
Delete();
}

#endif

}}} // namespace triton::backend::python
46 changes: 40 additions & 6 deletions src/pb_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,51 @@ bool FileExists(std::string& path);
//
#ifndef _WIN32
class EnvironmentManager {
std::map<std::string, std::pair<std::string, time_t>> env_map_;
char base_path_[PATH_MAX + 1];
std::mutex mutex_;

public:
class Environment {
public:
Environment(
const std::string& source, const std::string& destination,
const time_t& last_modified_time);
~Environment();

void Update(const time_t& last_modified_time);
void IncrementRefCount() { ++ref_count_; }
size_t DecrementRefCount() { return --ref_count_; }

const std::string& Source() const { return source_; }
const std::string& Destination() const { return destination_; }
const time_t& LastModifiedTime() const { return last_modified_time_; }

private:
void Extract();
void Delete();

std::string source_;
std::string destination_;
time_t last_modified_time_;

size_t ref_count_ = 0;
};

EnvironmentManager();

// Extracts the tar.gz file in the 'env_path' if it has not been
// already extracted.
std::string ExtractIfNotExtracted(std::string env_path);
// already extracted
std::string ExtractIfNotExtracted(const std::string& env_path);

~EnvironmentManager();

// Decrement the refcount for the environment identified by
// env_path. If the refcount reaches zero, the environment is
// removed from the map.
void DropEnvironment(const std::string& env_path);

private:
size_t env_path_counter_ = 0;
std::map<std::string, Environment> env_map_;
char base_path_[PATH_MAX + 1];
std::mutex mutex_;
};
#endif

Expand Down
7 changes: 4 additions & 3 deletions src/python_be.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2027,7 +2027,8 @@ ModelInstanceState::~ModelInstanceState()
}
// Terminate stub first to allow any last messages to be received by the back
// end before deallocating the queue memory
Stub()->TerminateStub();
ModelState* model_state = reinterpret_cast<ModelState*>(Model());
Stub()->TerminateStub(model_state);
TerminateMonitor();
Stub()->ClearQueues();
Stub().reset();
Expand Down Expand Up @@ -2056,7 +2057,7 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
RETURN_IF_ERROR((*state)->SetModelConfig());

(*state)->Stub()->UpdateHealth();
(*state)->Stub()->TerminateStub();
(*state)->Stub()->TerminateStub(*state);
(*state)->Stub()->ClearQueues();
(*state)->Stub().reset();
}
Expand Down Expand Up @@ -2172,7 +2173,7 @@ ModelState::LaunchAutoCompleteStubProcess()
}
catch (const BackendModelException& ex) {
Stub()->UpdateHealth();
Stub()->TerminateStub();
Stub()->TerminateStub(this);
Stub()->ClearQueues();
Stub().reset();
RETURN_ERROR_IF_TRUE(
Expand Down
21 changes: 20 additions & 1 deletion src/stub_launcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,19 @@ StubLauncher::Initialize(ModelState* model_state)
// are supported.
if (python_execution_env_ != "") {
#ifndef _WIN32
// Resolve symlinks to avoid duplicate environment entries.
char canonical_env_path[PATH_MAX + 1];
char* err = realpath(python_execution_env_.c_str(), canonical_env_path);
if (err == nullptr) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
("Failed to get the canonical path for " + python_execution_env_ +
".")
.c_str());
}

python_execution_env_ = canonical_env_path;

RETURN_IF_ERROR(GetPythonEnvironment(model_state));
#else
return TRITONSERVER_ErrorNew(
Expand Down Expand Up @@ -798,7 +811,7 @@ StubLauncher::UpdateHealth()
}

void
StubLauncher::TerminateStub()
StubLauncher::TerminateStub(ModelState* model_state)
{
if (is_initialized_) {
bool force_kill = false;
Expand All @@ -825,6 +838,12 @@ StubLauncher::TerminateStub()
}
}

// Nothing will happen if python_execution_env_ doesn't exist
#ifndef _WIN32
model_state->StateForBackend()->env_manager->DropEnvironment(
python_execution_env_);
#endif

// First destroy the IPCControl. This makes sure that IPCControl is
// destroyed before the shared memory manager goes out of scope.
ipc_control_.reset();
Expand Down
2 changes: 1 addition & 1 deletion src/stub_launcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class StubLauncher {
}

// Destruct Stub process
void TerminateStub();
void TerminateStub(ModelState* model_state);

// Reset log queue and bls decoupled queue pointers
void ClearQueues();
Expand Down
Loading