Skip to content

Commit f393b43

Browse files
committed
merged preallocated_output with allocated_output
1 parent 5cad292 commit f393b43

3 files changed

Lines changed: 7 additions & 9 deletions

File tree

core/runtime/TRTEngine.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ struct TRTEngine : torch::CustomClassHolder {
181181
std::string shape_key = "None";
182182
bool use_pre_allocated_outputs = false;
183183
std::vector<at::Tensor> pre_allocated_outputs;
184-
std::vector<at::Tensor> allocated_outputs;
185184

186185
// Output Allocator-Related Functionality
187186
bool requires_output_allocator = false; // engine requires output allocator

core/runtime/execute_engine.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,11 +247,11 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
247247
if (can_use_pre_allocated_outputs) {
248248
outputs = compiled_engine->pre_allocated_outputs;
249249
} else {
250-
if (compiled_engine->allocated_outputs.size() == 0 || compiled_engine->output_tensors_are_unowned or
250+
if (compiled_engine->pre_allocated_outputs.size() == 0 || compiled_engine->output_tensors_are_unowned or
251251
shape_changed) {
252-
compiled_engine->allocated_outputs = create_output_tensors(compiled_engine);
252+
compiled_engine->pre_allocated_outputs = create_output_tensors(compiled_engine);
253253
}
254-
outputs = compiled_engine->allocated_outputs;
254+
outputs = compiled_engine->pre_allocated_outputs;
255255
}
256256

257257
for (auto output_indices : compiled_engine->out_binding_map) {

py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,6 @@ def __init__(
174174
self.cudagraph: Optional[torch.cuda.CUDAGraph] = None
175175
self._caller_stream: Optional[torch.cuda.Stream] = None
176176
self._engine_stream: Optional[torch.cuda.Stream] = None
177-
self.output_tensors: Optional[List[torch.Tensor]] = None
178177

179178
# TODO: Make the below a Dictionary {shape: cudagraph}
180179
self.shape_key: Optional[str] = None
@@ -505,7 +504,7 @@ def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]:
505504
if can_use_pre_allocated_outputs:
506505
outputs = self.pre_allocated_outputs
507506
else:
508-
if shape_changed or self.output_tensors is None:
507+
if shape_changed or not self.pre_allocated_outputs:
509508
self.output_shapes = [
510509
tuple(self.context.get_tensor_shape(output_name))
511510
for output_name in self.output_names
@@ -515,12 +514,12 @@ def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]:
515514
"Encountered dynamic output shapes during runtime. This could mean the network has data-dependent output shapes which is not currently supported."
516515
)
517516
if (
518-
self.output_tensors is None
517+
not self.pre_allocated_outputs
519518
or self.output_tensors_are_unowned
520519
or shape_changed
521520
):
522-
self.output_tensors = self.create_output_tensors()
523-
outputs = self.output_tensors
521+
self.pre_allocated_outputs = self.create_output_tensors()
522+
outputs = self.pre_allocated_outputs
524523

525524
for o, output_name in enumerate(self.output_names):
526525
if need_cudagraphs_record:

0 commit comments

Comments (0)