|
| 1 | +from __future__ import print_function, with_statement, absolute_import |
| 2 | +import shutil |
| 3 | +import torch |
| 4 | +import logging |
| 5 | +import re |
| 6 | +import os |
| 7 | +import json |
| 8 | + |
| 9 | +from ..version import __version__ |
| 10 | +from ..clipper_admin import ClipperException |
| 11 | +from .deployer_utils import save_python_function, serialize_object |
| 12 | + |
| 13 | +logger = logging.getLogger(__name__) |
| 14 | + |
| 15 | + |
def create_pytorch_endpoint(clipper_conn,
                            name,
                            input_type,
                            inputs,
                            func,
                            pytorch_model,
                            default_output="None",
                            version=1,
                            slo_micros=3000000,
                            labels=None,
                            registry=None,
                            base_image=None,
                            num_replicas=1,
                            onnx_backend="caffe2",
                            batch_size=-1):
    """Register an application, deploy a PyTorch model and link the two.

    The PyTorch model is serialized in ONNX format and served from a container
    that loads it with the selected ONNX backend (Caffe2 by default).

    Parameters
    ----------
    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
        A ``ClipperConnection`` object connected to a running Clipper cluster.
    name : str
        The name to be assigned to both the registered application and deployed model.
    input_type : str
        The input_type to be associated with the registered app and deployed model.
        One of "integers", "floats", "doubles", "bytes", or "strings".
    inputs :
        Input of ``func``; it is also handed to the ONNX export of ``pytorch_model``.
    func : function
        The prediction function. Any state associated with the function will be
        captured via closure capture and pickled with Cloudpickle.
    pytorch_model : pytorch model object
        The PyTorch model to save.
    default_output : str, optional
        The default output for the application. The default output will be returned whenever
        an application is unable to receive a response from a model within the specified
        query latency SLO (service level objective). The reason the default output was returned
        is always provided as part of the prediction response object. Defaults to "None".
    version : int or str, optional
        The version to assign this model. Versions must be unique on a per-model
        basis, but may be re-used across different models. Defaults to 1.
    slo_micros : int, optional
        The query latency objective for the application in microseconds.
        This is the processing latency between Clipper receiving a request
        and sending a response. It does not account for network latencies
        before a request is received or after a response is sent.
        If Clipper cannot process a query within the latency objective,
        the default output is returned. Therefore, it is recommended that
        the SLO not be set aggressively low unless absolutely necessary.
        100000 (100ms) is a good starting value, but the optimal latency objective
        will vary depending on the application.
    labels : list(str), optional
        A list of strings annotating the model. These are ignored by Clipper
        and used purely for user annotations.
    registry : str, optional
        The Docker container registry to push the freshly built model to. Note
        that if you are running Clipper on Kubernetes, this registry must be accessible
        to the Kubernetes cluster in order to fetch the container from the registry.
    base_image : str, optional
        The base Docker image to build the new model image from. This
        image should contain all code necessary to run a Clipper model
        container RPC client.
    num_replicas : int, optional
        The number of replicas of the model to create. The number of replicas
        for a model can be changed at any time with
        :py:meth:`clipper.ClipperConnection.set_num_replicas`.
    onnx_backend : str, optional
        The ONNX backend to use. Caffe2 is the only currently supported ONNX backend.
    batch_size : int, optional
        The user-defined query batch size for the model. Replicas of the model will attempt
        to process at most `batch_size` queries simultaneously. They may process smaller
        batches if `batch_size` queries are not immediately available.
        If the default value of -1 is used, Clipper will adaptively calculate the batch size
        for individual replicas of this model.
    """
    clipper_conn.register_application(name, input_type, default_output,
                                      slo_micros)

    # Optional arguments are passed by keyword so none of them can be
    # dropped or shifted by position; in particular ``batch_size`` must be
    # forwarded, otherwise the caller's value is silently replaced by the
    # deploy function's own default.
    deploy_pytorch_model(
        clipper_conn,
        name,
        version,
        input_type,
        inputs,
        func,
        pytorch_model,
        base_image=base_image,
        labels=labels,
        registry=registry,
        num_replicas=num_replicas,
        onnx_backend=onnx_backend,
        batch_size=batch_size)

    # The application and the model share the same name.
    clipper_conn.link_model_to_app(name, name)
| 99 | + |
| 100 | + |
def deploy_pytorch_model(clipper_conn,
                         name,
                         version,
                         input_type,
                         inputs,
                         func,
                         pytorch_model,
                         base_image=None,
                         labels=None,
                         registry=None,
                         num_replicas=1,
                         onnx_backend="caffe2",
                         batch_size=-1):
    """Deploy a prediction function together with a PyTorch model.

    The prediction function is saved to a temporary serialization directory,
    the PyTorch model is exported in ONNX format into that same directory, and
    the directory is then built into a model container and deployed.

    Failures are reported through the module logger and are not raised to the
    caller; the temporary serialization directory is removed in every case.

    Parameters
    ----------
    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
        A ``ClipperConnection`` object connected to a running Clipper cluster.
    name : str
        The name to be assigned to the deployed model.
    version : int or str
        The version to assign this model. Versions must be unique on a per-model
        basis, but may be re-used across different models.
    input_type : str
        The input_type to be associated with the deployed model.
        One of "integers", "floats", "doubles", "bytes", or "strings".
    inputs :
        Input of ``func``; it is also handed to the ONNX export of ``pytorch_model``.
    func : function
        The prediction function. Any state associated with the function will be
        captured via closure capture and pickled with Cloudpickle.
    pytorch_model : pytorch model object
        The PyTorch model to save.
    base_image : str, optional
        The base Docker image to build the new model image from. This
        image should contain all code necessary to run a Clipper model
        container RPC client. When omitted, the image matching
        ``onnx_backend`` and the installed package version is used.
    labels : list(str), optional
        A list of strings annotating the model. These are ignored by Clipper
        and used purely for user annotations.
    registry : str, optional
        The Docker container registry to push the freshly built model to. Note
        that if you are running Clipper on Kubernetes, this registry must be accessible
        to the Kubernetes cluster in order to fetch the container from the registry.
    num_replicas : int, optional
        The number of replicas of the model to create. The number of replicas
        for a model can be changed at any time with
        :py:meth:`clipper.ClipperConnection.set_num_replicas`.
    onnx_backend : str, optional
        The ONNX backend to use. Caffe2 is the only currently supported ONNX backend.
    batch_size : int, optional
        The user-defined query batch size for the model. Replicas of the model will attempt
        to process at most `batch_size` queries simultaneously. They may process smaller
        batches if `batch_size` queries are not immediately available.
        If the default value of -1 is used, Clipper will adaptively calculate the batch size
        for individual replicas of this model.
    """
    if base_image is None:
        # Compare by value: ``is`` tests object identity, so an equal string
        # built at runtime (e.g. read from a config file) could be rejected.
        if onnx_backend == "caffe2":
            base_image = "clipper/caffe2-onnx-container:{}".format(__version__)
        else:
            logger.error(
                "{backend} ONNX backend is not currently supported.".format(
                    backend=onnx_backend))
            # Without a base image there is nothing to build, so stop
            # before creating any temporary files.
            return

    serialization_dir = save_python_function(name, func)

    try:
        try:
            # Export into the serialization directory (not the current
            # working directory) so the ONNX file is part of what gets
            # packaged and is removed together with the temp files.
            torch.onnx._export(
                pytorch_model,
                inputs,
                os.path.join(serialization_dir, "model.onnx"),
                export_params=True)
        except Exception as e:
            logger.error(
                "Error serializing PyTorch model to ONNX: {e}".format(e=e))
            return

        # Only reported once the export above has actually succeeded.
        logger.info("Torch model has be serialized to ONNX format")

        try:
            # Deploy model
            clipper_conn.build_and_deploy_model(
                name, version, input_type, serialization_dir, base_image,
                labels, registry, num_replicas, batch_size)
        except Exception as e:
            # NOTE(review): deployment errors are logged rather than raised,
            # matching the previous best-effort behaviour; callers cannot
            # detect a failed deploy from the return value.
            logger.error("Error deploying PyTorch model: {e}".format(e=e))
    finally:
        # Remove temp files
        shutil.rmtree(serialization_dir)
0 commit comments