PyTorch + ONNX + Caffe2 Model deployer (#362)

haofanwang · dcrankshaw · commit f8ce8ec287be · 2018-02-23T12:47:04.000-08:00
* update caffe2 deployer

* update caffe2 container

* update caffe2 container

* update Caffe2Dockerfile

* update deploy_caffe2_models.py

* Update build_docker_images.sh

* Format code

* Update caffe2 container entrypoint permissions

* Update Caffe2Dockerfile

* Update caffe2_container.py

* Update caffe2.py

* Update build_docker_images.sh

* Rename caffe2.py to onnx.py

* Update onnx.py

* Update and rename caffe2_container.py to caffe2_onnx_container.py

* Update and rename caffe2_container_entry.sh to caffe2_onnx_container_entry.sh

* Rename Caffe2Dockerfile to Caffe2OnnxDockerfile

* Rename deploy_caffe2_models.py to deploy_pytorch_to_caffe2_with_onnx.py

* Update caffe2_onnx_container_entry.sh

* Update Caffe2OnnxDockerfile

* Update deploy_pytorch_to_caffe2_with_onnx.py

* Update onnx.py

* Update onnx.py

* Update caffe2_onnx_container_entry.sh

* Update onnx.py

* Update onnx.py

* Update deploy_pytorch_to_caffe2_with_onnx.py

* Update onnx.py

* Update deploy_pytorch_to_caffe2_with_onnx.py

* Update onnx.py

* Update deploy_pytorch_to_caffe2_with_onnx.py

* Support PyTorch + ONNX + Caffe2 Model deployer

* Support PyTorch + ONNX + Caffe2 Model deployer

* Update onnx.py
diff --git a/bin/build_docker_images.sh b/bin/build_docker_images.sh
@@ -259,6 +259,7 @@ build_images () {
     create_image tf_cifar_container TensorFlowCifarDockerfile $public
     create_image tf-container TensorFlowDockerfile $public
     create_image pytorch-container PyTorchContainerDockerfile $public
+    create_image caffe2-onnx-container Caffe2OnnxDockerfile $public
     create_image mxnet-container MXNetContainerDockerfile $public
 
     # Build Metric Monitor image - no dependency
diff --git a/clipper_admin/clipper_admin/deployers/onnx.py b/clipper_admin/clipper_admin/deployers/onnx.py
@@ -0,0 +1,184 @@
+from __future__ import print_function, with_statement, absolute_import
+import shutil
+import torch
+import logging
+import re
+import os
+import json
+
+from ..version import __version__
+from ..clipper_admin import ClipperException
+from .deployer_utils import save_python_function, serialize_object
+
+logger = logging.getLogger(__name__)
+
+
+def create_pytorch_endpoint(clipper_conn,
+                            name,
+                            input_type,
+                            inputs,
+                            func,
+                            pytorch_model,
+                            default_output="None",
+                            version=1,
+                            slo_micros=3000000,
+                            labels=None,
+                            registry=None,
+                            base_image=None,
+                            num_replicas=1,
+                            onnx_backend="caffe2",
+                            batch_size=-1):
+    """This function deploys the prediction function with a PyTorch model.
+    It serializes the PyTorch model in Onnx format and creates a container that loads it as a Caffe2 model.
+    Parameters
+    ----------
+    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
+        A ``ClipperConnection`` object connected to a running Clipper cluster.
+    name : str
+        The name to be assigned to both the registered application and deployed model.
+    input_type : str
+        The input_type to be associated with the registered app and deployed model.
+        One of "integers", "floats", "doubles", "bytes", or "strings".
+    inputs :
+        input of func.
+    func : function
+        The prediction function. Any state associated with the function will be
+        captured via closure capture and pickled with Cloudpickle.
+    pytorch_model : pytorch model object
+        The PyTorch model to save.
+    default_output : str, optional
+        The default output for the application. The default output will be returned whenever
+        an application is unable to receive a response from a model within the specified
+        query latency SLO (service level objective). The reason the default output was returned
+        is always provided as part of the prediction response object. Defaults to "None".
+    version : str, optional
+        The version to assign this model. Versions must be unique on a per-model
+        basis, but may be re-used across different models.
+    slo_micros : int, optional
+        The query latency objective for the application in microseconds.
+        This is the processing latency between Clipper receiving a request
+        and sending a response. It does not account for network latencies
+        before a request is received or after a response is sent.
+        If Clipper cannot process a query within the latency objective,
+        the default output is returned. Therefore, it is recommended that
+        the SLO not be set aggressively low unless absolutely necessary.
+        100000 (100ms) is a good starting value, but the optimal latency objective
+        will vary depending on the application.
+    labels : list(str), optional
+        A list of strings annotating the model. These are ignored by Clipper
+        and used purely for user annotations.
+    registry : str, optional
+        The Docker container registry to push the freshly built model to. Note
+        that if you are running Clipper on Kubernetes, this registry must be accesible
+        to the Kubernetes cluster in order to fetch the container from the registry.
+    base_image : str, optional
+        The base Docker image to build the new model image from. This
+        image should contain all code necessary to run a Clipper model
+        container RPC client.
+    num_replicas : int, optional
+        The number of replicas of the model to create. The number of replicas
+        for a model can be changed at any time with
+        :py:meth:`clipper.ClipperConnection.set_num_replicas`.
+    onnx_backend : str, optional
+        The provided onnx backend.Caffe2 is the only currently supported ONNX backend.
+    batch_size : int, optional
+        The user-defined query batch size for the model. Replicas of the model will attempt
+        to process at most `batch_size` queries simultaneously. They may process smaller 
+        batches if `batch_size` queries are not immediately available.
+        If the default value of -1 is used, Clipper will adaptively calculate the batch size for individual
+        replicas of this model.
+    """
+
+    clipper_conn.register_application(name, input_type, default_output,
+                                      slo_micros)
+    deploy_pytorch_model(clipper_conn, name, version, input_type, inputs, func,
+                         pytorch_model, base_image, labels, registry,
+                         num_replicas, onnx_backend)
+
+    clipper_conn.link_model_to_app(name, name)
+
+
+def deploy_pytorch_model(clipper_conn,
+                         name,
+                         version,
+                         input_type,
+                         inputs,
+                         func,
+                         pytorch_model,
+                         base_image=None,
+                         labels=None,
+                         registry=None,
+                         num_replicas=1,
+                         onnx_backend="caffe2",
+                         batch_size=-1):
+    """This function deploys the prediction function with a PyTorch model.
+    It serializes the PyTorch model in Onnx format and creates a container that loads it as a Caffe2 model.
+    Parameters
+    ----------
+    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
+        A ``ClipperConnection`` object connected to a running Clipper cluster.
+    name : str
+        The name to be assigned to both the registered application and deployed model.
+    version : str
+        The version to assign this model. Versions must be unique on a per-model
+        basis, but may be re-used across different models.
+    input_type : str
+        The input_type to be associated with the registered app and deployed model.
+        One of "integers", "floats", "doubles", "bytes", or "strings".
+    inputs :
+        input of func.
+    func : function
+        The prediction function. Any state associated with the function will be
+        captured via closure capture and pickled with Cloudpickle.
+    pytorch_model : pytorch model object
+        The Pytorch model to save.
+    base_image : str, optional
+        The base Docker image to build the new model image from. This
+        image should contain all code necessary to run a Clipper model
+        container RPC client.
+    labels : list(str), optional
+        A list of strings annotating the model. These are ignored by Clipper
+        and used purely for user annotations.
+    registry : str, optional
+        The Docker container registry to push the freshly built model to. Note
+        that if you are running Clipper on Kubernetes, this registry must be accesible
+        to the Kubernetes cluster in order to fetch the container from the registry.
+    num_replicas : int, optional
+        The number of replicas of the model to create. The number of replicas
+        for a model can be changed at any time with
+        :py:meth:`clipper.ClipperConnection.set_num_replicas`.
+    onnx_backend : str, optional
+        The provided onnx backend.Caffe2 is the only currently supported ONNX backend.
+    batch_size : int, optional
+        The user-defined query batch size for the model. Replicas of the model will attempt
+        to process at most `batch_size` queries simultaneously. They may process smaller 
+        batches if `batch_size` queries are not immediately available.
+        If the default value of -1 is used, Clipper will adaptively calculate the batch size for individual
+        replicas of this model.
+    """
+    if base_image is None:
+        if onnx_backend is "caffe2":
+            base_image = "clipper/caffe2-onnx-container:{}".format(__version__)
+        else:
+            logger.error(
+                "{backend} ONNX backend is not currently supported.".format(
+                    backend=onnx_backend))
+
+    serialization_dir = save_python_function(name, func)
+
+    try:
+        torch_out = torch.onnx._export(
+            pytorch_model, inputs, "model.onnx", export_params=True)
+        # Deploy model
+        clipper_conn.build_and_deploy_model(
+            name, version, input_type, serialization_dir, base_image, labels,
+            registry, num_replicas, batch_size)
+
+    except Exception as e:
+        logger.error(
+            "Error serializing PyTorch model to ONNX: {e}".format(e=e))
+
+    logger.info("Torch model has be serialized to ONNX format")
+
+    # Remove temp files
+    shutil.rmtree(serialization_dir)
diff --git a/containers/python/caffe2_onnx_container.py b/containers/python/caffe2_onnx_container.py
@@ -0,0 +1,115 @@
+from __future__ import print_function
+import rpc
+import os
+import sys
+import json
+
+import numpy as np
+
+import cloudpickle
+
+import onnx
+import onnx_caffe2.backend
+
+import importlib
+
+IMPORT_ERROR_RETURN_CODE = 3
+
+MODEL_RELATIVE_PATH = "model.onnx"
+
+
+def load_predict_func(file_path):
+    with open(file_path, 'r') as serialized_func_file:
+        return cloudpickle.load(serialized_func_file)
+
+
+def load_onnx_into_caffe2_model(model_path):
+    model = onnx.load(model_path)
+    prepared_backend = onnx_caffe2.backend.prepare(model, device="CPU")
+    return prepared_backend
+
+
+class Caffe2Container(rpc.ModelContainerBase):
+    def __init__(self, path, input_type):
+        self.input_type = rpc.string_to_input_type(input_type)
+        modules_folder_path = "{dir}/modules/".format(dir=path)
+        sys.path.append(os.path.abspath(modules_folder_path))
+        predict_fname = "func.pkl"
+        predict_path = "{dir}/{predict_fname}".format(
+            dir=path, predict_fname=predict_fname)
+        self.predict_func = load_predict_func(predict_path)
+
+        onnx_path = os.path.join(path, MODEL_RELATIVE_PATH)
+
+        self.model = load_onnx_into_caffe2_model(onnx_path)
+
+    def predict_ints(self, inputs):
+        preds = self.predict_func(self.model, inputs)
+        return [str(p) for p in preds]
+
+    def predict_floats(self, inputs):
+        preds = self.predict_func(self.model, inputs)
+        return [str(p) for p in preds]
+
+    def predict_doubles(self, inputs):
+        preds = self.predict_func(self.model, inputs)
+        return [str(p) for p in preds]
+
+    def predict_bytes(self, inputs):
+        preds = self.predict_func(self.model, inputs)
+        return [str(p) for p in preds]
+
+    def predict_strings(self, inputs):
+        preds = self.predict_func(self.model, inputs)
+        return [str(p) for p in preds]
+
+
+if __name__ == "__main__":
+    print("Starting Caffe2Container container")
+    try:
+        model_name = os.environ["CLIPPER_MODEL_NAME"]
+    except KeyError:
+        print(
+            "ERROR: CLIPPER_MODEL_NAME environment variable must be set",
+            file=sys.stdout)
+        sys.exit(1)
+    try:
+        model_version = os.environ["CLIPPER_MODEL_VERSION"]
+    except KeyError:
+        print(
+            "ERROR: CLIPPER_MODEL_VERSION environment variable must be set",
+            file=sys.stdout)
+        sys.exit(1)
+
+    ip = "127.0.0.1"
+    if "CLIPPER_IP" in os.environ:
+        ip = os.environ["CLIPPER_IP"]
+    else:
+        print("Connecting to Clipper on localhost")
+
+    port = 7000
+    if "CLIPPER_PORT" in os.environ:
+        port = int(os.environ["CLIPPER_PORT"])
+    else:
+        print("Connecting to Clipper with default port: {port}".format(
+            port=port))
+
+    input_type = "doubles"
+    if "CLIPPER_INPUT_TYPE" in os.environ:
+        input_type = os.environ["CLIPPER_INPUT_TYPE"]
+    else:
+        print("Using default input type: doubles")
+
+    model_path = os.environ["CLIPPER_MODEL_PATH"]
+
+    print("Initializing Caffe2 ONNX container")
+    sys.stdout.flush()
+    sys.stderr.flush()
+
+    try:
+        model = Caffe2Container(model_path, input_type)
+        rpc_service = rpc.RPCService()
+        rpc_service.start(model, ip, port, model_name, model_version,
+                          input_type)
+    except ImportError:
+        sys.exit(IMPORT_ERROR_RETURN_CODE)
diff --git a/containers/python/caffe2_onnx_container_entry.sh b/containers/python/caffe2_onnx_container_entry.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env sh
+
+# Entry point of the Caffe2 ONNX model container.
+# The container is first started as-is. If it exits with the import-error
+# status, the dependencies listed under /model are installed and the
+# container is started a second time.
+
+IMPORT_ERROR_RETURN_CODE=3
+
+echo "Attempting to run Caffe2 ONNX container without installing dependencies"
+echo "Contents of /model"
+ls /model/
+/bin/bash -c "exec python /container/caffe2_onnx_container.py"
+first_run_status=$?
+if [ "$first_run_status" -eq "$IMPORT_ERROR_RETURN_CODE" ]; then
+    echo "Running Caffe2 ONNX container without installing dependencies fails"
+    echo "Will install dependencies and try again"
+    conda install -y --file /model/conda_dependencies.txt
+    pip install -r /model/pip_dependencies.txt
+    /bin/bash -c "exec python /container/caffe2_onnx_container.py"
+fi
diff --git a/dockerfiles/Caffe2OnnxDockerfile b/dockerfiles/Caffe2OnnxDockerfile
@@ -0,0 +1,22 @@
+# Image for the Caffe2 ONNX model container, built on top of the Clipper
+# Python RPC image of the same code version.
+ARG CODE_VERSION
+FROM clipper/py-rpc:${CODE_VERSION}
+
+# Conda dependency list shipped with clipper_admin
+COPY clipper_admin/clipper_admin/python_container_conda_deps.txt /lib/
+
+# Install OpenJDK 8 from jessie-backports, then the Python stack: the listed
+# conda dependencies, cloudpickle, onnx, caffe2, onnx-caffe2 and pytorch.
+# Every conda invocation passes -y so that the non-interactive image build
+# never stops at a confirmation prompt.
+RUN echo deb http://ftp.de.debian.org/debian jessie-backports main >> /etc/apt/sources.list \
+      && apt-get update --fix-missing \
+      && apt-get install -yqq -t jessie-backports openjdk-8-jdk \
+      && conda install -y --file /lib/python_container_conda_deps.txt \
+      && conda install -y -c anaconda cloudpickle=0.5.2 \
+      && conda install -y -c conda-forge onnx \
+      && conda install -y -c caffe2 caffe2 \
+      && pip install onnx-caffe2 \
+      && conda install -y -c pytorch pytorch torchvision
+
+# Container code and its entry script, plus the clipper_admin package
+COPY containers/python/caffe2_onnx_container.py containers/python/caffe2_onnx_container_entry.sh /container/
+COPY clipper_admin/ /lib/clipper_admin
+RUN pip install /lib/clipper_admin
+
+CMD ["/container/caffe2_onnx_container_entry.sh"]
+
+# vim: set filetype=dockerfile:
diff --git a/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py b/integration-tests/deploy_pytorch_to_caffe2_with_onnx.py