|
| 1 | +from __future__ import print_function, with_statement, absolute_import |
| 2 | +import shutil |
| 3 | +import tensorflow as tf |
| 4 | +import logging |
| 5 | +import re |
| 6 | +import os |
| 7 | +import json |
| 8 | + |
| 9 | +from ..version import __version__ |
| 10 | +from ..clipper_admin import ClipperException |
| 11 | +from .deployer_utils import save_python_function |
| 12 | + |
| 13 | +logger = logging.getLogger(__name__) |
| 14 | + |
| 15 | + |
def create_endpoint(clipper_conn,
                    name,
                    input_type,
                    func,
                    tf_sess,
                    default_output="None",
                    version=1,
                    slo_micros=3000000,
                    labels=None,
                    registry=None,
                    base_image="clipper/tf-container:{}".format(__version__),
                    num_replicas=1):
    """Register an application, deploy ``func`` with its TensorFlow session
    as a Clipper model, and link the model to the application.

    This is a convenience wrapper that performs the three standard steps
    (register app, deploy model, link) under a single shared ``name``.

    Parameters
    ----------
    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
        A ``ClipperConnection`` object connected to a running Clipper cluster.
    name : str
        The name assigned to both the registered application and the
        deployed model.
    input_type : str
        The input type for the app and model. One of "integers", "floats",
        "doubles", "bytes", or "strings".
    func : function
        The prediction function. Any state it closes over is captured and
        pickled with Cloudpickle.
    tf_sess : The Tensorflow Session to save.
    default_output : str, optional
        Returned to callers whenever the application cannot get a response
        from the model within the latency SLO. The prediction response
        object always reports when the default was used. Defaults to "None".
    version : str, optional
        The version assigned to this model. Versions must be unique per
        model but may be reused across different models.
    slo_micros : int, optional
        Query latency objective in microseconds, measured from Clipper
        receiving a request to sending the response (network time on either
        side is not counted). Queries that miss the objective get the
        default output, so avoid setting this aggressively low; 100000
        (100ms) is a reasonable starting point.
    labels : list(str), optional
        Free-form annotation strings stored with the model; Clipper ignores
        them.
    registry : str, optional
        Docker registry to push the built model image to. When running
        Clipper on Kubernetes, the cluster must be able to pull from this
        registry.
    base_image : str, optional
        Base Docker image for the model image; it must contain everything
        needed to run a Clipper model container RPC client.
    num_replicas : int, optional
        Number of model replicas to create. Can be changed later via
        :py:meth:`clipper.ClipperConnection.set_num_replicas`.
    """

    # Register the application first so the model can be linked to it below.
    clipper_conn.register_application(name, input_type, default_output,
                                      slo_micros)

    # Build and deploy the container wrapping ``func`` and ``tf_sess``.
    deploy_tensorflow_model(clipper_conn, name, version, input_type, func,
                            tf_sess, base_image, labels, registry,
                            num_replicas)

    # Route the application's queries to the freshly deployed model.
    clipper_conn.link_model_to_app(name, name)
| 86 | + |
| 87 | + |
def deploy_tensorflow_model(
        clipper_conn,
        name,
        version,
        input_type,
        func,
        tf_sess,
        base_image="clipper/tf-container:{}".format(__version__),
        labels=None,
        registry=None,
        num_replicas=1):
    """Deploy a Python prediction function with a Tensorflow model.

    Serializes ``func`` and a checkpoint of ``tf_sess`` into a temporary
    directory, builds a Docker model image from it, deploys the image to the
    Clipper cluster, and removes the temporary directory afterwards.

    Parameters
    ----------
    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
        A ``ClipperConnection`` object connected to a running Clipper cluster.
    name : str
        The name to be assigned to both the registered application and deployed model.
    version : str
        The version to assign this model. Versions must be unique on a per-model
        basis, but may be re-used across different models.
    input_type : str
        The input_type to be associated with the registered app and deployed model.
        One of "integers", "floats", "doubles", "bytes", or "strings".
    func : function
        The prediction function. Any state associated with the function will be
        captured via closure capture and pickled with Cloudpickle.
    tf_sess : tensorflow.python.client.session.Session
        The tensor flow session to save.
    base_image : str, optional
        The base Docker image to build the new model image from. This
        image should contain all code necessary to run a Clipper model
        container RPC client.
    labels : list(str), optional
        A list of strings annotating the model. These are ignored by Clipper
        and used purely for user annotations.
    registry : str, optional
        The Docker container registry to push the freshly built model to. Note
        that if you are running Clipper on Kubernetes, this registry must be accesible
        to the Kubernetes cluster in order to fetch the container from the registry.
    num_replicas : int, optional
        The number of replicas of the model to create. The number of replicas
        for a model can be changed at any time with
        :py:meth:`clipper.ClipperConnection.set_num_replicas`.

    Example
    -------
    from clipper_admin import ClipperConnection, DockerContainerManager
    from clipper_admin.deployers.tensorflow import deploy_tensorflow_model

    clipper_conn = ClipperConnection(DockerContainerManager())

    # Connect to an already-running Clipper cluster
    clipper_conn.connect()

    def predict(sess, inputs):
        preds = sess.run('predict_class:0', feed_dict={'pixels:0': inputs})
        return [str(p) for p in preds]

    deploy_tensorflow_model(
        clipper_conn,
        model_name,
        version,
        input_type,
        predict_fn,
        sess)

    """
    # Serialize the predict function (and its closure state) into a fresh
    # temporary directory.
    serialization_dir = save_python_function(name, func)
    try:
        # Save the TensorFlow session checkpoint next to the function so
        # the model container can restore it at startup. Build the path
        # with os.path.join components rather than a hard-coded "/".
        tf_sess_save_loc = os.path.join(serialization_dir, "tfmodel",
                                        "model.ckpt")
        try:
            saver = tf.train.Saver()
            save_path = saver.save(tf_sess, tf_sess_save_loc)
        except Exception:
            # logger.exception logs the full traceback (logger.warn is
            # deprecated); a bare ``raise`` preserves the original
            # traceback for the caller instead of rebinding it.
            logger.exception("Error saving Tensorflow model")
            raise

        logger.info("TensorFlow model saved at: %s ", save_path)

        # Build the Docker image and deploy it to the Clipper cluster.
        clipper_conn.build_and_deploy_model(name, version, input_type,
                                            serialization_dir, base_image,
                                            labels, registry, num_replicas)
    finally:
        # Always remove the temporary serialization directory, even when
        # saving the session or deploying the image fails — the original
        # code leaked it on any error.
        shutil.rmtree(serialization_dir)
0 commit comments