Skip to content

Commit 07ac4ca

Browse files
santi81 and dcrankshaw
authored and committed
Support for Generic Tensorflow Models (#319)
1 parent 25e4f15 commit 07ac4ca

10 files changed

Lines changed: 514 additions & 3 deletions

File tree

bin/build_docker_images.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ build_images () {
257257
create_image python-closure-container PyClosureContainerDockerfile $public
258258
create_image pyspark-container PySparkContainerDockerfile $public
259259
create_image tf_cifar_container TensorFlowCifarDockerfile $public
260+
create_image tf-container TensorFlowDockerfile $public
260261
}
261262

262263

bin/run_unittests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ function run_integration_tests {
140140
python ../integration-tests/deploy_pyspark_models.py
141141
python ../integration-tests/deploy_pyspark_pipeline_models.py
142142
python ../integration-tests/kubernetes_integration_test.py
143+
python ../integration-tests/deploy_tensorflow_models.py
143144
../integration-tests/r_integration_test/rclipper_test.sh
144145
}
145146

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
from __future__ import print_function, with_statement, absolute_import
2+
import shutil
3+
import tensorflow as tf
4+
import logging
5+
import re
6+
import os
7+
import json
8+
9+
from ..version import __version__
10+
from ..clipper_admin import ClipperException
11+
from .deployer_utils import save_python_function
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def create_endpoint(clipper_conn,
                    name,
                    input_type,
                    func,
                    tf_sess,
                    default_output="None",
                    version=1,
                    slo_micros=3000000,
                    labels=None,
                    registry=None,
                    base_image="clipper/tf-container:{}".format(__version__),
                    num_replicas=1):
    """Register an application and deploy ``func`` with its TensorFlow
    session as a Clipper model, then link the model to the application.

    Convenience wrapper around
    :py:func:`deploy_tensorflow_model`: the application and the model share
    the same ``name``.

    Parameters
    ----------
    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
        A ``ClipperConnection`` object connected to a running Clipper cluster.
    name : str
        Name assigned to both the registered application and the deployed
        model.
    input_type : str
        One of "integers", "floats", "doubles", "bytes", or "strings".
    func : function
        The prediction function; closure-captured state is pickled with
        Cloudpickle.
    tf_sess : tensorflow.python.client.session.Session
        The TensorFlow session to save alongside the function.
    default_output : str, optional
        Output returned whenever a query misses its latency SLO. Defaults to
        "None".
    version : str, optional
        Model version; must be unique per model.
    slo_micros : int, optional
        Query latency objective in microseconds (processing latency only,
        excluding network time). Defaults to 3000000 (3s).
    labels : list(str), optional
        User annotations; ignored by Clipper itself.
    registry : str, optional
        Docker registry to push the built model image to. On Kubernetes this
        registry must be reachable from the cluster.
    base_image : str, optional
        Base Docker image containing the Clipper model container RPC client.
    num_replicas : int, optional
        Number of model replicas; adjustable later via
        :py:meth:`clipper.ClipperConnection.set_num_replicas`.
    """
    # Register the application first so the model can be linked to it below.
    clipper_conn.register_application(name, input_type, default_output,
                                      slo_micros)

    # Build and deploy the prediction function plus the saved TF session.
    deploy_tensorflow_model(
        clipper_conn,
        name,
        version,
        input_type,
        func,
        tf_sess,
        base_image,
        labels,
        registry,
        num_replicas)

    # Route the app's queries to the freshly deployed model (same name).
    clipper_conn.link_model_to_app(name, name)
86+
87+
88+
def deploy_tensorflow_model(
        clipper_conn,
        name,
        version,
        input_type,
        func,
        tf_sess,
        base_image="clipper/tf-container:{}".format(__version__),
        labels=None,
        registry=None,
        num_replicas=1):
    """Deploy a Python prediction function with a TensorFlow model.

    Serializes ``func`` with Cloudpickle and checkpoints ``tf_sess`` into the
    same directory, then builds and deploys a model container from them.

    Parameters
    ----------
    clipper_conn : :py:meth:`clipper_admin.ClipperConnection`
        A ``ClipperConnection`` object connected to a running Clipper cluster.
    name : str
        The name to assign to the deployed model.
    version : str
        The version to assign this model. Versions must be unique on a
        per-model basis, but may be re-used across different models.
    input_type : str
        One of "integers", "floats", "doubles", "bytes", or "strings".
    func : function
        The prediction function. Any state associated with the function is
        captured via closure capture and pickled with Cloudpickle.
    tf_sess : tensorflow.python.client.session.Session
        The TensorFlow session to save.
    base_image : str, optional
        The base Docker image to build the new model image from. This image
        should contain all code necessary to run a Clipper model container
        RPC client.
    labels : list(str), optional
        User annotations; ignored by Clipper itself.
    registry : str, optional
        The Docker container registry to push the freshly built model to.
        On Kubernetes this registry must be reachable from the cluster.
    num_replicas : int, optional
        The number of replicas of the model to create; adjustable later via
        :py:meth:`clipper.ClipperConnection.set_num_replicas`.

    Raises
    ------
    Exception
        Re-raises whatever ``tf.train.Saver`` raises if checkpointing fails.

    Example
    -------
    from clipper_admin import ClipperConnection, DockerContainerManager
    from clipper_admin.deployers.tensorflow import deploy_tensorflow_model

    clipper_conn = ClipperConnection(DockerContainerManager())

    # Connect to an already-running Clipper cluster
    clipper_conn.connect()

    def predict(sess, inputs):
        preds = sess.run('predict_class:0', feed_dict={'pixels:0': inputs})
        return [str(p) for p in preds]

    deploy_tensorflow_model(
        clipper_conn,
        model_name,
        version,
        input_type,
        predict_fn,
        sess)
    """
    # Serialize the predict function (closure-captured state included).
    serialization_dir = save_python_function(name, func)
    # Checkpoint the session next to the pickled function so the model
    # container can restore both from one directory.
    tf_sess_save_loc = os.path.join(serialization_dir, "tfmodel/model.ckpt")
    try:
        # Build the Saver inside the session's own graph: a no-arg
        # tf.train.Saver() collects variables from the *default* graph,
        # which is not necessarily the graph tf_sess was created with.
        # (The container restores under sess.graph.as_default() too.)
        with tf_sess.graph.as_default():
            saver = tf.train.Saver()
            save_path = saver.save(tf_sess, tf_sess_save_loc)
    except Exception as e:
        # logger.warn is deprecated; logger.exception also logs the traceback.
        logger.exception("Error saving Tensorflow model: %s" % e)
        # Bare raise preserves the original traceback (raise e resets it
        # on Python 2).
        raise

    logger.info("TensorFlow model saved at: %s " % save_path)

    # Build the container image and deploy it to the cluster.
    clipper_conn.build_and_deploy_model(name, version, input_type,
                                        serialization_dir, base_image, labels,
                                        registry, num_replicas)

    # Remove temp files
    shutil.rmtree(serialization_dir)

clipper_admin/setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,5 @@
3535
],
3636
extras_require={
3737
'PySpark': ['pyspark'],
38+
'TensorFlow': ['tensorflow'],
3839
})

containers/python/tf_container.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from __future__ import print_function
2+
import rpc
3+
import os
4+
import sys
5+
import tensorflow as tf
6+
7+
from clipper_admin.deployers import cloudpickle
8+
9+
10+
def load_predict_func(file_path):
    """Unpickle and return the user's predict function.

    The file must be opened in *binary* mode: pickle data is a byte stream,
    so text mode fails outright on Python 3 and risks newline corruption on
    platforms that translate line endings.
    """
    with open(file_path, 'rb') as serialized_func_file:
        return cloudpickle.load(serialized_func_file)
13+
14+
15+
class TfContainer(rpc.ModelContainerBase):
    """Clipper model container that serves a pickled predict function
    backed by a TensorFlow session restored from a checkpoint.

    All ``predict_*`` entry points share one implementation: the user's
    function is called as ``func(sess, inputs)`` and each prediction is
    stringified for the RPC layer.
    """

    def __init__(self, path, input_type):
        self.input_type = rpc.string_to_input_type(input_type)
        # Make any user-supplied modules importable by the unpickled function.
        modules_folder_path = "{dir}/modules/".format(dir=path)
        sys.path.append(os.path.abspath(modules_folder_path))
        predict_fname = "func.pkl"
        predict_path = "{dir}/{predict_fname}".format(
            dir=path, predict_fname=predict_fname)
        self.predict_func = load_predict_func(predict_path)
        # Fresh graph for this session; soft placement lets ops fall back to
        # CPU when the requested device is unavailable.
        self.sess = tf.Session(
            '',
            tf.Graph(),
            config=tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=True))
        metagraph_path = os.path.join(path, "tfmodel/model.ckpt.meta")
        checkpoint_path = os.path.join(path, "tfmodel/model.ckpt")
        # NOTE(review): restore is pinned to GPU 0; allow_soft_placement
        # above is presumably what keeps this working on CPU-only hosts --
        # confirm intended.
        with tf.device("/gpu:0"):
            with self.sess.graph.as_default():
                saver = tf.train.import_meta_graph(
                    metagraph_path, clear_devices=True)
                saver.restore(self.sess, checkpoint_path)

    def _predict(self, inputs):
        # Single shared path for every input type: delegate to the user
        # function and stringify each prediction for the RPC response.
        preds = self.predict_func(self.sess, inputs)
        return [str(p) for p in preds]

    def predict_ints(self, inputs):
        return self._predict(inputs)

    def predict_floats(self, inputs):
        return self._predict(inputs)

    def predict_doubles(self, inputs):
        return self._predict(inputs)

    def predict_bytes(self, inputs):
        return self._predict(inputs)

    def predict_strings(self, inputs):
        return self._predict(inputs)
56+
57+
58+
if __name__ == "__main__":
    print("Starting TensorFlow container")
    try:
        model_name = os.environ["CLIPPER_MODEL_NAME"]
    except KeyError:
        # Error messages belong on stderr (original wrote file=sys.stdout).
        print(
            "ERROR: CLIPPER_MODEL_NAME environment variable must be set",
            file=sys.stderr)
        sys.exit(1)
    try:
        model_version = os.environ["CLIPPER_MODEL_VERSION"]
    except KeyError:
        print(
            "ERROR: CLIPPER_MODEL_VERSION environment variable must be set",
            file=sys.stderr)
        sys.exit(1)

    # Optional connection settings, with sensible local defaults.
    ip = "127.0.0.1"
    if "CLIPPER_IP" in os.environ:
        ip = os.environ["CLIPPER_IP"]
    else:
        print("Connecting to Clipper on localhost")

    port = 7000
    if "CLIPPER_PORT" in os.environ:
        port = int(os.environ["CLIPPER_PORT"])
    else:
        print("Connecting to Clipper with default port: 7000")

    input_type = "doubles"
    if "CLIPPER_INPUT_TYPE" in os.environ:
        input_type = os.environ["CLIPPER_INPUT_TYPE"]
    else:
        print("Using default input type: doubles")

    model_dir_path = os.environ["CLIPPER_MODEL_PATH"]
    # Sanity check: expect at least the pickled function and the tfmodel
    # checkpoint directory. (Unused fname/full_fname locals removed.)
    model_files = os.listdir(model_dir_path)
    assert len(model_files) >= 2
    model = TfContainer(model_dir_path, input_type)
    rpc_service = rpc.RPCService()
    rpc_service.start(model, ip, port, model_name, model_version, input_type)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/env sh
# Entry point for the TensorFlow model container.
# First try to start the container as-is; if the Python process exits with
# the dedicated import-error code, install the model's declared conda/pip
# dependencies and retry once.

IMPORT_ERROR_RETURN_CODE=3

echo "Attempting to run TensorFlow container without installing any dependencies"
echo "Contents of /model"
ls /model/

# exec under bash so the python process replaces the subshell.
/bin/bash -c "exec python /container/tf_container.py"
status=$?

if [ "$status" -eq "$IMPORT_ERROR_RETURN_CODE" ]; then
  echo "Running TensorFlow container without installing dependencies fails"
  echo "Will install dependencies and try again"
  conda install -y --file /model/conda_dependencies.txt
  pip install -r /model/pip_dependencies.txt
  /bin/bash -c "exec python /container/tf_container.py"
fi

dockerfiles/ClipperTestsDockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh \
1717
ENV PATH "/opt/conda/bin:$PATH"
1818
RUN conda install -y libgcc pyzmq
1919

20-
RUN pip install requests subprocess32 scikit-learn numpy pyyaml docker kubernetes pyspark
20+
RUN pip install requests subprocess32 scikit-learn numpy pyyaml docker kubernetes pyspark tensorflow
2121

2222
# Install maven
2323
ARG MAVEN_VERSION=3.5.0
2424
ARG SHA=beb91419245395bd69a4a6edad5ca3ec1a8b64e41457672dc687c173a495f034
25-
ARG BASE_URL=https://apache.osuosl.org/maven/maven-3/${MAVEN_VERSION}/binaries
25+
ARG BASE_URL=https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries
2626

2727
RUN mkdir -p /usr/share/maven /usr/share/maven/ref \
2828
&& curl -fsSL -o /tmp/apache-maven.tar.gz ${BASE_URL}/apache-maven-$MAVEN_VERSION-bin.tar.gz \

dockerfiles/SparkScalaContainerDockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ FROM openjdk:8-jdk
55
# First set up maven
66
ARG MAVEN_VERSION=3.5.0
77
ARG SHA=beb91419245395bd69a4a6edad5ca3ec1a8b64e41457672dc687c173a495f034
8-
ARG BASE_URL=https://apache.osuosl.org/maven/maven-3/${MAVEN_VERSION}/binaries
8+
ARG BASE_URL=https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries
99

1010
RUN mkdir -p /usr/share/maven /usr/share/maven/ref \
1111
&& curl -fsSL -o /tmp/apache-maven.tar.gz ${BASE_URL}/apache-maven-$MAVEN_VERSION-bin.tar.gz \

dockerfiles/TensorFlowDockerfile

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Model container image for generic TensorFlow models, built on the
# versioned Clipper Python RPC base image.
ARG CODE_VERSION
FROM clipper/py-rpc:${CODE_VERSION}

# Install the shared Python-container dependencies via conda.
COPY clipper_admin/clipper_admin/python_container_conda_deps.txt /lib/
RUN conda install -y --file /lib/python_container_conda_deps.txt

RUN conda install tensorflow

# Container entry point script plus the TF model-serving code.
COPY containers/python/tf_container.py containers/python/tf_container_entry.sh /container/
# Install clipper_admin from source so the container can unpickle
# functions serialized by the deployer (shared cloudpickle).
COPY clipper_admin/ /lib/clipper_admin
RUN pip install /lib/clipper_admin

CMD ["/container/tf_container_entry.sh"]

# vim: set filetype=dockerfile:

0 commit comments

Comments
 (0)