@@ -136,7 +136,7 @@ After Kubeflow has been installed, we can now deploy the Llama 3 LLM to KServe.
136136
1371371. Create a config file for deploying the Llama 3 model on your cluster.
138138
139- ` ` ` file {title=" llama3 .yaml" lang=" yaml" }
139+ ` ` ` file {title=" model .yaml" lang=" yaml" }
140140 apiVersion: serving.kserve.io/v1beta1
141141 kind: InferenceService
142142 metadata:
@@ -171,7 +171,7 @@ After Kubeflow has been installed, we can now deploy the Llama 3 LLM to KServe.
1711711. Apply the configuration.
172172
173173 ```command
174- kubectl apply -f llama3.yaml
174+ kubectl apply -f model.yaml
175175 ```
176176
177177Once the configuration has been applied, Llama 3 will be running on your LKE cluster.
@@ -186,9 +186,9 @@ Milvus, the vector database designed for AI inference workloads, will be used as
186186 standalone:
187187 resources:
188188 requests:
189- nvidia.com/gpu: " 1"
189+ nvidia.com/gpu: " 1"
190190 limits:
191- nvidia.com/gpu: " 1"
191+ nvidia.com/gpu: " 1"
192192 ` ` `
193193
1941941. Add Milvus to Helm.
@@ -238,54 +238,54 @@ This tutorial employs a Python script to create the YAML file used within Kubefl
238238 from kfp import dsl
239239
240240 @dsl.component(
241- base_image=' nvcr.io/nvidia/ai-workbench/python-cuda117:1.0.3' ,
242- packages_to_install=[' pymilvus>=2.4.2' , ' llama-index' , ' llama-index-vector-stores-milvus' , ' llama-index-embeddings-huggingface' , ' llama-index-llms-openai-like' ]
243- )
241+ base_image=' nvcr.io/nvidia/ai-workbench/python-cuda117:1.0.3' ,
242+ packages_to_install=[' pymilvus>=2.4.2' , ' llama-index' , ' llama-index-vector-stores-milvus' , ' llama-index-embeddings-huggingface' , ' llama-index-llms-openai-like' ]
243+ )
244244 def doc_ingest_component(url: str, collection: str) -> None:
245- print(" >>> doc_ingest_component" )
245+ print(" >>> doc_ingest_component" )
246246
247- from urllib.request import urlopen
248- from io import BytesIO
249- from zipfile import ZipFile
247+ from urllib.request import urlopen
248+ from io import BytesIO
249+ from zipfile import ZipFile
250250
251- http_response = urlopen(url)
252- zipfile = ZipFile(BytesIO(http_response.read ()))
253- zipfile.extractall(path=' ./md_docs' )
251+ http_response = urlopen(url)
252+ zipfile = ZipFile(BytesIO(http_response.read ()))
253+ zipfile.extractall(path=' ./md_docs' )
254254
255- from llama_index.core import SimpleDirectoryReader
255+ from llama_index.core import SimpleDirectoryReader
256256
257- # load documents
258- documents = SimpleDirectoryReader(" ./md_docs/" , recursive=True, required_exts=[" .md" ]).load_data()
257+ # load documents
258+ documents = SimpleDirectoryReader(" ./md_docs/" , recursive=True, required_exts=[" .md" ]).load_data()
259259
260- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
261- from llama_index.core import Settings
260+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
261+ from llama_index.core import Settings
262262
263- Settings.embed_model = HuggingFaceEmbedding(
264- model_name=" BAAI/bge-large-en-v1.5"
265- )
263+ Settings.embed_model = HuggingFaceEmbedding(
264+ model_name=" BAAI/bge-large-en-v1.5"
265+ )
266266
267- from llama_index.llms.openai_like import OpenAILike
267+ from llama_index.llms.openai_like import OpenAILike
268268
269- llm = OpenAILike(
270- model=" llama3" ,
271- api_base=" http://huggingface-llama3-predictor-00001.default.svc.cluster.local/openai/v1" ,
272- api_key = " EMPTY" ,
273- max_tokens = 512)
269+ llm = OpenAILike(
270+ model=" llama3" ,
271+ api_base=" http://huggingface-llama3-predictor-00001.default.svc.cluster.local/openai/v1" ,
272+ api_key = " EMPTY" ,
273+ max_tokens = 512)
274274
275- Settings.llm = llm
275+ Settings.llm = llm
276276
277- from llama_index.core import VectorStoreIndex, StorageContext
278- from llama_index.vector_stores.milvus import MilvusVectorStore
277+ from llama_index.core import VectorStoreIndex, StorageContext
278+ from llama_index.vector_stores.milvus import MilvusVectorStore
279279
280- vector_store = MilvusVectorStore(uri=" http://my-release-milvus.default.svc.cluster.local:19530" , collection=collection, dim=1024, overwrite=True)
281- storage_context = StorageContext.from_defaults(vector_store=vector_store)
282- index = VectorStoreIndex.from_documents(
283- documents, storage_context=storage_context
284- )
280+ vector_store = MilvusVectorStore(uri=" http://my-release-milvus.default.svc.cluster.local:19530" , collection=collection, dim=1024, overwrite=True)
281+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
282+ index = VectorStoreIndex.from_documents(
283+ documents, storage_context=storage_context
284+ )
285285
286286 @dsl.pipeline
287287 def doc_ingest_pipeline(url: str, collection: str) -> None:
288- comp = doc_ingest_component(url=url, collection=collection)
288+ comp = doc_ingest_component(url=url, collection=collection)
289289
290290 from kfp import compiler
291291
@@ -430,7 +430,8 @@ After the pipeline files have been created, we can deploy the chatbot and config
430430
4314311. Use the following YAML configuration file to deploy the pipelines and open-webui applications.
432432
433- ` ` ` file {title=" open-webui.yaml" lang=" yaml" }
433+ ` ` ` file {title=" webui-pipelines.yaml" lang=" yaml" }
434+ ---
434435 apiVersion: apps/v1
435436 kind: Deployment
436437 metadata:
@@ -439,38 +440,38 @@ After the pipeline files have been created, we can deploy the chatbot and config
439440 spec:
440441 replicas: 1
441442 selector:
442- matchLabels:
443- app: pipelines-webui
444- template:
445- metadata:
446- labels:
443+ matchLabels:
447444 app: pipelines-webui
448- spec:
449- containers:
450- - name: pipelines-webui
451- image: ghcr.io/open-webui/pipelines:main
452- ports:
453- - containerPort: 9099
454- resources:
455- requests:
456- cpu: " 500m"
457- memory: " 500Mi"
458- limits:
459- cpu: " 1000m"
460- memory: " 1Gi"
461- env:
462- - name: PIPELINES_REQUIREMENTS_PATH
463- value: " /opt/pipeline-requirements.txt"
464- - name: PIPELINES_URLS
465- value: " file:///opt/rag_pipeline.py"
466- tty: true
467- volumeMounts:
445+ template:
446+ metadata:
447+ labels:
448+ app: pipelines-webui
449+ spec:
450+ containers:
451+ - name: pipelines-webui
452+ image: ghcr.io/open-webui/pipelines:main
453+ ports:
454+ - containerPort: 9099
455+ resources:
456+ requests:
457+ cpu: " 500m"
458+ memory: " 500Mi"
459+ limits:
460+ cpu: " 1000m"
461+ memory: " 1Gi"
462+ env:
463+ - name: PIPELINES_REQUIREMENTS_PATH
464+ value: " /opt/pipeline-requirements.txt"
465+ - name: PIPELINES_URLS
466+ value: " file:///opt/rag_pipeline.py"
467+ tty: true
468+ volumeMounts:
469+ - name: config-volume
470+ mountPath: /opt
471+ volumes:
468472 - name: config-volume
469- mountPath: /opt
470- volumes:
471- - name: config-volume
472- configMap:
473- name: pipelines-files
473+ configMap:
474+ name: pipelines-files
474475 ---
475476 apiVersion: v1
476477 kind: Service
@@ -480,24 +481,24 @@ After the pipeline files have been created, we can deploy the chatbot and config
480481 spec:
481482 type: ClusterIP
482483 selector:
483- app: pipelines-webui
484+ app: pipelines-webui
484485 ports:
485- - protocol: TCP
486- port: 9099
487- targetPort: 9099
486+ - protocol: TCP
487+ port: 9099
488+ targetPort: 9099
488489 ---
489490 apiVersion: v1
490491 kind: PersistentVolumeClaim
491492 metadata:
492493 labels:
493- app: open-webui
494+ app: open-webui
494495 name: open-webui-pvc
495496 namespace: open-webui
496497 spec:
497498 accessModes: [" ReadWriteOnce" ]
498499 resources:
499- requests:
500- storage: 2Gi
500+ requests:
501+ storage: 2Gi
501502 ---
502503 apiVersion: apps/v1
503504 kind: Deployment
@@ -507,40 +508,40 @@ After the pipeline files have been created, we can deploy the chatbot and config
507508 spec:
508509 replicas: 1
509510 selector:
510- matchLabels:
511- app: open-webui
512- template:
513- metadata:
514- labels:
511+ matchLabels:
515512 app: open-webui
516- spec:
517- containers:
518- - name: open-webui
519- image: ghcr.io/open-webui/open-webui:main
520- ports:
521- - containerPort: 8080
522- resources:
523- requests:
524- cpu: " 500m"
525- memory: " 500Mi"
526- limits:
527- cpu: " 1000m"
528- memory: " 1Gi"
529- env:
530- - name: ENABLE_OLLAMA_API
531- value: " False"
532- - name: OPENAI_API_BASE_URLS
533- value: " http://huggingface-llama3-predictor-00001.default.svc.cluster.local/openai/v1;http://pipelines-service.open-webui.svc.cluster.local:9099"
534- - name: OPENAI_API_KEYS
535- value: " EMPTY;0p3n-w3bu!"
536- tty: true
537- volumeMounts:
513+ template:
514+ metadata:
515+ labels:
516+ app: open-webui
517+ spec:
518+ containers:
519+ - name: open-webui
520+ image: ghcr.io/open-webui/open-webui:main
521+ ports:
522+ - containerPort: 8080
523+ resources:
524+ requests:
525+ cpu: " 500m"
526+ memory: " 500Mi"
527+ limits:
528+ cpu: " 1000m"
529+ memory: " 1Gi"
530+ env:
531+ - name: ENABLE_OLLAMA_API
532+ value: " False"
533+ - name: OPENAI_API_BASE_URLS
534+ value: " http://huggingface-llama3-predictor-00001.default.svc.cluster.local/openai/v1;http://pipelines-service.open-webui.svc.cluster.local:9099"
535+ - name: OPENAI_API_KEYS
536+ value: " EMPTY;0p3n-w3bu!"
537+ tty: true
538+ volumeMounts:
539+ - name: webui-volume
540+ mountPath: /app/backend/data
541+ volumes:
538542 - name: webui-volume
539- mountPath: /app/backend/data
540- volumes:
541- - name: webui-volume
542- persistentVolumeClaim:
543- claimName: open-webui-pvc
543+ persistentVolumeClaim:
544+ claimName: open-webui-pvc
544545 ---
545546 apiVersion: v1
546547 kind: Service
@@ -550,18 +551,17 @@ After the pipeline files have been created, we can deploy the chatbot and config
550551 spec:
551552 type: ClusterIP
552553 selector:
553- app: open-webui
554+ app: open-webui
554555 ports:
555- - protocol: TCP
556- port: 8080
557- targetPort: 8080
558- ---
556+ - protocol: TCP
557+ port: 8080
558+ targetPort: 8080
559559 ` ` `
560560
5615611. Apply the configuration.
562562
563563 ```command
564- kubectl apply -f open-webui.yaml
564+ kubectl apply -f webui-pipelines.yaml
565565 ```
566566
567567### Access and test the chatbot application
0 commit comments