|
| 1 | +# Edge Craft Retrieval-Augmented Generation |
| 2 | + |
| 3 | +Edge Craft RAG (EC-RAG) is a customizable, tunable and production-ready |
| 4 | +Retrieval-Augmented Generation system for edge solutions. It is designed to |
| 5 | +curate the RAG pipeline to meet hardware requirements at the edge with |
| 6 | +guaranteed quality and performance. |
| 7 | + |
| 8 | +## Quick Start Guide |
| 9 | + |
| 10 | +### Run Containers with Docker Compose |
| 11 | + |
| 12 | +```bash |
| 13 | +cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc |
| 14 | + |
| 15 | +export MODEL_PATH="your model path for all your models" |
| 16 | +export DOC_PATH="your doc path for uploading a dir of files" |
| 17 | +export HOST_IP="your host ip" |
| 18 | +export UI_SERVICE_PORT="port for UI service" |
| 19 | + |
| 20 | +# Optional for vllm endpoint |
| 21 | +export vLLM_ENDPOINT="http://${HOST_IP}:8008" |
| 22 | + |
| 23 | +# If you have a proxy configured, uncomment the line below |
| 24 | +# export no_proxy=$no_proxy,${HOST_IP},edgecraftrag,edgecraftrag-server |
| 25 | +# If you have a HF mirror configured, it will be imported to the container |
| 26 | +# export HF_ENDPOINT="your HF mirror endpoint" |
| 27 | + |
| 28 | +# The container ports are set by default; uncomment and edit the lines below to change them |
| 29 | +# export MEGA_SERVICE_PORT=16011 |
| 30 | +# export PIPELINE_SERVICE_PORT=16010 |
| 31 | + |
| 32 | +docker compose up -d |
| 33 | +``` |
| 34 | + |
| 35 | +### (Optional) Build Docker Images for Mega Service, Server and UI on your own |
| 36 | + |
| 37 | +```bash |
| 38 | +cd GenAIExamples/EdgeCraftRAG |
| 39 | + |
| 40 | +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag:latest -f Dockerfile . |
| 41 | +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-server:latest -f Dockerfile.server . |
| 42 | +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . |
| 43 | +``` |
| 44 | + |
| 45 | +### ChatQnA with LLM Example (Command Line) |
| 46 | + |
| 47 | +```bash |
| 48 | +cd GenAIExamples/EdgeCraftRAG |
| 49 | + |
| 50 | +# Activate pipeline test_pipeline_local_llm |
| 51 | +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.' |
| 52 | + |
| 53 | +# Will need to wait for several minutes |
| 54 | +# Expected output: |
| 55 | +# { |
| 56 | +# "idx": "3214cf25-8dff-46e6-b7d1-1811f237cf8c", |
| 57 | +# "name": "rag_test", |
| 58 | +# "comp_type": "pipeline", |
| 59 | +# "node_parser": { |
| 60 | +# "idx": "ababed12-c192-4cbb-b27e-e49c76a751ca", |
| 61 | +# "parser_type": "simple", |
| 62 | +# "chunk_size": 400, |
| 63 | +# "chunk_overlap": 48 |
| 64 | +# }, |
| 65 | +# "indexer": { |
| 66 | +# "idx": "46969b63-8a32-4142-874d-d5c86ee9e228", |
| 67 | +# "indexer_type": "faiss_vector", |
| 68 | +# "model": { |
| 69 | +# "idx": "7aae57c0-13a4-4a15-aecb-46c2ec8fe738", |
| 70 | +# "type": "embedding", |
| 71 | +# "model_id": "BAAI/bge-small-en-v1.5", |
| 72 | +# "model_path": "/home/user/models/bge_ov_embedding", |
| 73 | +# "device": "auto" |
| 74 | +# } |
| 75 | +# }, |
| 76 | +# "retriever": { |
| 77 | +# "idx": "3747fa59-ff9b-49b6-a8e8-03cdf8c979a4", |
| 78 | +# "retriever_type": "vectorsimilarity", |
| 79 | +# "retrieve_topk": 30 |
| 80 | +# }, |
| 81 | +# "postprocessor": [ |
| 82 | +# { |
| 83 | +# "idx": "d46a6cae-ba7a-412e-85b7-d334f175efaa", |
| 84 | +# "postprocessor_type": "reranker", |
| 85 | +# "model": { |
| 86 | +# "idx": "374e7471-bd7d-41d0-b69d-a749a052b4b0", |
| 87 | +# "type": "reranker", |
| 88 | +# "model_id": "BAAI/bge-reranker-large", |
| 89 | +# "model_path": "/home/user/models/bge_ov_reranker", |
| 90 | +# "device": "auto" |
| 91 | +# }, |
| 92 | +# "top_n": 2 |
| 93 | +# } |
| 94 | +# ], |
| 95 | +# "generator": { |
| 96 | +# "idx": "52d8f112-6290-4dd3-bc28-f9bd5deeb7c8", |
| 97 | +# "generator_type": "local", |
| 98 | +# "model": { |
| 99 | +# "idx": "fa0c11e1-46d1-4df8-a6d8-48cf6b99eff3", |
| 100 | +# "type": "llm", |
| 101 | +# "model_id": "qwen2-7b-instruct", |
| 102 | +# "model_path": "/home/user/models/qwen2-7b-instruct/INT4_compressed_weights", |
| 103 | +# "device": "auto" |
| 104 | +# } |
| 105 | +# }, |
| 106 | +# "status": { |
| 107 | +# "active": true |
| 108 | +# } |
| 109 | +# } |
| 110 | + |
| 111 | +# Prepare data from local directory |
| 112 | +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"#REPLACE WITH YOUR LOCAL DOC DIR#"}' | jq '.' |
| 113 | + |
| 114 | +# Validate Mega Service |
| 115 | +curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.' |
| 116 | +``` |
| 117 | + |
| 118 | +### ChatQnA with LLM Example (UI) |
| 119 | + |
| 120 | +Open your browser, access http://${HOST_IP}:8082 |
| 121 | + |
| 122 | +> Your browser should be running on the same host as your console, otherwise you will need to access UI with your host domain name instead of ${HOST_IP}. |
| 123 | + |
| 124 | +### (Optional) Launch vLLM with OpenVINO service |
| 125 | + |
| 126 | +```bash |
| 127 | +# 1. export LLM_MODEL |
| 128 | +export LLM_MODEL="your model id" |
| 129 | +# 2. Uncomment the code below in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml' |
| 130 | + # vllm-service: |
| 131 | + # image: vllm:openvino |
| 132 | + # container_name: vllm-openvino-server |
| 133 | +  # depends_on: |
| 134 | +  # - edgecraftrag-server |
| 135 | + # ports: |
| 136 | + # - "8008:80" |
| 137 | + # environment: |
| 138 | + # no_proxy: ${no_proxy} |
| 139 | + # http_proxy: ${http_proxy} |
| 140 | + # https_proxy: ${https_proxy} |
| 141 | + # vLLM_ENDPOINT: ${vLLM_ENDPOINT} |
| 142 | + # LLM_MODEL: ${LLM_MODEL} |
| 143 | + # entrypoint: /bin/bash -c "\ |
| 144 | + # cd / && \ |
| 145 | + # export VLLM_CPU_KVCACHE_SPACE=50 && \ |
| 146 | + # python3 -m vllm.entrypoints.openai.api_server \ |
| 147 | + # --model '${LLM_MODEL}' \ |
| 148 | + # --host 0.0.0.0 \ |
| 149 | + # --port 80" |
| 150 | +``` |
| 151 | + |
| 152 | +## Advanced User Guide |
| 153 | + |
| 154 | +### Pipeline Management |
| 155 | + |
| 156 | +#### Create a pipeline |
| 157 | + |
| 158 | +```bash |
| 159 | +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline.json | jq '.' |
| 160 | +``` |
| 161 | + |
| 162 | +It will take some time to prepare the embedding model. |
| 163 | + |
| 164 | +#### Upload a text |
| 165 | + |
| 166 | +```bash |
| 167 | +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' |
| 168 | +``` |
| 169 | + |
| 170 | +#### Provide a query to retrieve context with similarity search |
| 171 | + |
| 172 | +```bash |
| 173 | +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d @examples/test_query.json | jq '.' |
| 174 | +``` |
| 175 | + |
| 176 | +#### Create the second pipeline test2 |
| 177 | + |
| 178 | +```bash |
| 179 | +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline2.json | jq '.' |
| 180 | +``` |
| 181 | + |
| 182 | +#### Check all pipelines |
| 183 | + |
| 184 | +```bash |
| 185 | +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.' |
| 186 | +``` |
| 187 | + |
| 188 | +#### Compare similarity retrieval (test1) and keyword retrieval (test2) |
| 189 | + |
| 190 | +```bash |
| 191 | +# Activate pipeline test1 |
| 192 | +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test1 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' |
| 193 | +# Similarity retrieval |
| 194 | +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' |
| 195 | + |
| 196 | +# Activate pipeline test2 |
| 197 | +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test2 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' |
| 198 | +# Keyword retrieval |
| 199 | +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' |
| 200 | + |
| 201 | +``` |
| 202 | + |
| 203 | +### Model Management |
| 204 | + |
| 205 | +#### Load a model |
| 206 | + |
| 207 | +```bash |
| 208 | +curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d @examples/test_model_load.json | jq '.' |
| 209 | +``` |
| 210 | + |
| 211 | +It will take some time to load the model. |
| 212 | + |
| 213 | +#### Check all models |
| 214 | + |
| 215 | +```bash |
| 216 | +curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" | jq '.' |
| 217 | +``` |
| 218 | + |
| 219 | +#### Update a model |
| 220 | + |
| 221 | +```bash |
| 222 | +curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d @examples/test_model_update.json | jq '.' |
| 223 | +``` |
| 224 | + |
| 225 | +#### Check a certain model |
| 226 | + |
| 227 | +```bash |
| 228 | +curl -X GET http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' |
| 229 | +``` |
| 230 | + |
| 231 | +#### Delete a model |
| 232 | + |
| 233 | +```bash |
| 234 | +curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' |
| 235 | +``` |
| 236 | + |
| 237 | +### File Management |
| 238 | + |
| 239 | +#### Add a text |
| 240 | + |
| 241 | +```bash |
| 242 | +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' |
| 243 | +``` |
| 244 | + |
| 245 | +#### Add files from an existing file path |
| 246 | + |
| 247 | +```bash |
| 248 | +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_dir.json | jq '.' |
| 249 | +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.' |
| 250 | +``` |
| 251 | + |
| 252 | +#### Check all files |
| 253 | + |
| 254 | +```bash |
| 255 | +curl -X GET http://${HOST_IP}:16010/v1/data/files -H "Content-Type: application/json" | jq '.' |
| 256 | +``` |
| 257 | + |
| 258 | +#### Check one file |
| 259 | + |
| 260 | +```bash |
| 261 | +curl -X GET http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' |
| 262 | +``` |
| 263 | + |
| 264 | +#### Delete a file |
| 265 | + |
| 266 | +```bash |
| 267 | +curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' |
| 268 | +``` |
| 269 | + |
| 270 | +#### Update a file |
| 271 | + |
| 272 | +```bash |
| 273 | +curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.' |
| 274 | +``` |
0 commit comments