Commit 5748471

Reranking using an optimized bi-encoder (#219)
Signed-off-by: gadmarkovits <gad.markovits@intel.com>
1 parent afa4b13 commit 5748471

File tree

10 files changed: +219, −2 lines changed


comps/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -13,6 +13,7 @@
     GeneratedDoc,
     LLMParamsDoc,
     SearchedDoc,
+    RerankedDoc,
     TextDoc,
     RAGASParams,
     RAGASScores,
```

comps/cores/proto/docarray.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -69,6 +69,11 @@ class GeneratedDoc(BaseDoc):
     prompt: str


+class RerankedDoc(BaseDoc):
+    reranked_docs: DocList[TextDoc]
+    initial_query: str
+
+
 class LLMParamsDoc(BaseDoc):
     query: str
     max_new_tokens: int = 1024
```
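The new `RerankedDoc` schema is a docarray `BaseDoc`. As a dependency-free sketch of its shape, plain dataclasses can stand in for `BaseDoc`/`DocList` (the commit itself uses docarray, not dataclasses):

```python
from dataclasses import dataclass, field

# Plain-dataclass stand-ins for docarray's BaseDoc/DocList, just to show the
# shape of the reranking response schema added in this commit.
@dataclass
class TextDoc:
    text: str

@dataclass
class RerankedDoc:
    initial_query: str
    reranked_docs: list[TextDoc] = field(default_factory=list)

# A response carries the original query plus the documents in ranked order.
res = RerankedDoc(
    initial_query="What is Deep Learning?",
    reranked_docs=[TextDoc(text="Deep learning is...")],
)
```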

comps/reranks/fastrag/README.md

Lines changed: 69 additions & 0 deletions
````markdown
# Reranking Microservice

The Reranking Microservice, built on reranking models, is a simple yet powerful tool for semantic search. Given a query and a collection of documents, it orders the documents by semantic relevance to the query, from most to least relevant. This significantly improves overall accuracy: a text retrieval system typically uses a dense embedding model or a sparse lexical search index to retrieve candidate documents, and a reranking model then refines the result by rearranging those candidates into a final, optimized order.

# 🚀1. Start Microservice with Python (Option 1)

To start the Reranking microservice, you must first install the required Python packages.

## 1.1 Install Requirements

```bash
pip install -r requirements.txt
```

## 1.2 Install fastRAG

```bash
git clone https://github.com/IntelLabs/fastRAG.git
cd fastRAG
pip install .
pip install .[intel]
```

## 1.3 Start Reranking Service with Python Script

```bash
export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static"
python local_reranking.py
```

# 🚀2. Start Microservice with Docker (Option 2)

## 2.1 Setup Environment Variables

```bash
export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static"
```

## 2.2 Build Docker Image

```bash
cd ../../
docker build -t opea/reranking-fastrag:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/fastrag/docker/Dockerfile .
```

## 2.3 Run Docker

```bash
docker run -d --name="reranking-fastrag-server" -p 8000:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:latest
```

# 🚀3. Consume Reranking Service

## 3.1 Check Service Status

```bash
curl http://localhost:8000/v1/health_check \
  -X GET \
  -H 'Content-Type: application/json'
```

## 3.2 Consume Reranking Service

```bash
curl http://localhost:8000/v1/reranking \
  -X POST \
  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
  -H 'Content-Type: application/json'
```
````

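The README's curl call can also be issued from Python. A minimal stdlib client sketch — the endpoint URL and JSON payload mirror the README, while the helper name `rerank_request` is illustrative, not part of this commit:

```python
import json
import urllib.request

def rerank_request(query, docs, url="http://localhost:8000/v1/reranking"):
    """Build a POST request matching the reranking service's input schema."""
    payload = {
        "initial_query": query,
        "retrieved_docs": [{"text": d} for d in docs],
    }
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    return req, payload

req, payload = rerank_request(
    "What is Deep Learning?",
    ["Deep Learning is not...", "Deep learning is..."],
)
# Send only when the service is actually running:
# with urllib.request.urlopen(req) as resp:
#     print(resp.read())
```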
comps/reranks/fastrag/__init__.py

Lines changed: 2 additions & 0 deletions
```python
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
```

comps/reranks/fastrag/config.py

Lines changed: 7 additions & 0 deletions
```python
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

# Re-ranking model
RANKER_MODEL = os.getenv("EMBED_MODEL", "Intel/bge-small-en-v1.5-rag-int8-static")
```
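The `os.getenv` pattern above means the ranker model is swappable at deploy time without code changes. A quick stdlib illustration — the override model name here is only an example, not one the commit ships:

```python
import os

# Without EMBED_MODEL set, the default quantized ranker model is used.
os.environ.pop("EMBED_MODEL", None)
default_model = os.getenv("EMBED_MODEL", "Intel/bge-small-en-v1.5-rag-int8-static")

# Exporting EMBED_MODEL before the service starts swaps in another model
# (the name below is purely illustrative).
os.environ["EMBED_MODEL"] = "BAAI/bge-reranker-base"
override_model = os.getenv("EMBED_MODEL", "Intel/bge-small-en-v1.5-rag-int8-static")
```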
comps/reranks/fastrag/docker/Dockerfile

Lines changed: 35 additions & 0 deletions

```dockerfile
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.10-slim

ENV LANG C.UTF-8

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim \
    git

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN git clone https://github.com/IntelLabs/fastRAG.git /home/user/fastRAG && \
    cd /home/user/fastRAG && \
    pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/reranks/fastrag/requirements.txt && \
    pip install . && \
    pip install .[intel]

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/reranks/fastrag

ENTRYPOINT ["python", "local_reranking.py"]
```
comps/reranks/fastrag/local_reranking.py

Lines changed: 38 additions & 0 deletions

```python
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from config import RANKER_MODEL
from fastrag.rankers import IPEXBiEncoderSimilarityRanker
from haystack import Document
from langsmith import traceable

from comps.cores.mega.micro_service import ServiceType, opea_microservices, register_microservice
from comps.cores.proto.docarray import RerankedDoc, SearchedDoc, TextDoc


@register_microservice(
    name="opea_service@local_reranking",
    service_type=ServiceType.RERANK,
    endpoint="/v1/reranking",
    host="0.0.0.0",
    port=8000,
    input_datatype=SearchedDoc,
    output_datatype=RerankedDoc,
)
@traceable(run_type="llm")
def reranking(input: SearchedDoc) -> RerankedDoc:
    documents = []
    for i, d in enumerate(input.retrieved_docs):
        documents.append(Document(content=d.text, id=(i + 1)))
    sorted_documents = reranker_model.run(input.initial_query, documents)["documents"]
    ranked_documents = [TextDoc(id=doc.id, text=doc.content) for doc in sorted_documents]
    res = RerankedDoc(initial_query=input.initial_query, reranked_docs=ranked_documents)
    return res


if __name__ == "__main__":
    # Use an optimized quantized bi-encoder model for re-ranking
    reranker_model = IPEXBiEncoderSimilarityRanker(RANKER_MODEL)
    reranker_model.warm_up()

    opea_microservices["opea_service@local_reranking"].start()
```
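Stripped of the fastRAG/haystack machinery, the handler above scores each retrieved document against the query and sorts from most to least relevant. A dependency-free sketch of that control flow — the token-overlap scorer is a stand-in for the optimized bi-encoder, not what the service runs:

```python
# Toy similarity: Jaccard overlap of lowercase tokens. A stand-in for the
# IPEX bi-encoder; only the rank-and-sort control flow mirrors the handler.
def toy_score(query: str, text: str) -> float:
    q, t = set(query.lower().split()), set(text.lower().split())
    return len(q & t) / len(q | t) if q | t else 0.0

def rerank(query: str, docs: list[str]) -> list[str]:
    scored = [(toy_score(query, d), d) for d in docs]
    scored.sort(key=lambda pair: pair[0], reverse=True)  # most relevant first
    return [d for _, d in scored]

ranked = rerank(
    "what is deep learning",
    ["bananas are yellow", "deep learning is a branch of machine learning"],
)
```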
comps/reranks/fastrag/requirements.txt

Lines changed: 10 additions & 0 deletions

```
docarray[full]
fastapi
haystack-ai
langchain
langsmith
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
sentence_transformers
shortuuid
```

comps/reranks/langchain/local_reranking.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -4,7 +4,7 @@
 from langsmith import traceable
 from sentence_transformers import CrossEncoder

-from comps import RerankedDoc, SearchedDoc, ServiceType, opea_microservices, register_microservice
+from comps import RerankedDoc, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice


 @register_microservice(
@@ -21,7 +21,7 @@ def reranking(input: SearchedDoc) -> RerankedDoc:
     query_and_docs = [(input.initial_query, doc.text) for doc in input.retrieved_docs]
     scores = reranker_model.predict(query_and_docs)
     first_passage = sorted(list(zip(input.retrieved_docs, scores)), key=lambda x: x[1], reverse=True)[0][0]
-    res = RerankedDoc(query=input.query, doc=first_passage)
+    res = RerankedDoc(initial_query=input.initial_query, reranked_docs=[first_passage])
     return res
```

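The langchain variant keeps only the single best passage: it zips documents with their cross-encoder scores, sorts descending, and takes the first element. The selection pattern in isolation, with placeholder documents and scores standing in for real `CrossEncoder` output:

```python
# Placeholder documents and scores; in the service, scores come from a
# sentence-transformers CrossEncoder over (query, doc) pairs.
retrieved_docs = ["doc A", "doc B", "doc C"]
scores = [0.2, 0.9, 0.5]

# Pair each doc with its score, sort by score descending, keep the top doc.
first_passage = sorted(zip(retrieved_docs, scores), key=lambda x: x[1], reverse=True)[0][0]
```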
tests/test_reranks_fastrag.sh

Lines changed: 50 additions & 0 deletions
```bash
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe

WORKPATH=$(dirname "$PWD")
ip_address=$(hostname -I | awk '{print $1}')

function build_docker_images() {
    cd $WORKPATH
    docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/reranking-fastrag:comps -f comps/reranks/fastrag/docker/Dockerfile .
}

function start_service() {
    export EMBED_MODEL="Intel/bge-small-en-v1.5-rag-int8-static"
    fastrag_service_port=8000
    unset http_proxy
    docker run -d --name="test-comps-reranking-fastrag-server" -p ${fastrag_service_port}:8000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e EMBED_MODEL=$EMBED_MODEL opea/reranking-fastrag:comps
    sleep 3m
}

function validate_microservice() {
    fastrag_service_port=8000
    http_proxy="" curl http://${ip_address}:${fastrag_service_port}/v1/reranking \
        -X POST \
        -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
        -H 'Content-Type: application/json'
    docker logs test-comps-reranking-fastrag-server
}

function stop_docker() {
    cid=$(docker ps -aq --filter "name=test-comps-rerank*")
    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {
    stop_docker

    build_docker_images
    start_service

    validate_microservice

    stop_docker
    echo y | docker system prune
}

main
```
