Skip to content

Commit f3746dc

Browse files
Async support for some microservices (#763)
* Async support for some microservices
* [pre-commit.ci] auto fixes from pre-commit.com hooks — for more information, see https://pre-commit.ci
* fix issues
* fix issues
* fix import issue
* [pre-commit.ci] auto fixes from pre-commit.com hooks — for more information, see https://pre-commit.ci
* add dependency library
* fix issue
* roll back pinecone change
* [pre-commit.ci] auto fixes from pre-commit.com hooks — for more information, see https://pre-commit.ci

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent ceba539 commit f3746dc

File tree

19 files changed

+64
-61
lines changed

19 files changed

+64
-61
lines changed

comps/embeddings/mosec/langchain/embedding_mosec.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ def empty_embedding() -> List[float]:
     output_datatype=EmbedDoc,
 )
 @register_statistics(names=["opea_service@embedding_mosec"])
-def embedding(input: TextDoc) -> EmbedDoc:
+async def embedding(input: TextDoc) -> EmbedDoc:
     if logflag:
         logger.info(input)
     start = time.time()
-    embed_vector = embeddings.embed_query(input.text)
+    embed_vector = await embeddings.aembed_query(input.text)
     res = EmbedDoc(text=input.text, embedding=embed_vector)
     statistics_dict["opea_service@embedding_mosec"].append_latency(time.time() - start, None)
     if logflag:

comps/intent_detection/langchain/intent_detection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
     llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
     llm = HuggingFaceEndpoint(
         endpoint_url=llm_endpoint,
@@ -35,7 +35,7 @@ def llm_generate(input: LLMParamsDoc):

     llm_chain = LLMChain(prompt=prompt, llm=llm)

-    response = llm_chain.invoke(input.query)
+    response = await llm_chain.ainvoke(input.query)
     response = response["text"]
     print("response", response)
     return GeneratedDoc(text=response, prompt=input.query)

comps/llms/faq-generation/tgi/langchain/llm.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,9 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
     if logflag:
         logger.info(input)
-    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
     llm = HuggingFaceEndpoint(
         endpoint_url=llm_endpoint,
         max_new_tokens=input.max_tokens,
@@ -54,9 +53,6 @@ def llm_generate(input: LLMParamsDoc):
     """
     PROMPT = PromptTemplate.from_template(templ)
     llm_chain = load_summarize_chain(llm=llm, prompt=PROMPT)
-
-    # Split text
-    text_splitter = CharacterTextSplitter()
     texts = text_splitter.split_text(input.query)

     # Create multiple documents
@@ -77,12 +73,15 @@ async def stream_generator():

         return StreamingResponse(stream_generator(), media_type="text/event-stream")
     else:
-        response = llm_chain.invoke(docs)
+        response = await llm_chain.ainvoke(docs)
         response = response["output_text"]
         if logflag:
             logger.info(response)
         return GeneratedDoc(text=response, prompt=input.query)


 if __name__ == "__main__":
+    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
+    # Split text
+    text_splitter = CharacterTextSplitter()
     opea_microservices["opea_service@llm_faqgen"].start()

NOTE(review): this change moves `llm_endpoint` and `text_splitter` under the `if __name__ == "__main__":` guard while `llm_generate` still references them — presumably they become globals when the file is run as a script, but they will be undefined if the module is imported; confirm against how the service is launched.

comps/llms/summarization/tgi/langchain/llm.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
     if logflag:
         logger.info(input)
-    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
+
     llm = HuggingFaceEndpoint(
         endpoint_url=llm_endpoint,
         max_new_tokens=input.max_tokens,
@@ -48,9 +48,6 @@ def llm_generate(input: LLMParamsDoc):
         streaming=input.streaming,
     )
     llm_chain = load_summarize_chain(llm=llm, chain_type="map_reduce")
-
-    # Split text
-    text_splitter = CharacterTextSplitter()
     texts = text_splitter.split_text(input.query)

     # Create multiple documents
@@ -71,12 +68,15 @@ async def stream_generator():

         return StreamingResponse(stream_generator(), media_type="text/event-stream")
     else:
-        response = llm_chain.invoke(docs)
+        response = await llm_chain.ainvoke(docs)
         response = response["output_text"]
         if logflag:
             logger.info(response)
         return GeneratedDoc(text=response, prompt=input.query)


 if __name__ == "__main__":
+    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
+    # Split text
+    text_splitter = CharacterTextSplitter()
     opea_microservices["opea_service@llm_docsum"].start()

NOTE(review): as in the faq-generation file, `llm_endpoint` and `text_splitter` are now defined only under the `__main__` guard while `llm_generate` still uses them — undefined when the module is imported rather than executed; verify the deployment entry point.

comps/llms/text-generation/ollama/langchain/llm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
     if logflag:
         logger.info(input)
     ollama = Ollama(
@@ -48,7 +48,7 @@ async def stream_generator():

         return StreamingResponse(stream_generator(), media_type="text/event-stream")
     else:
-        response = ollama.invoke(input.query)
+        response = await ollama.ainvoke(input.query)
         if logflag:
             logger.info(response)
         return GeneratedDoc(text=response, prompt=input.query)

comps/llms/text-generation/ray_serve/llm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
     llm_endpoint = os.getenv("RAY_Serve_ENDPOINT", "http://localhost:8080")
     llm_model = os.getenv("LLM_MODEL", "Llama-2-7b-chat-hf")
     if "/" in llm_model:
@@ -73,7 +73,7 @@ async def stream_generator():

         return StreamingResponse(stream_generator(), media_type="text/event-stream")
     else:
-        response = llm.invoke(input.query)
+        response = await llm.ainvoke(input.query)
         response = response.content
         return GeneratedDoc(text=response, prompt=input.query)


comps/llms/text-generation/vllm/langchain/llm.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]):
+async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]):
     if logflag:
         logger.info(input)

@@ -102,7 +102,7 @@ async def stream_generator():
         return StreamingResponse(stream_generator(), media_type="text/event-stream")

     else:
-        response = llm.invoke(new_input.query, **parameters)
+        response = await llm.ainvoke(new_input.query, **parameters)
         if logflag:
             logger.info(response)

@@ -153,7 +153,7 @@ async def stream_generator():
         return StreamingResponse(stream_generator(), media_type="text/event-stream")

     else:
-        response = llm.invoke(prompt, **parameters)
+        response = await llm.ainvoke(prompt, **parameters)
         if logflag:
             logger.info(response)


comps/llms/text-generation/vllm/llama_index/llm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def post_process_text(text: str):
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
     if logflag:
         logger.info(input)
     llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008")
@@ -56,8 +56,8 @@ def llm_generate(input: LLMParamsDoc):

     if input.streaming:

-        def stream_generator():
-            for text in llm.stream_complete(input.query):
+        async def stream_generator():
+            async for text in llm.astream_complete(input.query):
                 output = text.text
                 yield f"data: {output}\n\n"
                 if logflag:
@@ -66,7 +66,7 @@ def stream_generator():

         return StreamingResponse(stream_generator(), media_type="text/event-stream")
     else:
-        response = llm.complete(input.query).text
+        response = await llm.acomplete(input.query).text
         if logflag:
             logger.info(response)
         return GeneratedDoc(text=response, prompt=input.query)

NOTE(review): `await llm.acomplete(input.query).text` takes `.text` on the un-awaited coroutine and then awaits that attribute — this raises at runtime. It should read `(await llm.acomplete(input.query)).text`.

comps/llms/text-generation/vllm/ray/llm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
     host="0.0.0.0",
     port=9000,
 )
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
     if logflag:
         logger.info(input)
     llm_endpoint = os.getenv("vLLM_RAY_ENDPOINT", "http://localhost:8006")
@@ -50,9 +50,9 @@ def llm_generate(input: LLMParamsDoc):

     if input.streaming:

-        def stream_generator():
+        async def stream_generator():
             chat_response = ""
-            for text in llm.stream(input.query):
+            for text in llm.astream(input.query):
                 text = text.content
                 chat_response += text
                 chunk_repr = repr(text.encode("utf-8"))
@@ -63,7 +63,7 @@ def stream_generator():

         return StreamingResponse(stream_generator(), media_type="text/event-stream")
     else:
-        response = llm.invoke(input.query)
+        response = await llm.ainvoke(input.query)
         response = response.content
         if logflag:
             logger.info(response)

NOTE(review): `for text in llm.astream(input.query)` uses a plain `for` over what is presumably an async iterator inside the new `async def stream_generator` — that raises `TypeError` at runtime; it should be `async for text in llm.astream(input.query)`.

comps/lvms/predictionguard/lvm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class LVMDoc(BaseDoc):
     output_datatype=TextDoc,
 )
 @register_statistics(names=["opea_service@lvm_predictionguard"])
-async def lvm(request: LVMDoc) -> TextDoc:
+def lvm(request: LVMDoc) -> TextDoc:
     start = time.time()

     # make a request to the Prediction Guard API using the LlaVa model

0 commit comments

Comments (0)