LLM-Local-Deployment/example_LLM_API_call.py at main · Vision-and-Multimodal-Intelligence-Lab/LLM-Local-Deployment · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Connection settings for the example at the bottom of this file, where they
# are passed to local_llm_api(HOST, PORT, API_KEY) and combined into the base
# URL "http://{HOST}:{PORT}/v1".
# NOTE(review): the values below look like placeholders - presumably they must
# be replaced with the real server host, port and API key before running;
# confirm against the deployment instructions.
HOST = ""
PORT = 1
API_KEY = ""

import openai

class local_llm_api:
    """Minimal streaming chat client for an OpenAI-compatible server.

    The client talks to ``http://{host_ip}:{port_num}/v1``, queries the first
    model the server lists, and keeps the running conversation in
    ``self.history`` so follow-up questions are sent with their context.

    Assistant replies are NOT recorded automatically: pass the string returned
    by ``get_response`` to ``append_history`` when the reply should become part
    of the context for later turns.
    """

    def __init__(self, host_ip, port_num, api_key_string):
        """Create the API client and pick the model to query.

        Args:
            host_ip: Host name or IP address of the server.
            port_num: Port the server listens on.
            api_key_string: API key handed to the OpenAI client.

        Raises:
            IndexError: If the server's model list is empty.
        """
        self.client = openai.Client(
            base_url=f"http://{host_ip}:{port_num}/v1",
            api_key=api_key_string,
        )
        # Use whichever model the server reports first.
        self.model_name = self.client.models.list().data[0].id

        # Chat messages ({"role": ..., "content": ...}) sent with every request.
        self.history = []

        # print model name when doing initialization
        print(self.model_name)

    def get_response(self, message):
        """Send a user message and stream the reply to stdout.

        The message is appended to ``self.history`` as a ``"user"`` turn and
        the whole history is sent to the model. Reasoning fragments (when the
        server provides ``reasoning_content``) and answer fragments are
        printed as they arrive, framed by banner lines.

        Args:
            message: Text of the user turn.

        Returns:
            The concatenated answer text. Reasoning text is printed but not
            included in the returned string.
        """
        self.history.append({
            "role": "user",
            "content": message,
        })

        response_stream = self.client.chat.completions.create(
            model=self.model_name,
            messages=self.history,
            temperature=0.6,
            top_p=0.95,
            stream=True,
        )

        answer_parts = []
        content_start = False
        reason_start = False
        for chunk in response_stream:
            # A chunk may carry no choices at all (for example a usage-only
            # chunk); there is nothing to print for it.
            if not chunk.choices:
                continue

            delta = chunk.choices[0].delta
            content = getattr(delta, "content", None)
            # `reasoning_content` is a server-side extension and may be
            # missing entirely, so it must not be accessed unconditionally.
            reasoning = getattr(delta, "reasoning_content", None)

            if content:
                if not content_start:
                    content_start = True
                    # Only close the reasoning section if one was opened.
                    if reason_start:
                        print("\n==== End of the reasoning ====")
                    print("\n==== Beginning of the answer ====")
                # Flush so fragments show up as they are streamed.
                print(content, end="", flush=True)
                answer_parts.append(content)
            elif reasoning:
                if not reason_start:
                    reason_start = True
                    print("\n==== Beginning of the reasoning ====")
                print(reasoning, end="", flush=True)

        print("\n==== End of the answer ====\n")

        return "".join(answer_parts)

    # Original (misspelled) public name, kept so existing callers keep working.
    get_reponse = get_response

    def append_history(self, answer):
        """Record an assistant reply in the conversation history.

        Args:
            answer: Assistant text to store as an ``"assistant"`` turn.
        """
        self.history.append({
            "role": "assistant",
            "content": answer,
        })


# Example: a two-turn conversation with the served model.
# Guarded so that importing this module (e.g. to reuse local_llm_api) does not
# open a connection or send requests; running the file as a script still
# executes the example and defines the same global names.
if __name__ == "__main__":
    call_llm = local_llm_api(HOST, PORT, API_KEY)

    # Alternative first prompt:
    # message = "How many 'r's are in the word 'strawberry'?"
    message = "What are the top 3 most common misconceptions about large language models?"
    message2 = "How can these misconceptions be addressed or corrected?"

    # get_reponse stores only the user turn; the reply is appended explicitly
    # so the follow-up question is sent together with the first answer.
    answer = call_llm.get_reponse(message)
    call_llm.append_history(answer)

    call_llm.get_reponse(message2)