ProtosAI/llm/llama.cpp/chat.py at master · jasonacox/ProtosAI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/python3
"""
Llama_cpp CLI ChatBot Example

Python chat client for OpenAI and the llama-cpp-python[server] OpenAI API Compatible
Web Server. Provides a simple command line interface (CLI) chat session.

Features:
  * Uses OpenAI API
  * Works with a locally hosted OpenAI-compatible llama-cpp-python[server]
  * Retains conversational context for LLM
  * Uses response stream to render LLM chunks instead of waiting for full response

Requirements:
  * pip install openai

Running a llama-cpp-python server:
  * CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
  * pip install llama-cpp-python[server]
  * python3 -m llama_cpp.server --model models/7B/ggml-model.bin

Author: Jason A. Cox
10 Sept 2023
https://github.com/jasonacox/ProtosAI

"""
import openai
import datetime

# Configuration Settings - Showing local LLM
# API key handed to the OpenAI client (the value here is a placeholder string).
api_key = "OPENAI_API_KEY"
# Base URL of the OpenAI-compatible API endpoint the client talks to.
api_base = "http://localhost:8000/v1"
# Name the bot uses for itself in the base prompt and in the CLI output.
agentname = "Jarvis"
# Model identifier sent with each request, e.g. gpt-3.5-turbo for OpenAI.
mymodel = "tinyllm"
# When True, a scripted series of test prompts is run before interactive input.
TESTMODE = False
# When True, the base prompt is sent with the "system" role; otherwise it is
# sent as a "user" message followed by a canned "assistant" acknowledgement.
USE_SYSTEM = False

# Build the base prompt (agent name plus today's date as MM/DD/YYYY) and seed
# the conversation context that every request sends along.
current_date = datetime.datetime.now()
formatted_date = f"{current_date:%m/%d/%Y}"
baseprompt = (
    f"You are {agentname}, a highly intelligent assistant. "
    f"Keep your answers brief and accurate. Current date is {formatted_date}."
)
context = (
    [{"role": "system", "content": baseprompt}]
    if USE_SYSTEM
    else [
        {"role": "user", "content": baseprompt},
        {"role": "assistant", "content": "Okay, let's get started."},
    ]
)

# Function - Send prompt to LLM for response
def ask(prompt):
    """Send a user prompt to the LLM and return the streaming response.

    The prompt is appended to the module-level ``context`` list as a "user"
    message, and the whole conversation is submitted to the chat completions
    endpoint with ``stream=True``.

    Args:
        prompt: Text of the user's message.

    Returns:
        The value returned by ``chat.completions.create``; iterate over it to
        receive the response chunks as they are generated.

    Raises:
        Exception: Any error raised while creating the client or the request
            is re-raised unchanged, after the prompt has been removed from
            ``context`` again.
    """
    # remember context
    message = {"role": "user", "content": prompt}
    context.append(message)
    try:
        llm = openai.OpenAI(api_key=api_key, base_url=api_base)
        return llm.chat.completions.create(
            model=mymodel,
            max_tokens=1024,
            stream=True,  # Send response chunks as LLM computes next tokens
            temperature=0.7,
            messages=context,
        )
    except Exception:
        # The request never went through: drop the unanswered prompt so a
        # caller that recovers does not keep a dangling user message in the
        # conversation history.
        if context and context[-1] is message:
            context.pop()
        raise

# Function - Render LLM response output in chunks
def printresponse(response):
    """Print a streamed LLM response chunk by chunk and record it in context.

    Each chunk's text is written to stdout as soon as it arrives (no newline,
    flushed immediately). When the stream ends, a newline is printed and the
    full text is appended to the module-level ``context`` list as an
    "assistant" message.

    Args:
        response: Iterable of streaming events, each exposing
            ``choices[0].delta.content``.

    Returns:
        The complete response text (an empty string if no text was received).
    """
    pieces = []
    # iterate through the stream of events and print it
    for event in response:
        # OpenAI-compatible servers may emit chunks whose ``choices`` list is
        # empty (for example a usage-only final chunk); indexing [0] on those
        # would raise IndexError, so skip them.
        if not event.choices:
            continue
        event_text = event.choices[0].delta.content
        if event_text:
            pieces.append(event_text)
            print(event_text, end="", flush=True)
    print("", flush=True)
    completion_text = "".join(pieces)
    # remember context
    context.append({"role": "assistant", "content": completion_text})
    return completion_text

# Chatbot Header
print(f"ChatBot - Greetings! My name is {agentname}. Enter an empty line to quit chat.")
print()

# Scripted prompts (TESTMODE only) are consumed in order before the loop
# falls back to reading from the user.
prompts = []
if TESTMODE:
    # define the series of questions here
    prompts.append("What is your name?")
    prompts.append("What is today's date?")
    prompts.append("What day of the week is it?")
    prompts.append("Answer this riddle: Ram's mom has three children, Reshma, Raja and a third one. What is the name of the third child?")
    prompts.append("Pick a color.")
    prompts.append("Now write a poem about that color.")
    prompts.append("Thank you very much! Goodbye!")

# Loop to prompt user for input
while True:
    if prompts:
        p = prompts.pop(0)
        print(f"> {p}")
    else:
        try:
            p = input("> ")
        except EOFError:
            # stdin was closed (Ctrl-D or an exhausted pipe): end the chat
            # the same way an empty line does instead of dying with a
            # traceback.
            print()
            break
    # An empty line ends the session.
    if not p:
        break
    print()
    response = ask(p)
    print(f"{agentname}> ", end="", flush=True)
    # printresponse() both renders the stream and stores the reply in context.
    printresponse(response)
    print()

print("Done")