Skip to content

Commit 7fffa1c

Browse files
committed
Implement `nohistory` option to skip storing the chat request in thread history
1 parent 635a4b5 commit 7fffa1c

File tree

2 files changed

+40
-28
lines changed

2 files changed

+40
-28
lines changed

llms/extensions/app/__init__.py

Lines changed: 35 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -615,6 +615,7 @@ async def upload_agent_avatar(request):
615615
ctx.add_post("/agents/avatar", upload_agent_avatar)
616616

617617
async def chat_request(openai_request, context):
618+
nohistory = context.get("nohistory")
618619
chat = openai_request
619620
user = context.get("user", None)
620621
provider = context.get("provider", None)
@@ -629,6 +630,8 @@ async def chat_request(openai_request, context):
629630
started_at = context.get("startedAt")
630631
if not started_at:
631632
context["startedAt"] = started_at = datetime.now()
633+
if nohistory:
634+
return
632635
if thread_id is None:
633636
thread = {
634637
"user": user,
@@ -666,6 +669,8 @@ async def chat_request(openai_request, context):
666669
ctx.register_chat_request_filter(chat_request)
667670

668671
async def tool_request(chat_request, context):
672+
if context.get("nohistory"):
673+
return
669674
messages = chat_request.get("messages", [])
670675
ctx.dbg(f"tool_request: messages {len(messages)}")
671676
thread_id = context.get("threadId", None)
@@ -712,6 +717,7 @@ def truncate_long_strings(obj, max_length=10000):
712717

713718
async def chat_response(openai_response, context):
714719
ctx.dbg("create_response")
720+
nohistory = context.get("nohistory")
715721
o = openai_response
716722
chat = context.get("chat")
717723
usage = o.get("usage", None)
@@ -767,7 +773,7 @@ async def chat_response(openai_response, context):
767773
}
768774
tasks.append(g_db.create_request_async(request, user=user))
769775

770-
if thread_id:
776+
if thread_id and not nohistory:
771777
messages = chat.get("messages", [])
772778
last_role = messages[-1].get("role", None) if len(messages) > 0 else None
773779
if last_role == "user" or last_role == "tool":
@@ -810,37 +816,38 @@ async def chat_response(openai_response, context):
810816
if provider_response:
811817
update_thread["providerResponse"] = truncate_long_strings(provider_response)
812818
tasks.append(g_db.update_thread_async(thread_id, update_thread, user=user))
813-
else:
819+
elif not thread_id:
814820
ctx.dbg("Missing thread_id")
815821

816822
await asyncio.gather(*tasks)
817823

818-
# Update thread costs from all thread requests
819-
thread_requests = g_db.query_requests({"threadId": thread_id}, user=user)
820-
total_costs = 0
821-
total_input = 0
822-
total_output = 0
823-
for request in thread_requests:
824-
total_costs += request.get("cost", 0) or 0
825-
total_input += request.get("inputTokens", 0) or 0
826-
total_output += request.get("outputTokens", 0) or 0
827-
stats = {
828-
"inputTokens": total_input,
829-
"outputTokens": total_output,
830-
"cost": total_costs,
831-
"duration": duration,
832-
"requests": len(thread_requests),
833-
}
834-
g_db.update_thread(
835-
thread_id,
836-
{
824+
if thread_id and not nohistory:
825+
# Update thread costs from all thread requests
826+
thread_requests = g_db.query_requests({"threadId": thread_id}, user=user)
827+
total_costs = 0
828+
total_input = 0
829+
total_output = 0
830+
for request in thread_requests:
831+
total_costs += request.get("cost", 0) or 0
832+
total_input += request.get("inputTokens", 0) or 0
833+
total_output += request.get("outputTokens", 0) or 0
834+
stats = {
837835
"inputTokens": total_input,
838836
"outputTokens": total_output,
839837
"cost": total_costs,
840-
"stats": stats,
841-
},
842-
user=user,
843-
)
838+
"duration": duration,
839+
"requests": len(thread_requests),
840+
}
841+
g_db.update_thread(
842+
thread_id,
843+
{
844+
"inputTokens": total_input,
845+
"outputTokens": total_output,
846+
"cost": total_costs,
847+
"stats": stats,
848+
},
849+
user=user,
850+
)
844851

845852
ctx.register_chat_response_filter(chat_response)
846853

@@ -852,15 +859,16 @@ async def chat_error(e: Exception, context: Any):
852859
ctx.dbg("Missing chat")
853860
return
854861

862+
nohistory = context.get("nohistory")
855863
title = context.get("title") or prompt_to_title(ctx.last_user_prompt(chat) if chat else None)
856864
completed_at = datetime.now()
857865
user = context.get("user", None)
858866

859867
thread_id = context.get("threadId", None)
860868
tasks = []
861-
if thread_id:
869+
if thread_id and not nohistory:
862870
tasks.append(g_db.update_thread_async(thread_id, {"completedAt": completed_at, "error": error}, user=user))
863-
else:
871+
elif not thread_id:
864872
ctx.dbg("Missing threadId")
865873

866874
request = {

llms/main.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1984,7 +1984,7 @@ async def g_chat_completion(chat, context=None):
19841984
raise e
19851985

19861986

1987-
async def cli_chat(chat, tools=None, image=None, audio=None, file=None, args=None, raw=False):
1987+
async def cli_chat(chat, tools=None, image=None, audio=None, file=None, args=None, raw=False, nohistory=False):
19881988
if g_default_model:
19891989
chat["model"] = g_default_model
19901990

@@ -2061,6 +2061,7 @@ async def cli_chat(chat, tools=None, image=None, audio=None, file=None, args=Non
20612061
try:
20622062
context = {
20632063
"tools": tools or "all",
2064+
"nohistory": nohistory,
20642065
}
20652066
response = await g_app.chat_completion(chat, context=context)
20662067

@@ -3732,6 +3733,7 @@ def create_arg_parser():
37323733
metavar="PARAMS",
37333734
)
37343735
parser.add_argument("--raw", action="store_true", help="Return raw AI JSON response")
3736+
parser.add_argument("--nohistory", action="store_true", help="Skip saving response to database")
37353737

37363738
parser.add_argument(
37373739
"--list", action="store_true", help="Show list of enabled providers and their models (alias ls provider?)"
@@ -4131,6 +4133,7 @@ async def chat_handler(request):
41314133
metadata = chat.get("metadata", {})
41324134
context["threadId"] = metadata.get("threadId", None)
41334135
context["tools"] = metadata.get("tools", "all")
4136+
context["nohistory"] = metadata.get("nohistory", False)
41344137
response = await g_app.chat_completion(chat, context)
41354138
return web.json_response(response)
41364139
except Exception as e:
@@ -4606,6 +4609,7 @@ async def start_background_tasks(app):
46064609
file=cli_args.file,
46074610
args=args,
46084611
raw=cli_args.raw,
4612+
nohistory=cli_args.nohistory,
46094613
)
46104614
)
46114615
return ExitCode.SUCCESS

0 commit comments

Comments
 (0)