Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions fastdeploy/input/ernie_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ def process_request(self, request, max_model_len=None, **kwargs):
else:
request.prompt_token_ids = self.messages2ids(request.to_dict())

if len(request.prompt_token_ids) == 0:
raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
if max_model_len is not None and len(request.prompt_token_ids) > max_model_len:
request.prompt_token_ids = request.prompt_token_ids[: max_model_len - 1]
if request.get("max_tokens") is None:
Expand Down Expand Up @@ -160,7 +162,9 @@ def process_request_dict(self, request, max_model_len=None):
req_id = request.get("request_id", None)
data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}")
else:
request["prompt_token_ids"] = self.messages2ids(request)
request['prompt_token_ids'] = self.messages2ids(request)
if len(request['prompt_token_ids']) == 0:
raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")

# truncate prompts that exceed the length limit
if max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len:
Expand All @@ -184,7 +188,6 @@ def process_response(self, response_dict, **kwargs):
Returns:
Dict: response contain text fields
"""

req_id = response_dict.request_id
token_ids = response_dict.outputs.token_ids

Expand Down Expand Up @@ -228,6 +231,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
Returns:
Dict: response contain text fields
"""
enable_thinking = kwargs.get("enable_thinking")
token_ids = response_dict["outputs"]["token_ids"]
is_end = response_dict["finished"]
req_id = response_dict["request_id"]
Expand All @@ -237,8 +241,9 @@ def process_response_dict_normal(self, response_dict, **kwargs):
delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
if is_end:
full_text = previous_texts + delta_text
if self.reasoning_parser:
reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
if enable_thinking and self.reasoning_parser:
reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
full_text, response_dict)
response_dict["outputs"]["text"] = text
response_dict["outputs"]["reasoning_content"] = reasoning_content
else:
Expand Down
5 changes: 5 additions & 0 deletions fastdeploy/input/mm_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from fastdeploy.entrypoints.chat_utils import parse_chat_messages
from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
from fastdeploy.utils import data_processor_logger

from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor
from .process_video import read_frames_decord, read_video_decord
Expand Down Expand Up @@ -252,6 +253,8 @@ def request2ids(
image_message_list.append(item)

prompt_token_ids = self.apply_chat_template(request)
if len(prompt_token_ids) == 0:
raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
image_start_index = 0
image_message_index = 0
for i in range(len(prompt_token_ids)):
Expand Down Expand Up @@ -503,4 +506,6 @@ def apply_chat_template(self, request):
)
tokens = self.tokenizer.tokenize(prompt_token_str)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
# Log the request id together with the tokenization result for debugging.
# The separating comma must sit OUTSIDE the replacement field: written as
# `{expr,}` the expression becomes a one-element tuple and the log would
# render the id as "('<id>',)" instead of "<id>".
data_processor_logger.info(
    f"req_id:{request.get('request_id', '')}, tokens: {tokens}, token_ids: {token_ids}")
return token_ids
19 changes: 14 additions & 5 deletions fastdeploy/input/text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,11 @@ def process_request(self, request, max_model_len=None, **kwargs):
task["enable_thinking"] = kwargs.get("enable_thinking", True)
request.prompt_token_ids = self.messages2ids(task)
else:
raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.")
raise ValueError(
f"The request should have `input_ids`, `text` or `messages`: {request}."
)
if len(request.prompt_token_ids) == 0:
raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
if request.get("max_tokens") is None:
request.set(
"max_tokens",
Expand Down Expand Up @@ -283,8 +287,11 @@ def process_request_dict(self, request, max_model_len=None, **kwargs):
raise ValueError("This model does not support chat_template.")
request["prompt_token_ids"] = self.messages2ids(request)
else:
raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")

raise ValueError(
f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}"
)
if len(request['prompt_token_ids']) == 0:
raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
if request.get("max_tokens") is None:
request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))
if request.get("temperature") < _SAMPLING_EPS:
Expand Down Expand Up @@ -335,6 +342,7 @@ def process_response_dict_normal(self, response_dict, **kwargs):
Returns:
Dict: response contain text fields
"""
enable_thinking = kwargs.get("enable_thinking")
token_ids = response_dict["outputs"]["token_ids"]
is_end = response_dict["finished"]
req_id = response_dict["request_id"]
Expand All @@ -344,8 +352,9 @@ def process_response_dict_normal(self, response_dict, **kwargs):
delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
if is_end:
full_text = previous_texts + delta_text
if self.reasoning_parser:
reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
if enable_thinking and self.reasoning_parser:
reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
full_text, response_dict)
response_dict["outputs"]["text"] = text
response_dict["outputs"]["reasoning_content"] = reasoning_content
else:
Expand Down
Loading