-
Notifications
You must be signed in to change notification settings - Fork 32.5k
Open
Labels
Description
System Info
I am trying to run inference using mistralai/Voxtral-Mini-3B-2507 on an audio input (np.ndarray). On loading the processor using processor = transformers.AutoProcessor.from_pretrained(MODEL, trust_remote_code=True), I am getting the following error:
/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning:
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
warnings.warn(
Converting tekken.json to tokenizer.json: 100%|██████████| 150000/150000 [00:02<00:00, 69142.59it/s]
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
[/tmp/ipykernel_7845/2744582412.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in <cell line: 0>()
12 # else:
13 # processor = MODEL_DICT[MODEL]['processor'].from_pretrained(MODEL, trust_remote_code=True)
---> 14 processor = MODEL_DICT[MODEL]['processor'].from_pretrained(MODEL, trust_remote_code=True)
13 frames[/usr/local/lib/python3.12/dist-packages/transformers/models/auto/processing_auto.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
409 )
410 elif processor_class is not None:
--> 411 return processor_class.from_pretrained(
412 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
413 )
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1402 processor_dict, instantiation_kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
1403 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
-> 1404 return cls.from_args_and_dict(args, processor_dict, **instantiation_kwargs)
1405
1406 @classmethod
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in from_args_and_dict(cls, args, processor_dict, **kwargs)
1171 processor = cls(*args, **valid_kwargs)
1172
-> 1173 logger.info(f"Processor {processor}")
1174 if return_unused_kwargs:
1175 return processor, unused_kwargs
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in __repr__(self)
777 attributes_repr = [f"- {name}: {repr(getattr(self, name))}" for name in self.get_attributes()]
778 attributes_repr = "\n".join(attributes_repr)
--> 779 return f"{self.__class__.__name__}:\n{attributes_repr}\n\n{self.to_json_string()}"
780
781 def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in to_json_string(self)
759 `str`: String containing all the attributes that make up this feature_extractor instance in JSON format.
760 """
--> 761 dictionary = self.to_dict()
762
763 return json.dumps(dictionary, indent=2, sort_keys=True) + "\n"
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in to_dict(self)
697 `dict[str, Any]`: Dictionary of all the attributes that make up this processor instance.
698 """
--> 699 output = copy.deepcopy(self.__dict__)
700
701 # Get the kwargs in `__init__`.
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
134 copier = _deepcopy_dispatch.get(cls)
135 if copier is not None:
--> 136 y = copier(x, memo)
137 else:
138 if issubclass(cls, type):
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _deepcopy_dict(x, memo, deepcopy)
219 memo[id(x)] = y
220 for key, value in x.items():
--> 221 y[deepcopy(key, memo)] = deepcopy(value, memo)
222 return y
223 d[dict] = _deepcopy_dict
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
160 y = x
161 else:
--> 162 y = _reconstruct(x, memo, *rv)
163
164 # If is its own copy, don't memoize.
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
257 if state is not None:
258 if deep:
--> 259 state = deepcopy(state, memo)
260 if hasattr(y, '__setstate__'):
261 y.__setstate__(state)
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
134 copier = _deepcopy_dispatch.get(cls)
135 if copier is not None:
--> 136 y = copier(x, memo)
137 else:
138 if issubclass(cls, type):
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _deepcopy_dict(x, memo, deepcopy)
219 memo[id(x)] = y
220 for key, value in x.items():
--> 221 y[deepcopy(key, memo)] = deepcopy(value, memo)
222 return y
223 d[dict] = _deepcopy_dict
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
160 y = x
161 else:
--> 162 y = _reconstruct(x, memo, *rv)
163
164 # If is its own copy, don't memoize.
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
259 state = deepcopy(state, memo)
260 if hasattr(y, '__setstate__'):
--> 261 y.__setstate__(state)
262 else:
263 if isinstance(state, tuple) and len(state) == 2:
Exception: Error while attempting to unpickle Tokenizer: Token `Ġ<SPECIAL_116>` out of vocabulary at line 1 column 9601910
I tested out the following standalone code provided in the model card of mistralai/Voxtral-Mini-3B-2507 but it too does not work:
from transformers import VoxtralForConditionalGeneration, AutoProcessor
import torch
device = "cuda"
repo_id = "mistralai/Voxtral-Mini-3B-2507"
processor = AutoProcessor.from_pretrained(repo_id)
model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
conversation = [
{
"role": "user",
"content": [
{
"type": "audio",
"path": "https://huggingface.co/datasets/hf-internal-testing/dummy-audio-samples/resolve/main/winning_call.mp3",
},
],
}
]
inputs = processor.apply_chat_template(conversation)
inputs = inputs.to(device, dtype=torch.bfloat16)
outputs = model.generate(**inputs, max_new_tokens=500)
decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
print("\nGenerated response:")
print("=" * 80)
print(decoded_outputs[0])
print("=" * 80)
Error given by above script:
Converting tekken.json to tokenizer.json: 100%|██████████| 150000/150000 [00:01<00:00, 116597.64it/s]
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
[/tmp/ipykernel_7845/3351359521.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in <cell line: 0>()
5 repo_id = "mistralai/Voxtral-Mini-3B-2507"
6
----> 7 processor = AutoProcessor.from_pretrained(repo_id)
8 model = VoxtralForConditionalGeneration.from_pretrained(repo_id, torch_dtype=torch.bfloat16, device_map=device)
9
13 frames[/usr/local/lib/python3.12/dist-packages/transformers/models/auto/processing_auto.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
409 )
410 elif processor_class is not None:
--> 411 return processor_class.from_pretrained(
412 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
413 )
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1402 processor_dict, instantiation_kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
1403 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
-> 1404 return cls.from_args_and_dict(args, processor_dict, **instantiation_kwargs)
1405
1406 @classmethod
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in from_args_and_dict(cls, args, processor_dict, **kwargs)
1171 processor = cls(*args, **valid_kwargs)
1172
-> 1173 logger.info(f"Processor {processor}")
1174 if return_unused_kwargs:
1175 return processor, unused_kwargs
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in __repr__(self)
777 attributes_repr = [f"- {name}: {repr(getattr(self, name))}" for name in self.get_attributes()]
778 attributes_repr = "\n".join(attributes_repr)
--> 779 return f"{self.__class__.__name__}:\n{attributes_repr}\n\n{self.to_json_string()}"
780
781 def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs):
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in to_json_string(self)
759 `str`: String containing all the attributes that make up this feature_extractor instance in JSON format.
760 """
--> 761 dictionary = self.to_dict()
762
763 return json.dumps(dictionary, indent=2, sort_keys=True) + "\n"
[/usr/local/lib/python3.12/dist-packages/transformers/processing_utils.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in to_dict(self)
697 `dict[str, Any]`: Dictionary of all the attributes that make up this processor instance.
698 """
--> 699 output = copy.deepcopy(self.__dict__)
700
701 # Get the kwargs in `__init__`.
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
134 copier = _deepcopy_dispatch.get(cls)
135 if copier is not None:
--> 136 y = copier(x, memo)
137 else:
138 if issubclass(cls, type):
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _deepcopy_dict(x, memo, deepcopy)
219 memo[id(x)] = y
220 for key, value in x.items():
--> 221 y[deepcopy(key, memo)] = deepcopy(value, memo)
222 return y
223 d[dict] = _deepcopy_dict
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
160 y = x
161 else:
--> 162 y = _reconstruct(x, memo, *rv)
163
164 # If is its own copy, don't memoize.
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
257 if state is not None:
258 if deep:
--> 259 state = deepcopy(state, memo)
260 if hasattr(y, '__setstate__'):
261 y.__setstate__(state)
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
134 copier = _deepcopy_dispatch.get(cls)
135 if copier is not None:
--> 136 y = copier(x, memo)
137 else:
138 if issubclass(cls, type):
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _deepcopy_dict(x, memo, deepcopy)
219 memo[id(x)] = y
220 for key, value in x.items():
--> 221 y[deepcopy(key, memo)] = deepcopy(value, memo)
222 return y
223 d[dict] = _deepcopy_dict
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in deepcopy(x, memo, _nil)
160 y = x
161 else:
--> 162 y = _reconstruct(x, memo, *rv)
163
164 # If is its own copy, don't memoize.
[/usr/lib/python3.12/copy.py](https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD#) in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
259 state = deepcopy(state, memo)
260 if hasattr(y, '__setstate__'):
--> 261 y.__setstate__(state)
262 else:
263 if isinstance(state, tuple) and len(state) == 2:
Exception: Error while attempting to unpickle Tokenizer: Token `<SPECIAL_32><SPECIAL_32>` out of vocabulary at line 1 column 9847997
There are many issues with a lot of other APIs in transformers==5.3.0. Please resolve these issues ASAP.
Who can help?
@eustlb @ebezzam @vasqu @Cyrilvallez
Information
- The official example scripts
- My own modified scripts
Tasks
- An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)
- My own task or dataset (give details below)
Reproduction
Steps to reproduce the issue:
- Connect to the correct GPU runtime (I tried on T4 GPU).
- Upload a valid audio file with extension `.WAV`.
- Ensure `MODEL = "mistralai/Voxtral-Mini-3B-2507"` and `AUDIO_PATH = "<valid audio path>.WAV"`.
- Run the notebook at https://colab.research.google.com/drive/18tlO4aFg0RxV7hcBf04abb1wKW-B3zbD?usp=sharing.
Expected behavior
The processor for the mistralai/Voxtral-Mini-3B-2507 model should load without raising an error, so that inference can be run as shown in the model card.
Reactions are currently unavailable