Skip to content

Commit dfdb56e

Browse files
committed
Enable evaluator to support dataset download from online sources
1 parent 747804e commit dfdb56e

File tree

1 file changed

+12
-26
lines changed

1 file changed

+12
-26
lines changed

src/llamafactory/eval/evaluator.py

Lines changed: 12 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -81,32 +81,18 @@ def eval(self) -> None:
8181
eval_task = self.eval_args.task.split("_")[0]
8282
eval_split = self.eval_args.task.split("_")[1]
8383

84+
from huggingface_hub import hf_hub_download
8485
if not os.path.exists(self.eval_args.task_dir):
85-
if use_modelscope():
86-
from modelscope import snapshot_download
87-
local_dir = snapshot_download(
88-
self.eval_args.task_dir,
89-
cache_dir=self.model_args.cache_dir,
90-
token=self.model_args.ms_hub_token
91-
)
92-
mapping = os.path.join(local_dir, "mapping.json")
93-
elif use_openmind():
94-
from openmind.utils.hub import snapshot_download
95-
local_dir = snapshot_download(
96-
self.eval_args.task_dir,
97-
cache_dir=self.model_args.cache_dir,
98-
token=self.model_args.om_hub_token
99-
)
100-
mapping = os.path.join(local_dir, "mapping.json")
101-
else:
102-
from huggingface_hub import hf_hub_download
103-
mapping = hf_hub_download(
104-
repo_id=self.eval_args.task_dir,
105-
filename="mapping.json",
106-
repo_type="dataset",
107-
cache_dir=self.model_args.cache_dir,
108-
token=self.model_args.hf_hub_token
109-
)
86+
# The ModelScope / OpenMind download APIs have not been verified yet, so only Hugging Face online datasets are handled for now.
87+
# A different API (hf_hub_download) is used here because the cache did not work, even after clearing .cache/huggingface/datasets.
88+
from huggingface_hub import hf_hub_download
89+
mapping = hf_hub_download(
90+
repo_id=self.eval_args.task_dir,
91+
filename="mapping.json",
92+
repo_type="dataset",
93+
cache_dir=self.model_args.cache_dir,
94+
token=self.model_args.hf_hub_token
95+
)
11096
else:
11197
mapping = cached_file(
11298
path_or_repo_id=os.path.join(self.eval_args.task_dir, eval_task),
@@ -123,7 +109,7 @@ def eval(self) -> None:
123109
results = {}
124110
for subject in pbar:
125111
dataset = load_dataset(
126-
path=os.path.join(self.eval_args.task_dir, eval_task),
112+
path=os.path.join(self.eval_args.task_dir, eval_task) if os.path.exists(self.eval_args.task_dir) else self.eval_args.task_dir,
127113
name=subject,
128114
cache_dir=self.model_args.cache_dir,
129115
download_mode=self.eval_args.download_mode,

0 commit comments

Comments
 (0)