Luc*_*rro 9 python pickle huggingface-transformers ray-tune
我大致参照这个示例,使用自己的数据集,将 Ray Tune 超参数调优库与 Hugging Face Transformers 库集成。
这是我的脚本:
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.examples.pbt_transformers.utils import download_data, \
build_compute_metrics_fn
from ray.tune.schedulers import PopulationBasedTraining
from transformers import glue_tasks_num_labels, AutoConfig, \
AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
def get_model():
    """Build the ELECTRA-small sequence classifier used for each training run.

    Used as the ``model_init`` callback of the ``Trainer``.

    Returns:
        The model loaded from ``google/electra-small-discriminator`` with a
        2-label classification head and its token embeddings resized to
        ``len(tokenizer)``.
    """
    # NOTE(review): ``tokenizer`` is not defined in this snippet; it is
    # presumably a module-level tokenizer created along the lines of
    #   AutoTokenizer.from_pretrained(
    #       model_name, additional_special_tokens=['[CHARACTER]'])
    # -- confirm before running.
    #
    # AutoModelForSequenceClassification is what the file actually imports;
    # the original ElectraForSequenceClassification name was never imported.
    model = AutoModelForSequenceClassification.from_pretrained(
        'google/electra-small-discriminator', num_labels=2)
    # Keep the embedding matrix in sync with the tokenizer's vocabulary size.
    model.resize_token_embeddings(len(tokenizer))
    return model
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
def compute_metrics(pred):
    """Score a prediction batch with accuracy and weighted precision/recall/F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions``; the predicted class is the argmax over the last
            axis of ``predictions``.

    Returns:
        A dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    # The fourth element returned by sklearn is not needed here.
    weighted_precision, weighted_recall, weighted_f1, _ = (
        precision_recall_fscore_support(y_true, y_pred, average='weighted')
    )
    accuracy = accuracy_score(y_true, y_pred)

    return dict(
        accuracy=accuracy,
        f1=weighted_f1,
        precision=weighted_precision,
        recall=weighted_recall,
    )
# Baseline training arguments. Values tagged "config" also appear in
# ``tune_config`` and/or the scheduler's ``hyperparam_mutations`` below.
training_args = TrainingArguments(
    "electra_hp_tune",
    # What to run and when to evaluate.
    do_train=True,
    do_eval=True,
    evaluation_strategy="epoch",
    load_best_model_at_end=True,
    # Optimisation settings.
    learning_rate=2e-5,  # config
    weight_decay=0.1,  # config
    warmup_steps=0,
    num_train_epochs=2,  # config
    per_device_train_batch_size=16,  # config
    per_device_eval_batch_size=16,  # config
    # Logging destinations.
    report_to="wandb",
    logging_dir="./logs",
)
# ``model_init`` (rather than a ready-made model) is passed so the Trainer
# can call ``get_model`` itself to construct the model.
trainer = Trainer(
    args=training_args,
    model_init=get_model,
    compute_metrics=compute_metrics,
    train_dataset=chunked_encoded_dataset['train'],
    eval_dataset=chunked_encoded_dataset['validation'],
)
# Search space handed to Ray Tune: fixed batch sizes, sampled epoch count.
tune_config = dict(
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=tune.choice([2, 3, 4, 5]),
)
# Population Based Training scheduler that maximises evaluation accuracy.
# ``compute_metrics`` returns the key 'accuracy', which is reported with the
# same "eval_" prefix as the "eval_f1"/"eval_loss" columns used by the
# reporter, i.e. "eval_accuracy". The previous "eval_acc" name matches no
# reported metric.
scheduler = PopulationBasedTraining(
    time_attr="training_iteration",
    metric="eval_accuracy",
    mode="max",
    perturbation_interval=1,
    # Hyperparameters PBT is allowed to perturb between trials.
    hyperparam_mutations={
        "weight_decay": tune.uniform(0.0, 0.3),
        "learning_rate": tune.uniform(1e-5, 2.5e-5),
        "per_device_train_batch_size": [16, 32, 64],
    },
)
# Command-line progress table: which metrics to show and how to label the
# hyperparameter columns (full parameter name -> short heading).
reporter = CLIReporter(
    metric_columns=["eval_f1", "eval_loss", "epoch", "training_iteration"],
    parameter_columns={
        "weight_decay": "w_decay",
        "learning_rate": "lr",
        "per_device_train_batch_size": "train_bs/gpu",
        "num_train_epochs": "num_epochs",
    },
)
from ray.tune.integration.wandb import WandbLogger
# Launch the search on the Ray backend with 10 trials. The search space is
# the fixed ``tune_config`` dict, so the callable's argument is ignored.
trainer.hyperparameter_search(
    backend="ray",
    hp_space=lambda _trial: tune_config,
    n_trials=10,
    name="tune_transformer_gr",
    scheduler=scheduler,
    progress_reporter=reporter,
    keep_checkpoints_num=1,
    checkpoint_score_attr="training_iteration",
)
Run Code Online (Sandbox Code Playgroud)
错误是在最后一个函数调用(即 trainer.hyperparameter_search)处引发的。错误信息是:
AttributeError: module 'pickle' has no attribute 'PickleBuffer'
这是完整的堆栈跟踪:
AttributeError Traceback (most recent call last)
在 () 8 checkpoint_score_attr="training_iteration", 9 Progress_reporter=reporter, ---> 10 name="tune_transformer_gr")
14帧
/usr/local/lib/python3.7/dist-packages/transformers/trainer.py 在 hyperparameter_search(self, hp_space,compute_objective, n_Trials,direction, backend, hp_name, **kwargs) 1666 1667
run_hp_search = run_hp_search_optuna if backend == HPSearchBackend.OPTUNA 否则 run_hp_search_ray -> 1668 best_run = run_hp_search(self, n_trials, 方向, **kwargs) 1669 1670 self.hp_search_backend = None/usr/local/lib/python3.7/dist-packages/transformers/integrations.py 在 run_hp_search_ray(trainer, n_trials, 方向, **kwargs) 231 232 分析 = ray.tune.run( --> 233 ray.tune .with_parameters(_objective, local_trainer=trainer), 234 config=trainer.hp_space(None), 235 num_samples=n_Trials,
/usr/local/lib/python3.7/dist-packages/ray/tune/utils/trainable.py 中 with_parameters(trainable, **kwargs) 294 前缀 = f"{str(trainable)}_" 295 for k, v 在 kwargs.items() 中: --> 296parameter_registry.put(prefix + k, v) 297 298 trainable_name = getattr(trainable, " name ", "tune_with_parameters")
/usr/local/lib/python3.7/dist-packages/ray/tune/registry.py in put(self, k, v) 160 self.to_flush[k] = v 161 if ray.is_initialized(): -- > 162 self.flush() 163 164 def get(self, k):
/usr/local/lib/python3.7/dist-packages/ray/tune/registry.py inlush(self) 169 deflush(self): 170 for k, v in self.to_flush.items(): -- > 171 self.references[k] = ray.put(v) 172 self.to_flush.clear() 173
/usr/local/lib/python3.7/dist-packages/ray/_private/client_mode_hook.py in 包装器(*args, **kwargs) 45 if client_mode_should_convert(): 46 return getattr(ray, func.name ) (* args, **kwargs) ---> 47 return func(*args, **kwargs) 48 49 返回包装器
/usr/local/lib/python3.7/dist-packages/ray/worker.py in put(value)
1512 with profiling.profile("ray.put"): 1513 try: -> 1514 object_ref = worker.put_object(值)1515除了ObjectStoreFullError:1516 logger.info(put_object(self, value, object_ref) 中的 /usr/local/lib/python3.7/dist-packages/ray/worker.py 259“使用 ObjectRef 插入”) 260 --> 261 serialized_value = self.get_serialization_context()。 262 # 这必须是我们构造此 python 的第一个地方 263 # ObjectRef 因为在以下情况下创建了具有 0 个本地引用的条目
/usr/local/lib/python3.7/dist-packages/ray/serialization.py in serialize(self, value) 322 返回 RawSerializedObject(value) 323 else: --> 324 返回 self._serialize_to_msgpack(value)
/usr/local/lib/python3.7/dist-packages/ray/serialization.py in _serialize_to_msgpack(self, value) 302 元数据 = ray_constants.OBJECT_METADATA_TYPE_PYTHON 303 pickle5_serialized_object =
--> 304 self._serialize_to_pickle5(metadata, python_objects) 305 else : 306 pickle5_serialized_object = 无/usr/local/lib/python3.7/dist-packages/ray/serialization.py in _serialize_to_pickle5(self,metadata,value) 262 except Exception as e: 263 self.get_and_clear_contained_object_refs() --> 264 raise e 265 最后: 266 self.set_out_of_band_serialization()
/usr/local/lib/python3.7/dist-packages/ray/serialization.py in _serialize_to_pickle5(self,metadata,value) 259 self.set_in_band_serialization() 260 inband = pickle.dumps( --> 261 value, protocol= 5、buffer_callback=writer.buffer_callback) 262 except Exception as e: 263 self.get_and_clear_contained_object_refs()
/usr/local/lib/python3.7/dist-packages/ray/cloudpickle/cloudpickle_fast.py 在 dumps(obj, protocol, buffer_callback) 71 文件中,protocol=protocol, buffer_callback=buffer_callback 72 ) ---> 73 cp。转储(obj)74返回file.getvalue()75
/usr/local/lib/python3.7/dist-packages/ray/cloudpickle/cloudpickle_fast.py in dump(self, obj) 578 def dump(self, obj): 579 try: --> 580 return Pickler.dump(self, obj) 581 except RuntimeError as e: 582
/usr/local/lib/python3.7/dist-packages/pyarrow/io.pxi 在 pyarrow.lib.Buffer 中。减少_ex()
AttributeError: module 'pickle' has no attribute 'PickleBuffer'
我的环境设置:
我尝试过的:
这个错误从何而来?我该如何解决它?
| 归档时间: |
|
| 查看次数: |
1952 次 |
| 最近记录: |