如何修复 TypeError: vars() 参数必须具有 Huggingface (PyTorch 版本) Trainer 的 __dict__ 属性?

sha*_*hut 6 python-3.x pytorch huggingface-transformers

我正在尝试从 Huggingface (PyTorch 版本)微调预训练模型。我正在使用我的自定义数据集。

这是我的自定义数据集的代码

from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch
import numpy as np

data = datasets.ImageFolder(root=img_dir)

print(data.classes)

class MyDataset(Dataset):
    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform
        
    def __getitem__(self, index):
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)
        return x, y
        
    def __len__(self):
        return len(self.subset)

train_size = int(0.8 * len(data))
validation_size = int((len(data) - train_size) * 0.5)
test_size = int((len(data) - train_size) * 0.5)
train_dataset, validation_dataset, test_dataset = torch.utils.data.random_split(data, [train_size, validation_size, test_size])

train = MyDataset(train_dataset, transform=transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()]))
test = MyDataset(test_dataset, transform=transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()]))
validation = MyDataset(validation_dataset, transform=transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()]))

Run Code Online (Sandbox Code Playgroud)

这是我的代码,用于微调 Huggingface 变压器的预训练模型。

from transformers import AutoFeatureExtractor
# feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
batch_size = 16
# Defining training arguments (set push_to_hub to false if you don't want to upload it to HuggingFace's model hub)
training_args = training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")

# Instantiate the Trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train,
    eval_dataset=validation
)

trainer.train()
Run Code Online (Sandbox Code Playgroud)

当我运行代码时,我收到此错误

TypeError                                 Traceback (most recent call last)
C:\Users\SHANTA~1\AppData\Local\Temp/ipykernel_2320/4032920361.py in <module>
----> 1 trainer.train()

c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1315             self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
   1316         )
-> 1317         return inner_training_loop(
   1318             args=args,
   1319             resume_from_checkpoint=resume_from_checkpoint,

c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1526 
   1527             step = -1
-> 1528             for step, inputs in enumerate(epoch_iterator):
   1529 
   1530                 # Skip past any already trained steps if resuming training

c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
    519             if self._sampler_iter is None:
    520                 self._reset()
--> 521             data = self._next_data()
    522             self._num_yielded += 1
    523             if self._dataset_kind == _DatasetKind.Iterable and \
...
--> 106         features = [vars(f) for f in features]
    107     first = features[0]
    108     batch = {}

TypeError: vars() argument must have __dict__ attribute
Run Code Online (Sandbox Code Playgroud)

有人可以帮我吗?谢谢 !!