sha*_*hut 6 python-3.x pytorch huggingface-transformers
我正在尝试从 Huggingface (PyTorch 版本)微调预训练模型。我正在使用我的自定义数据集。
这是我的自定义数据集的代码
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch
import numpy as np
data = datasets.ImageFolder(root=img_dir)
print(data.classes)
class MyDataset(Dataset):
def __init__(self, subset, transform=None):
self.subset = subset
self.transform = transform
def __getitem__(self, index):
x, y = self.subset[index]
if self.transform:
x = self.transform(x)
return x, y
def __len__(self):
return len(self.subset)
train_size = int(0.8 * len(data))
validation_size = int((len(data) - train_size) * 0.5)
test_size = int((len(data) - train_size) * 0.5)
train_dataset, validation_dataset, test_dataset = torch.utils.data.random_split(data, [train_size, validation_size, test_size])
train = MyDataset(train_dataset, transform=transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()]))
test = MyDataset(test_dataset, transform=transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()]))
validation = MyDataset(validation_dataset, transform=transforms.Compose([ transforms.Resize(224), transforms.CenterCrop(224), transforms.ToTensor()]))
Run Code Online (Sandbox Code Playgroud)
这是我的代码,用于微调 Huggingface 变压器的预训练模型。
from transformers import AutoFeatureExtractor
# feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
batch_size = 16
# Defining training arguments (set push_to_hub to false if you don't want to upload it to HuggingFace's model hub)
training_args = training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
# Instantiate the Trainer object
trainer = Trainer(
model=model,
args=training_args,
compute_metrics=compute_metrics,
train_dataset=train,
eval_dataset=validation
)
trainer.train()
Run Code Online (Sandbox Code Playgroud)
当我运行代码时,我收到此错误
TypeError Traceback (most recent call last)
C:\Users\SHANTA~1\AppData\Local\Temp/ipykernel_2320/4032920361.py in <module>
----> 1 trainer.train()
c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1315 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1316 )
-> 1317 return inner_training_loop(
1318 args=args,
1319 resume_from_checkpoint=resume_from_checkpoint,
c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\transformers\trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1526
1527 step = -1
-> 1528 for step, inputs in enumerate(epoch_iterator):
1529
1530 # Skip past any already trained steps if resuming training
c:\Users\Shantanu Rahut\AppData\Local\Programs\Python\Python39\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
519 if self._sampler_iter is None:
520 self._reset()
--> 521 data = self._next_data()
522 self._num_yielded += 1
523 if self._dataset_kind == _DatasetKind.Iterable and \
...
--> 106 features = [vars(f) for f in features]
107 first = features[0]
108 batch = {}
TypeError: vars() argument must have __dict__ attribute
Run Code Online (Sandbox Code Playgroud)
有人可以帮我吗?谢谢 !!