I am learning how to build a binary classification BERT model on the Kaggle Twitter disaster dataset with the Hugging Face Transformers library.
Once I enter the training loop, the following error is raised during the forward() call:
Epoch 1/50
----------
Aici incepe train_epoch
/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
cpuset_checked))
----Checkpoint train_epoch 2----
----Checkpoint train_epoch 2----
----forward checkpoint 1----
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-175-fd9f98819b6f> in <module>()
23 device,
24 scheduler,
---> 25 df_train.shape[0]
26 )
27 print(f'Train loss {train_loss} Accuracy:{train_acc}')
4 frames
<ipython-input-173-bfbecd87c5ec> in train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples)
21 targets = d['targets'].to(device)
22 print('----Checkpoint train_epoch 2----')
---> 23 outputs = model(input_ids=input_ids,attention_mask=attention_mask)
24 print('----Checkpoint train_epoch 3----')
25 _,preds = torch.max(outputs,dim=1)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
<ipython-input-171-e754ea3edc36> in forward(self, input_ids, attention_mask)
16 input_ids=input_ids,
17 attention_mask=attention_mask,
---> 18 return_dict=False)
19
20 print('----forward checkpoint 2-----')
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/usr/local/lib/python3.7/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
923 elif input_ids is not None:
924 input_shape = input_ids.size()
--> 925 batch_size, seq_length = input_shape
926 elif inputs_embeds is not None:
927 input_shape = inputs_embeds.size()[:-1]
ValueError: too many values to unpack (expected 2)
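For reference, the unpacking that fails is batch_size, seq_length = input_shape, which only works when input_ids is 2-dimensional. A minimal sketch that reproduces the same ValueError (the shape used here is made up for illustration):

import torch

# BertModel expects input_ids of shape (batch_size, seq_length);
# a tensor with an extra leading dimension triggers the same unpacking error.
input_ids = torch.zeros(1, 8, 128, dtype=torch.long)
batch_size, seq_length = input_ids.size()  # ValueError: too many values to unpack (expected 2)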
At first I thought it was related to the return_dict=False change they added, but I was wrong. The code for the classifier and the training loop is below.
The classifier:
class DisasterClassifier(nn.Module):
    def __init__(self, n_classes):
        super(DisasterClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL, return_dict=False)
        # during training, random activations are replaced with 0 with probability p ->
        # regularization and preventing the co-adaptation of neurons
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        print('----forward checkpoint 1----')
        bertOutput = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False)
        print('----forward checkpoint 2-----')
        output = self.drop(bertOutput['pooler_output'])
        return self.out(output)
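A side note on the classifier itself, separate from the shape error: with return_dict=False, BertModel returns a plain tuple (last_hidden_state, pooler_output), so indexing it with bertOutput['pooler_output'] would raise a TypeError once the forward pass gets that far. A sketch of the tuple-based forward, keeping everything else the same:

def forward(self, input_ids, attention_mask):
    print('----forward checkpoint 1----')
    # With return_dict=False the model returns a tuple:
    # (last_hidden_state, pooler_output)
    last_hidden_state, pooled_output = self.bert(
        input_ids=input_ids,
        attention_mask=attention_mask,
        return_dict=False)
    print('----forward checkpoint 2-----')
    output = self.drop(pooled_output)
    return self.out(output)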
The training epoch function:
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps)

loss_fn = nn.CrossEntropyLoss().to(device)

def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    print('Aici incepe train_epoch')
    model = model.train()
    losses = []
    correct_predictions = 0

    for d in data_loader:
        print('----Checkpoint train_epoch 2----')
        input_ids = d['input_ids'].to(device)
        attention_mask = d['attention_mask'].to(device)
        targets = d['targets'].to(device)
        print('----Checkpoint train_epoch 2----')
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        print('----Checkpoint train_epoch 3----')
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, targets)
        correct_predictions += torch.sum(preds == targets)
        losses.append(loss.item())

        # backpropagation steps
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # model.parameters must be called
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    return (correct_predictions.double() / n_examples), np.mean(losses)
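For the model(...) call above to work, each batch coming out of the DataLoader has to contain 2-dimensional input_ids and attention_mask tensors. A quick shape check, assuming the train_data_loader, BATCH_SIZE and MAX_LEN names used elsewhere in the notebook:

# Inspect one batch to verify the tensor shapes the model will receive.
d = next(iter(train_data_loader))
print(d['input_ids'].shape)       # expected: torch.Size([BATCH_SIZE, MAX_LEN])
print(d['attention_mask'].shape)  # expected: torch.Size([BATCH_SIZE, MAX_LEN])
print(d['targets'].shape)         # expected: torch.Size([BATCH_SIZE])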
And the training loop:
history = defaultdict(list)
best_accuracy = 0

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print('-' * 10)

    train_acc, train_loss = train_epoch(
        model,
        train_data_loader,
        loss_fn,
        optimizer,
        device,
        scheduler,
        df_train.shape[0]
    )
    print(f'Train loss {train_loss} Accuracy:{train_acc}')

    val_acc, val_loss = eval_model(model, val_data_loader, loss_fn, device, len(df_val))
    print(f'Validation loss {val_loss} Accuracy:{val_acc}')
    print()

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_acc'].append(val_acc)
    history['val_loss'].append(val_loss)

    if val_acc > best_accuracy:
        torch.save(model.state_dict(), 'best_model_state.bin')
        best_accuracy = val_acc
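The loop above also calls an eval_model helper that is not shown. A minimal sketch of it, assuming it mirrors train_epoch without the optimizer, scheduler and backward steps:

def eval_model(model, data_loader, loss_fn, device, n_examples):
    # Evaluation pass: same bookkeeping as train_epoch, but no gradient updates.
    model = model.eval()
    losses = []
    correct_predictions = 0

    with torch.no_grad():
        for d in data_loader:
            input_ids = d['input_ids'].to(device)
            attention_mask = d['attention_mask'].to(device)
            targets = d['targets'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs, dim=1)
            loss = loss_fn(outputs, targets)

            correct_predictions += torch.sum(preds == targets)
            losses.append(loss.item())

    return (correct_predictions.double() / n_examples), np.mean(losses)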
Has anyone run into something similar?
I just ran into the same problem. You need to check the shape of input_ids; it should be (batch_size, seq_length). In your case, I guess it is something like (1, batch_size, seq_length) or similar. So do this:
input_ids = input_ids.squeeze(0)
outputs = model(input_ids=input_ids,attention_mask=attention_mask)
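In this kind of setup the extra dimension usually comes from the Dataset: encode_plus with return_tensors='pt' returns tensors of shape (1, max_len), and if they are returned as-is, the batches end up with a stray dimension of size 1 (wherever it lands, as guessed above). Rather than squeezing inside the training loop, the upstream fix is to flatten in __getitem__. A sketch under that assumption (the TweetDataset class and its field names are hypothetical, since the question does not show the dataset):

import torch
from torch.utils.data import Dataset

class TweetDataset(Dataset):  # hypothetical name; the question does not show its dataset class
    def __init__(self, texts, targets, tokenizer, max_len):
        self.texts = texts
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # encode_plus with return_tensors='pt' yields tensors of shape (1, max_len);
        # flatten() removes that leading dimension so the default collate function
        # stacks batches into (batch_size, max_len) instead of (batch_size, 1, max_len).
        encoding = self.tokenizer.encode_plus(
            str(self.texts[idx]),
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(self.targets[idx], dtype=torch.long)
        }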