Kat*_*ing · 2 · pytorch, batch-normalization
I ran into an error when using BatchNorm1d. Here is the code:
##% first I set a model
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim


class net(nn.Module):
    def __init__(self, max_len, feature_linear, rnn, input_size, hidden_size, output_dim,
                 num_rnn_layers, bidirectional, batch_first=True, p=0.2):
        super(net, self).__init__()
        self.max_len = max_len
        self.feature_linear = feature_linear
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.p = p
        self.batch_first = batch_first
        self.linear1 = nn.Linear(max_len, feature_linear)
        init.kaiming_normal_(self.linear1.weight, mode='fan_in')
        self.BN1 = BN(feature_linear)

    def forward(self, xb, seq_len_crt):
        rnn_input = torch.zeros(xb.shape[0], self.feature_linear, self.input_size)
        for i in range(self.input_size):
            out = self.linear1(xb[:, :, i])  # xb[:, :, i].shape: (1, 34), out.shape: (1, 100)
            out = F.relu(out)                # input: out.shape (1, 100), output: out.shape (1, 100)
            out = self.BN1(out)              # input: out.shape (1, 100), output: out.shape (1, 100)
        # ... (rest of forward omitted in the question)
        return y_hat.squeeze(-1)

##% make the model as a function and optimize it
input_size = 5
hidden_size = 32
output_dim = 1
num_rnn_layers = 2
bidirectional = True
rnn = nn.LSTM
batch_size = batch_size  # batch_size was defined earlier in the notebook
feature_linear = 60
BN = nn.BatchNorm1d

model = net(max_len, feature_linear, rnn, input_size, hidden_size, output_dim,
            num_rnn_layers, bidirectional, p=0.1)
loss_func = nn.MSELoss(reduction='none')
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.01)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.05)


##% use this model to predict data
def predict(xb, model, seq_len):
    # xb's shape should be (batch_size, seq_len, n_features)
    if xb.ndim == 2:  # suitable for both ndarray and Tensor
        # add a batch_size dim
        xb = xb[None, ]

    if not isinstance(xb, torch.Tensor):
        xb = torch.Tensor(xb)
    return model(xb, seq_len)  # xb.shape: (1, 34, 5)

##% create training/valid/test data
seq_len_train_iter = []
for i in range(0, len(seq_len_train), batch_size):
    if i + batch_size <= len(seq_len_train):
        seq_len_train_iter.append(seq_len_train[i:i+batch_size])
    else:
        seq_len_train_iter.append(seq_len_train[i:])

seq_len_valid_iter = []
for i in range(0, len(seq_len_valid), batch_size):
    if i + batch_size <= len(seq_len_valid):
        seq_len_valid_iter.append(seq_len_valid[i:i+batch_size])
    else:
        seq_len_valid_iter.append(seq_len_valid[i:])

seq_len_test_iter = []
for i in range(0, len(seq_len_test), batch_size):
    if i + batch_size <= len(seq_len_test):
        seq_len_test_iter.append(seq_len_test[i:i+batch_size])
    else:
        seq_len_test_iter.append(seq_len_test[i:])

##% fit model
def fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter):
    train_loss_record = []
    valid_loss_record = []
    mean_pct_final = []
    mean_abs_final = []

    is_better = False
    last_epoch_abs_error = 0
    last_epoch_pct_error = 0

    mean_pct_final_train = []
    mean_abs_final_train = []

    for epoch in range(epochs):
        # seq_len_crt: current batch seq len
        for batches, ((xb, yb), seq_len_crt) in enumerate(zip(train_dl, seq_len_train_iter)):
            if isinstance(seq_len_crt, np.int64):
                seq_len_crt = [seq_len_crt]
            y_hat = model(xb, seq_len_crt)
            packed_yb = nn.utils.rnn.pack_padded_sequence(yb, seq_len_crt, batch_first=True, enforce_sorted=False)
            final_yb, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_yb)
            final_yb = final_yb.permute(1, 0)
            # assert torch.all(torch.tensor(seq_len_crt).eq(input_sizes))
            loss = loss_func(y_hat, final_yb)

            batch_size_crt = final_yb.shape[0]
            loss = (loss.sum(-1) / input_sizes).sum() / batch_size_crt

            loss.backward()
            optimizer.step()
            # scheduler.step()
            optimizer.zero_grad()
            # print(i)

            with torch.no_grad():
                train_loss_record.append(loss.item())
                if batches % 50 == 0 and epoch % 1 == 0:
                    # print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')

                    y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze()  # xb[0].shape: (34, 5)
                    label = yb[0][:len(y_hat)]
                    # plt.ion()
                    plt.plot(y_hat, label='predicted')
                    plt.plot(label, label='label')
                    plt.legend(loc='upper right')
                    plt.title('training mode')
                    plt.text(len(y_hat)+1, max(y_hat.max(), label.max()), f'Epoch {epoch}, batch {batches} training loss: {loss.item()}')
                    plt.show()
    return train_loss_record
But I got: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
The error message is:
ValueError                                Traceback (most recent call last)
<ipython-input-119-fb062ad3f20e> in <module>
----> 1 fit(500, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)

<ipython-input-118-2eb946c379bf> in fit(epochs, model, loss_func, optimizer, train_dl, valid_dl, valid_ds, seq_len_train_iter, seq_len_valid_iter)
     38                     # print(f'Epoch {epoch}, batch {i} training loss: {loss.item()}')
     39
---> 40                     y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze()  # xb[0].shape (34, 5)
     41                     label = yb[0][:len(y_hat)]
     42                     # plt.ion()

<ipython-input-116-28afce77e325> in predict(xb, model, seq_len)
      7     if not isinstance(xb, torch.Tensor):
      8         xb = torch.Tensor(xb)
----> 9     return model(xb, seq_len)  # xb.shape (None, 34, 5)

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-114-3e9c30d20ed6> in forward(self, xb, seq_len_crt)
     50             out = self.linear1(xb[:, :, i])  # xb[:,:,i].shape: (None, 34), out.shape (None, 100)
     51             out = F.relu(out)                # input: out.shape (None, 100), output: out.shape (None, 100)
---> 52             out = self.BN1(out)              # input: out.shape (None, 100), output: out.shape (None, 100)
     53
     54             out = self.linear2(out)

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\modules\batchnorm.py in forward(self, input)
    129         used for normalization (i.e. in eval mode when buffers are not None).
    130         """
--> 131         return F.batch_norm(
    132             input,
    133             # If buffers are not to be tracked, ensure that they won't be updated

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
   2052                      bias=bias, training=training, momentum=momentum, eps=eps)
   2053     if training:
-> 2054         _verify_batch_size(input.size())
   2055
   2056     return torch.batch_norm(

D:\Anaconda3\envs\LSTM\lib\site-packages\torch\nn\functional.py in _verify_batch_size(size)
   2035         size_prods *= size[i + 2]
   2036     if size_prods == 1:
-> 2037         raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
   2038
   2039

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 60])
I checked and found that at out = self.BN1(out), where out.shape = (1, 60), BatchNorm1d apparently does not allow batch_size = 1. But I don't know how to fix it.
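For context, here is a minimal sketch (my own reproduction, not code from the question) that triggers the same ValueError with a bare nn.BatchNorm1d:

import torch
import torch.nn as nn

bn = nn.BatchNorm1d(60)

bn.train()                               # training mode: statistics are computed from the batch
try:
    bn(torch.randn(1, 60))               # one sample -> per-channel variance is undefined
except ValueError as e:
    print(e)                             # Expected more than 1 value per channel when training, ...

bn(torch.randn(8, 60))                   # any batch_size > 1 works in training mode

bn.eval()                                # eval mode: running mean/var are used instead
print(bn(torch.randn(1, 60)).shape)      # torch.Size([1, 60]) -- single samples are fine here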
What does BatchNorm1d do mathematically? Try writing down the equation for the case batch_size = 1 and you will understand why PyTorch is angry with you.
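For reference, a sketch of the training-time computation (the standard batch-norm formulas; the batch_size = 1 remark is my own illustration, not quoted from the answer):

% Per channel c, over a batch of size N (training mode):
\mu_c = \frac{1}{N}\sum_{i=1}^{N} x_{i,c}, \qquad
\sigma_c^2 = \frac{1}{N}\sum_{i=1}^{N}\left(x_{i,c}-\mu_c\right)^2, \qquad
y_{i,c} = \gamma_c\,\frac{x_{i,c}-\mu_c}{\sqrt{\sigma_c^2+\epsilon}} + \beta_c
% For N = 1 we get \mu_c = x_{1,c} and \sigma_c^2 = 0, so every output collapses to
% y_{1,c} = \beta_c: the batch statistics are degenerate, which is exactly what the
% ValueError is guarding against.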
How do you solve it?

It is simple: BatchNorm has two "modes of operation". One is for training, where it estimates the mean and variance of the current batch (which is why you must have batch_size > 1 for training). The other "mode" is for evaluation: it uses the accumulated mean and variance to normalize new inputs without re-estimating them, so in this mode there is no problem processing samples one by one.

When evaluating your model, call model.eval() before and model.train() after.
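Applied to the code in the question, the fix could look roughly like this around the single-sample prediction used for plotting inside fit() (a sketch; the exact placement is an assumption):

model.eval()   # use running mean/var -> BatchNorm1d accepts batch_size = 1
y_hat = predict(xb[0], model, torch.tensor([seq_len_crt[0]])).detach().numpy().squeeze()
model.train()  # restore training mode before the next optimization step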