bka*_*glu 5 machine-learning time-series deep-learning lstm pytorch
I am currently building an LSTM network to forecast time-series data with PyTorch. Following Roman's blog post, I implemented a simple LSTM for univariate time-series data; please see the class definition below. However, I have been stuck for a few days now, ever since I tried to add more features to the input data, such as hour of the day, day of the week, week of the year, and so on.
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, future=0, y=None):
        outputs = []

        # reset the state of LSTM
        # the state is kept till the end of the sequence
        h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
        c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)

        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            h_t, c_t = self.lstm(input_t, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        for i in range(future):
            if y is not None and random.random() > 0.5:
                output = y[:, [i]]  # teacher forcing
            h_t, c_t = self.lstm(output, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs


class Optimization:
    "A helper class to train, test and diagnose the LSTM"

    def __init__(self, model, loss_fn, optimizer, scheduler):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.train_losses = []
        self.val_losses = []
        self.futures = []

    @staticmethod
    def generate_batch_data(x, y, batch_size):
        for batch, i in enumerate(range(0, len(x) - batch_size, batch_size)):
            x_batch = x[i : i + batch_size]
            y_batch = y[i : i + batch_size]
            yield x_batch, y_batch, batch

    def train(
        self,
        x_train,
        y_train,
        x_val=None,
        y_val=None,
        batch_size=100,
        n_epochs=20,
        dropout=0.2,
        do_teacher_forcing=None,
    ):
        seq_len = x_train.shape[1]
        for epoch in range(n_epochs):
            start_time = time.time()
            self.futures = []

            train_loss = 0
            for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
                y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
                self.optimizer.zero_grad()
                loss = self.loss_fn(y_pred, y_batch)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
            self.scheduler.step()
            train_loss /= batch
            self.train_losses.append(train_loss)

            self._validation(x_val, y_val, batch_size)

            elapsed = time.time() - start_time
            print(
                "Epoch %d Train loss: %.2f. Validation loss: %.2f. Avg future: %.2f. Elapsed time: %.2fs."
                % (epoch + 1, train_loss, self.val_losses[-1], np.average(self.futures), elapsed)
            )

    def _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing):
        if do_teacher_forcing:
            future = random.randint(1, int(seq_len / 2))
            limit = x_batch.size(1) - future
            y_pred = self.model(x_batch[:, :limit], future=future, y=y_batch[:, limit:])
        else:
            future = 0
            y_pred = self.model(x_batch)
        self.futures.append(future)
        return y_pred

    def _validation(self, x_val, y_val, batch_size):
        if x_val is None or y_val is None:
            return
        with torch.no_grad():
            val_loss = 0
            batch = 1
            for x_batch, y_batch, batch in self.generate_batch_data(x_val, y_val, batch_size):
                y_pred = self.model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                val_loss += loss.item()
            val_loss /= batch
            self.val_losses.append(val_loss)

    def evaluate(self, x_test, y_test, batch_size, future=1):
        with torch.no_grad():
            test_loss = 0
            actual, predicted = [], []
            for x_batch, y_batch, batch in self.generate_batch_data(x_test, y_test, batch_size):
                y_pred = self.model(x_batch, future=future)
                y_pred = (
                    y_pred[:, -len(y_batch) :] if y_pred.shape[1] > y_batch.shape[1] else y_pred
                )
                loss = self.loss_fn(y_pred, y_batch)
                test_loss += loss.item()
                actual += torch.squeeze(y_batch[:, -1]).data.cpu().numpy().tolist()
                predicted += torch.squeeze(y_pred[:, -1]).data.cpu().numpy().tolist()
            test_loss /= batch
            return actual, predicted, test_loss

    def plot_losses(self):
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")

Below you can find some helper functions that help me split and format the data before feeding it into the LSTM network.
def to_dataframe(actual, predicted):
    return pd.DataFrame({"value": actual, "prediction": predicted})

def inverse_transform(scaler, df, columns):
    for col in columns:
        df[col] = scaler.inverse_transform(df[col])
    return df

def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


def train_val_test_split_new(df, test_ratio=0.2, seq_len=100):
    y = df['value']
    X = df.drop(columns=['value'])
    train_ratio = 1 - test_ratio
    val_ratio = 1 - ((train_ratio - test_ratio) / train_ratio)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio)

    return X_train, y_train, X_val, y_val, X_test, y_test

I use the following dataframe to train my model.
# df_train
                     value  weekday  monthday  hour
timestamp
2014-07-01 00:00:00  10844        1         1     0
2014-07-01 00:30:00   8127        1         1     0
2014-07-01 01:00:00   6210        1         1     1
2014-07-01 01:30:00   4656        1         1     1
2014-07-01 02:00:00   3820        1         1     2
...                    ...      ...       ...   ...
2015-01-31 21:30:00  24670        5        31    21
2015-01-31 22:00:00  25721        5        31    22
2015-01-31 22:30:00  27309        5        31    22
2015-01-31 23:00:00  26591        5        31    23
2015-01-31 23:30:00  26288        5        31    23
10320 rows × 4 columns

# x_train
                     weekday  monthday  hour
timestamp
2014-08-26 16:30:00        1        26    16
2014-08-18 16:30:00        0        18    16
2014-10-22 20:00:00        2        22    20
2014-12-10 08:00:00        2        10     8
2014-07-27 22:00:00        6        27    22
...                      ...       ...   ...
2014-08-24 05:30:00        6        24     5
2014-11-24 12:00:00        0        24    12
2014-12-18 06:00:00        3        18     6
2014-07-27 17:00:00        6        27    17
2014-12-05 21:00:00        4         5    21
6192 rows × 3 columns

# y_train
timestamp
2014-08-26 16:30:00    14083
2014-08-18 16:30:00    14465
2014-10-22 20:00:00    25195
2014-12-10 08:00:00    21348
2014-07-27 22:00:00    16356
                       ...
2014-08-24 05:30:00     2948
2014-11-24 12:00:00    16292
2014-12-18 06:00:00     7029
2014-07-27 17:00:00    18883
2014-12-05 21:00:00    26284
Name: value, Length: 6192, dtype: int64

After transforming and splitting the time-series data into smaller batches, the training datasets for X and y become the following:
X_data shape is (6093, 100, 3)
y_data shape is (6093,)
tensor([[[-1.0097,  1.1510,  0.6508],
         [-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         ...,
         [ 1.5044, -1.4417, -1.6413],
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087]],

        [[-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         ...,
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076]],

        [[-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         [ 1.5044,  1.2637,  1.5104],
         ...,
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076],
         [ 0.4988,  0.5874, -0.6385]],

        ...,

        [[ 1.0016,  0.9255, -1.2115],
         [-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         ...,
         [ 1.5044,  0.9255, -0.9250],
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818]],

        [[-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         ...,
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941]],

        [[-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         [-0.0041, -1.6672, -0.4952],
         ...,
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941],
         [ 0.4988, -1.2163,  1.3671]]])
tensor([ 0.4424,  0.1169,  0.0148,  ..., -1.1653,  0.5394,  1.6037])

Finally, to check that the dimensions of all these training, validation and test datasets are correct, I printed out their shapes.
train shape is: torch.Size([6093, 100, 3])
train label shape is: torch.Size([6093])
val shape is: torch.Size([1965, 100, 3])
val label shape is: torch.Size([1965])
test shape is: torch.Size([1965, 100, 3])
test label shape is: torch.Size([1965])

When I try to build the model as follows, I end up with a runtime error complaining about an inconsistent input size.
model_params = {'train_ratio': 0.8,
                'validation_ratio': 0.2,
                'sequence_length': 100,
                'teacher_forcing': False,
                'dropout_rate': 0.2,
                'batch_size': 100,
                'num_of_epochs': 5,
                'hidden_size': 24,
                'n_features': 3,
                'learning_rate': 1e-3
                }

train_ratio = model_params['train_ratio']
val_ratio = model_params['validation_ratio']
seq_len = model_params['sequence_length']
teacher_forcing = model_params['teacher_forcing']
dropout_rate = model_params['dropout_rate']
batch_size = model_params['batch_size']
n_epochs = model_params['num_of_epochs']
hidden_size = model_params['hidden_size']
n_features = model_params['n_features']
lr = model_params['learning_rate']


model = Model(input_size=n_features, hidden_size=hidden_size, output_size=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
optimization = Optimization(model, loss_fn, optimizer, scheduler)

start_time = datetime.now()
optimization.train(x_train, y_train, x_val, y_val,
                   batch_size=batch_size,
                   n_epochs=n_epochs,
                   dropout=dropout_rate,
                   do_teacher_forcing=teacher_forcing)

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-192-6fc406c0113d> in <module>
      6
      7 start_time = datetime.now()
----> 8 optimization.train(x_train, y_train, x_val, y_val,
      9                    batch_size=batch_size,
     10                    n_epochs=n_epochs,

<ipython-input-189-c18d20430910> in train(self, x_train, y_train, x_val, y_val, batch_size, n_epochs, dropout, do_teacher_forcing)
     68             train_loss = 0
     69             for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
---> 70                 y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
     71                 self.optimizer.zero_grad()
     72                 loss = self.loss_fn(y_pred, y_batch)

<ipython-input-189-c18d20430910> in _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing)
     93         else:
     94             future = 0
---> 95             y_pred = self.model(x_batch)
     96         self.futures.append(future)
     97         return y_pred

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-189-c18d20430910> in forward(self, input, future, y)
     17
     18         for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
---> 19             h_t, c_t = self.lstm(input_t, (h_t, c_t))
     20             output = self.linear(h_t)
     21             outputs += [output]

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
    963
    964     def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
--> 965         self.check_forward_input(input)
    966         if hx is None:
    967             zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in check_forward_input(self, input)
    789     def check_forward_input(self, input: Tensor) -> None:
    790         if input.size(1) != self.input_size:
--> 791             raise RuntimeError(
    792                 "input has inconsistent input_size: got {}, expected {}".format(
    793                     input.size(1), self.input_size))

RuntimeError: input has inconsistent input_size: got 1, expected 3

I suspect that my current LSTM model class does not support data with multiple features, and I have been trying out different approaches lately, with no luck so far. Feel free to share your thoughts or point me in the right direction that could help me solve this problem.
As @stackoverflowuser2010 suggested, I printed out the shapes of the tensors input_t, h_t and c_t that are fed into the forward step right before the error is thrown:
input_t
torch.Size([100, 1, 3])
h_t
torch.Size([100, 24])
c_t
torch.Size([100, 24])
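For reference, nn.LSTMCell expects a 2-D input of shape (batch, input_size), while chunk() along dim=1 keeps the time axis, so each input_t arrives as (100, 1, 3) and the size(1) == 1 check fails against input_size == 3. Below is a minimal, self-contained sketch of that mismatch (my own illustration, not part of the original code; the sizes simply mirror the values above), showing that squeezing the singleton time dimension gives LSTMCell the shape it expects:

import torch
import torch.nn as nn

cell = nn.LSTMCell(3, 24)                  # input_size=3, hidden_size=24
x = torch.randn(100, 100, 3)               # (batch, seq_len, n_features)
h_t = torch.zeros(100, 24)
c_t = torch.zeros(100, 24)

for input_t in x.chunk(x.size(1), dim=1):  # each chunk: (100, 1, 3)
    # passing input_t directly raises "input has inconsistent input_size: got 1, expected 3";
    # squeezing the time axis yields the (batch, input_size) shape LSTMCell expects
    h_t, c_t = cell(input_t.squeeze(1), (h_t, c_t))

print(h_t.shape)                           # torch.Size([100, 24])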
After a few weeks of tinkering, I figured it out. It has been a fruitful journey for me, so I wanted to share what I found. If you would like to see the full code walkthrough, please check out my Medium post on the matter.
As with Pandas, I found that things usually run faster and more smoothly when I stick to the PyTorch way of doing things. Both libraries rely on NumPy, and I am sure virtually every table and matrix operation could be carried out explicitly with NumPy arrays and functions. However, doing so would strip away all the nice abstractions and performance improvements these libraries provide and turn every step into a CS exercise. It's fun until it isn't.
Rather than manually massaging all the training and validation sets to pass them to the model, PyTorch's TensorDataset and DataLoader classes helped me a great deal. Scale the feature and target sets used for training and validation, and we are left with NumPy arrays. We can convert these arrays into tensors and use those tensors to create our TensorDataset, or a custom Dataset depending on your requirements. Finally, DataLoaders let us iterate over such datasets with far less hassle than doing it by hand, since they already provide built-in batching, shuffling, and a drop-last-batch option.
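For completeness, this is roughly how the X_train_arr / y_train_arr / X_val_arr / y_val_arr arrays used below might be produced. The scaler choice and variable names are my assumption here; the actual scaling code lives in the Medium post rather than in this answer:

from sklearn.preprocessing import MinMaxScaler

# Fit the scalers on the training split only, then reuse them for validation
scaler = MinMaxScaler()
X_train_arr = scaler.fit_transform(X_train)
X_val_arr = scaler.transform(X_val)

target_scaler = MinMaxScaler()
y_train_arr = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_val_arr = target_scaler.transform(y_val.values.reshape(-1, 1))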
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)
val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)

train = TensorDataset(train_features, train_targets)
train_loader = DataLoader(train, batch_size=64, shuffle=False, drop_last=True)
val = TensorDataset(val_features, val_targets)
val_loader = DataLoader(val, batch_size=64, shuffle=False, drop_last=True)
After transforming our data into iterable datasets, they can later be used for mini-batch training. Rather than explicitly defining batches or wrestling with matrix operations, we can simply iterate over them through the DataLoaders, as shown below.
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-2)

train_losses = []
val_losses = []
train_step = make_train_step(model, criterion, optimizer)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    train_losses.append(training_loss)

    with torch.no_grad():
        batch_val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.view([batch_size, -1, n_features]).to(device)
            y_val = y_val.to(device)
            model.eval()
            yhat = model(x_val)
            val_loss = criterion(y_val, yhat).item()
            batch_val_losses.append(val_loss)
        validation_loss = np.mean(batch_val_losses)
        val_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")
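The make_train_step factory used above is not shown in this post (it is part of the Medium walkthrough). A minimal sketch of what such a helper might look like, assuming it only wraps a single gradient update and returns the batch loss:

def make_train_step(model, loss_fn, optimizer):
    # Build a closure that performs one optimization step on a single batch
    def train_step(x, y):
        model.train()
        yhat = model(x)
        loss = loss_fn(y, yhat)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        return loss.item()
    return train_step

Wrapping the step in a closure keeps the optimizer bookkeeping out of the epoch loop, which is what makes the training loop above read so cleanly.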
Another handy feature PyTorch provides is the view() function, which allows fast and memory-efficient tensor reshaping. Since I defined my LSTM model with batch_first=True, the batch tensor for the feature set must have the shape (batch size, time steps, number of features). The line x_batch = x_batch.view([batch_size, -1, n_features]).to(device) in the code above does exactly that.
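The LSTMModel class itself is also defined in the Medium post rather than here. The following is only a sketch of a batch_first LSTM regressor consistent with how the model is constructed and called above; the exact layer layout and the use of the last time step's output are my assumptions:

import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True -> inputs of shape (batch, seq_len, n_features)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # predict from the hidden state of the last time step
        return self.fc(out[:, -1, :])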
I hope this answer helps those dealing with similar problems, or at least gives an idea of which direction to take. I changed a lot compared to the code shared in the original post, but I won't put it all here for the sake of simplicity. Feel free to check out the rest of it in my other SO post.