Feeding multiple inputs to an LSTM for time series forecasting using PyTorch

bka*_*glu 5 machine-learning time-series deep-learning lstm pytorch

I am currently building an LSTM network to forecast time series data with PyTorch. Following Roman's blog post, I implemented a simple LSTM for univariate time series data; please see the class definitions below. However, I have been stuck for the last few days trying to add more features to the input data, such as hour of the day, day of the week, week of the year, and so on.

import random
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn


class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, future=0, y=None):
        outputs = []

        # reset the state of LSTM
        # the state is kept till the end of the sequence
        h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
        c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)

        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            h_t, c_t = self.lstm(input_t, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        for i in range(future):
            if y is not None and random.random() > 0.5:
                output = y[:, [i]]  # teacher forcing
            h_t, c_t = self.lstm(output, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs


class Optimization:
    "A helper class to train, test and diagnose the LSTM"

    def __init__(self, model, loss_fn, optimizer, scheduler):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.train_losses = []
        self.val_losses = []
        self.futures = []

    @staticmethod
    def generate_batch_data(x, y, batch_size):
        for batch, i in enumerate(range(0, len(x) - batch_size, batch_size)):
            x_batch = x[i : i + batch_size]
            y_batch = y[i : i + batch_size]
            yield x_batch, y_batch, batch

    def train(
        self,
        x_train,
        y_train,
        x_val=None,
        y_val=None,
        batch_size=100,
        n_epochs=20,
        dropout=0.2,
        do_teacher_forcing=None,
    ):
        seq_len = x_train.shape[1]
        for epoch in range(n_epochs):
            start_time = time.time()
            self.futures = []

            train_loss = 0
            for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
                y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
                self.optimizer.zero_grad()
                loss = self.loss_fn(y_pred, y_batch)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
            self.scheduler.step()
            train_loss /= batch
            self.train_losses.append(train_loss)

            self._validation(x_val, y_val, batch_size)

            elapsed = time.time() - start_time
            print(
                "Epoch %d Train loss: %.2f. Validation loss: %.2f. Avg future: %.2f. Elapsed time: %.2fs."
                % (epoch + 1, train_loss, self.val_losses[-1], np.average(self.futures), elapsed)
            )

    def _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing):
        if do_teacher_forcing:
            future = random.randint(1, seq_len // 2)
            limit = x_batch.size(1) - future
            y_pred = self.model(x_batch[:, :limit], future=future, y=y_batch[:, limit:])
        else:
            future = 0
            y_pred = self.model(x_batch)
        self.futures.append(future)
        return y_pred

    def _validation(self, x_val, y_val, batch_size):
        if x_val is None or y_val is None:
            return
        with torch.no_grad():
            val_loss = 0
            batch = 1
            for x_batch, y_batch, batch in self.generate_batch_data(x_val, y_val, batch_size):
                y_pred = self.model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                val_loss += loss.item()
            val_loss /= batch
            self.val_losses.append(val_loss)

    def evaluate(self, x_test, y_test, batch_size, future=1):
        with torch.no_grad():
            test_loss = 0
            actual, predicted = [], []
            for x_batch, y_batch, batch in self.generate_batch_data(x_test, y_test, batch_size):
                y_pred = self.model(x_batch, future=future)
                y_pred = (
                    y_pred[:, -len(y_batch) :] if y_pred.shape[1] > y_batch.shape[1] else y_pred
                )
                loss = self.loss_fn(y_pred, y_batch)
                test_loss += loss.item()
                actual += torch.squeeze(y_batch[:, -1]).data.cpu().numpy().tolist()
                predicted += torch.squeeze(y_pred[:, -1]).data.cpu().numpy().tolist()
            test_loss /= batch
            return actual, predicted, test_loss

    def plot_losses(self):
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")

Below are some helper functions that I use to split and format the data before feeding it to the LSTM network.

import pandas as pd
from numpy import array
from sklearn.model_selection import train_test_split


def to_dataframe(actual, predicted):
    return pd.DataFrame({"value": actual, "prediction": predicted})


def inverse_transform(scaler, df, columns):
    for col in columns:
        df[col] = scaler.inverse_transform(df[col])
    return df


def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


def train_val_test_split_new(df, test_ratio=0.2, seq_len=100):
    y = df['value']
    X = df.drop(columns=['value'])
    train_ratio = 1 - test_ratio
    val_ratio = 1 - ((train_ratio - test_ratio) / train_ratio)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio)

    return X_train, y_train, X_val, y_val, X_test, y_test

I use the following dataframe to train my model.

# df_train
                     value  weekday  monthday  hour
timestamp
2014-07-01 00:00:00  10844        1         1     0
2014-07-01 00:30:00   8127        1         1     0
2014-07-01 01:00:00   6210        1         1     1
2014-07-01 01:30:00   4656        1         1     1
2014-07-01 02:00:00   3820        1         1     2
...                    ...      ...       ...   ...
2015-01-31 21:30:00  24670        5        31    21
2015-01-31 22:00:00  25721        5        31    22
2015-01-31 22:30:00  27309        5        31    22
2015-01-31 23:00:00  26591        5        31    23
2015-01-31 23:30:00  26288        5        31    23
10320 rows × 4 columns

# x_train
                     weekday  monthday  hour
timestamp
2014-08-26 16:30:00        1        26    16
2014-08-18 16:30:00        0        18    16
2014-10-22 20:00:00        2        22    20
2014-12-10 08:00:00        2        10     8
2014-07-27 22:00:00        6        27    22
...                      ...       ...   ...
2014-08-24 05:30:00        6        24     5
2014-11-24 12:00:00        0        24    12
2014-12-18 06:00:00        3        18     6
2014-07-27 17:00:00        6        27    17
2014-12-05 21:00:00        4         5    21
6192 rows × 3 columns

# y_train
timestamp
2014-08-26 16:30:00    14083
2014-08-18 16:30:00    14465
2014-10-22 20:00:00    25195
2014-12-10 08:00:00    21348
2014-07-27 22:00:00    16356
                       ...
2014-08-24 05:30:00     2948
2014-11-24 12:00:00    16292
2014-12-18 06:00:00     7029
2014-07-27 17:00:00    18883
2014-12-05 21:00:00    26284
Name: value, Length: 6192, dtype: int64

After transforming and splitting the time series data into smaller sequences, the training data for X and y looks as follows:

X_data shape is (6093, 100, 3)
y_data shape is (6093,)
tensor([[[-1.0097,  1.1510,  0.6508],
         [-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         ...,
         [ 1.5044, -1.4417, -1.6413],
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087]],

        [[-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         ...,
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076]],

        [[-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         [ 1.5044,  1.2637,  1.5104],
         ...,
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076],
         [ 0.4988,  0.5874, -0.6385]],

        ...,

        [[ 1.0016,  0.9255, -1.2115],
         [-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         ...,
         [ 1.5044,  0.9255, -0.9250],
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818]],

        [[-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         ...,
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941]],

        [[-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         [-0.0041, -1.6672, -0.4952],
         ...,
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941],
         [ 0.4988, -1.2163,  1.3671]]])
tensor([ 0.4424,  0.1169,  0.0148,  ..., -1.1653,  0.5394,  1.6037])
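For context, the windowing above is produced with the split_sequences helper defined earlier. The exact preprocessing call is not included in my code above, so the sketch below is only illustrative; names such as X_scaled and y_scaled are hypothetical placeholders for the scaled feature matrix and target vector.

import numpy as np
import torch

# stack the scaled features and the scaled target into one array so that
# split_sequences can slice windows of 100 time steps with 3 features each
data = np.hstack([X_scaled, y_scaled.reshape(-1, 1)])   # shape (6192, 4): 3 features + target
X_data, y_data = split_sequences(data, n_steps=100)      # -> (6093, 100, 3) and (6093,)

X_data = torch.from_numpy(X_data).float()
y_data = torch.from_numpy(y_data).float()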

Finally, to double-check that the dimensions of the training, validation, and test datasets are correct, I printed out their shapes.

train shape is: torch.Size([6093, 100, 3])
train label shape is: torch.Size([6093])
val shape is: torch.Size([1965, 100, 3])
val label shape is: torch.Size([1965])
test shape is: torch.Size([1965, 100, 3])
test label shape is: torch.Size([1965])

When I try to build and train the model as follows, I end up with a RuntimeError complaining about an inconsistent input size.

model_params = {'train_ratio': 0.8,
                'validation_ratio': 0.2,
                'sequence_length': 100,
                'teacher_forcing': False,
                'dropout_rate': 0.2,
                'batch_size': 100,
                'num_of_epochs': 5,
                'hidden_size': 24,
                'n_features': 3,
                'learning_rate': 1e-3
               }

train_ratio = model_params['train_ratio']
val_ratio = model_params['validation_ratio']
seq_len = model_params['sequence_length']
teacher_forcing = model_params['teacher_forcing']
dropout_rate = model_params['dropout_rate']
batch_size = model_params['batch_size']
n_epochs = model_params['num_of_epochs']
hidden_size = model_params['hidden_size']
n_features = model_params['n_features']
lr = model_params['learning_rate']


model = Model(input_size=n_features, hidden_size=hidden_size, output_size=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
optimization = Optimization(model, loss_fn, optimizer, scheduler)

start_time = datetime.now()
optimization.train(x_train, y_train, x_val, y_val,
                   batch_size=batch_size,
                   n_epochs=n_epochs,
                   dropout=dropout_rate,
                   do_teacher_forcing=teacher_forcing)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-192-6fc406c0113d> in <module>
      6 
      7 start_time = datetime.now()
----> 8 optimization.train(x_train, y_train, x_val, y_val, 
      9                      batch_size=batch_size,
     10                      n_epochs=n_epochs,

<ipython-input-189-c18d20430910> in train(self, x_train, y_train, x_val, y_val, batch_size, n_epochs, dropout, do_teacher_forcing)
     68             train_loss = 0
     69             for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
---> 70                 y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
     71                 self.optimizer.zero_grad()
     72                 loss = self.loss_fn(y_pred, y_batch)

<ipython-input-189-c18d20430910> in _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing)
     93         else:
     94             future = 0
---> 95             y_pred = self.model(x_batch)
     96         self.futures.append(future)
     97         return y_pred

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-189-c18d20430910> in forward(self, input, future, y)
     17 
     18         for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
---> 19             h_t, c_t = self.lstm(input_t, (h_t, c_t))
     20             output = self.linear(h_t)
     21             outputs += [output]

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
    963 
    964     def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
--> 965         self.check_forward_input(input)
    966         if hx is None:
    967             zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in check_forward_input(self, input)
    789     def check_forward_input(self, input: Tensor) -> None:
    790         if input.size(1) != self.input_size:
--> 791             raise RuntimeError(
    792                 "input has inconsistent input_size: got {}, expected {}".format(
    793                     input.size(1), self.input_size))

RuntimeError: input has inconsistent input_size: got 1, expected 3

I suspect that my current LSTM model class does not support data with multiple features, and I have been trying different approaches recently with no luck so far. Feel free to share your thoughts or point me in the right direction to help me solve this problem.


As suggested by @stackoverflowuser2010, I printed out the shapes of the tensors input_t, h_t, and c_t that are fed into the forward step right before the error is thrown.

input_t
torch.Size([100, 1, 3])
h_t
torch.Size([100, 24])
c_t
torch.Size([100, 24])

bka*_*glu 2

After a few weeks of fiddling around, I solved the problem. It has been a fruitful journey for me, so I would like to share what I discovered. If you would like a complete walkthrough of the code, please check out my Medium post on the matter.

As with Pandas, I found that things tend to run faster and more smoothly when I stick to the PyTorch way of doing things. Both libraries rely on NumPy, and I am sure that practically every table and matrix operation could be performed explicitly with NumPy arrays and functions. However, doing so would strip away all the nice abstractions and performance improvements these libraries offer and turn each step into a CS exercise. It is fun until it stops being fun.

Rather than manually adjusting all the training and validation sets to pass them to the model, PyTorch's TensorDataset and DataLoader classes helped me a great deal. After scaling the feature and target sets used for training and validation, we are left with NumPy arrays. We can convert these arrays into tensors and use them to create our TensorDataset, or a custom Dataset depending on your requirements. Finally, DataLoaders let us iterate over such datasets with far less hassle, since they already provide built-in batching, shuffling, and dropping of the last incomplete batch.
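The scaling step itself is not reproduced in this answer. Below is a minimal sketch of what it could look like, assuming scikit-learn's MinMaxScaler (the choice of scaler is my assumption here) and the X/y splits from the question; the resulting arrays are then wrapped into TensorDatasets right after.

from sklearn.preprocessing import MinMaxScaler

# fit the scalers on the training split only, then reuse them for the validation split
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()

X_train_arr = feature_scaler.fit_transform(X_train)
X_val_arr = feature_scaler.transform(X_val)

y_train_arr = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_val_arr = target_scaler.transform(y_val.values.reshape(-1, 1))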

import torch
from torch.utils.data import TensorDataset, DataLoader

# X_*_arr / y_*_arr are the scaled NumPy arrays prepared above
train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)

val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)

train = TensorDataset(train_features, train_targets)
train_loader = DataLoader(train, batch_size=64, shuffle=False, drop_last=True)

val = TensorDataset(val_features, val_targets)
val_loader = DataLoader(val, batch_size=64, shuffle=False, drop_last=True)

Once our data is converted into iterable datasets, it can later be used for mini-batch training. Instead of explicitly defining batches or wrestling with matrix operations, we can simply iterate over the DataLoaders, as shown below.

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# LSTMModel and make_train_step come from my full code; rough sketches are given further below
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-2)

train_losses = []
val_losses = []
train_step = make_train_step(model, criterion, optimizer)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)  # keep the model on the same device as the batches

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    train_losses.append(training_loss)    
    with torch.no_grad():
        batch_val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.view([batch_size, -1, n_features]).to(device)
            y_val = y_val.to(device)        
            model.eval()
            yhat = model(x_val)
            val_loss = criterion(y_val, yhat).item()
            batch_val_losses.append(val_loss)
        validation_loss = np.mean(batch_val_losses)
        val_losses.append(validation_loss)
    
    print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")
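The loop above relies on a make_train_step helper that I have not reproduced here. A minimal sketch of what such a closure could look like follows; treat it as my reconstruction under assumptions rather than the exact helper from the full walkthrough.

def make_train_step(model, loss_fn, optimizer):
    # returns a function that runs one optimisation step on a single mini-batch
    def train_step(x, y):
        model.train()            # put dropout/batch-norm layers into training mode
        yhat = model(x)          # forward pass
        loss = loss_fn(y, yhat)  # compute the loss against the targets
        loss.backward()          # backpropagate
        optimizer.step()         # update the parameters
        optimizer.zero_grad()    # clear gradients for the next batch
        return loss.item()
    return train_step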

Another handy feature PyTorch provides is the view() function, which allows fast and memory-efficient reshaping of tensors. Since I defined my LSTM model with batch_first=True, the batch tensor for the feature set must have the shape (batch size, time steps, number of features). The line x_batch = x_batch.view([batch_size, -1, n_features]).to(device) in the code above does exactly that.
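The LSTMModel class itself is also not shown in this answer. For readers who want something runnable, here is a minimal sketch of a batch_first LSTM regressor with the constructor signature used above; it is an assumed stand-in, not the exact model from my walkthrough.

import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True expects inputs of shape (batch, seq_len, n_features)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # initialise hidden and cell states on the same device as the input
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # use the hidden state of the last time step for the prediction
        return self.fc(out[:, -1, :])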

I hope this answer helps others dealing with similar problems, or at least gives an idea of which direction to take. I changed a lot of the code shared in the original question, but I won't put all of it here for the sake of simplicity. Feel free to check out the rest in my other SO post.