PyTorch Lightning epoch_end/validation_epoch_end

Xtr*_*erX 1 neural-network pytorch pytorch-lightning

Can someone break this code down and explain it to me? The parts I need help with are marked with "#This part". I would really appreciate any help, thanks.

def validation_epoch_end(self, outputs):
    batch_losses = [x["val_loss"]for x in outputs] #This part
    epoch_loss = torch.stack(batch_losses).mean() 
    batch_accs =  [x["val_acc"]for x in outputs]   #This part
    epoch_acc = torch.stack(batch_accs).mean()   
    return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

def epoch_end(self, epoch, result):
    print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format( epoch,result['val_loss'], result['val_acc'])) #This part

Álv*_*H.G 6

In PyTorch Lightning 2.0, validation_epoch_end has been replaced by on_validation_epoch_end. Here is the merge comment on GitHub (link).

To check all the model hooks available in PyTorch Lightning, you can visit this documentation.

There you can see that on_validation_epoch_end no longer receives any outputs argument. Therefore, to compute the validation loss or any other metric after each epoch, you should (1) create a list in the class constructor, (2) save the outputs in the validation_step model hook, and (3) clear the outputs from memory after each epoch in on_validation_epoch_end, as sketched below.
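For context, in the old API the outputs argument of validation_epoch_end was a list of the dicts returned by each validation_step, so the lines marked #This part simply collect the per-batch val_loss and val_acc tensors from those dicts before averaging them, and epoch_end just prints the aggregated result. Here is a minimal sketch of the same aggregation under Lightning >= 2.0 (the model and attribute names are my own, not from the question):

import torch
import torch.nn as nn
import pytorch_lightning as pl

class LitModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.net = nn.Linear(10, 3)
        self.val_step_losses = []                   # (1) buffer created in the constructor

    def forward(self, x):
        return self.net(x)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        val_loss = nn.functional.cross_entropy(self(x), y)
        self.val_step_losses.append(val_loss)       # (2) save each batch's output
        return val_loss

    def on_validation_epoch_end(self):              # note: no outputs argument anymore
        epoch_loss = torch.stack(self.val_step_losses).mean()
        self.log('val_loss_epoch', epoch_loss)
        self.val_step_losses.clear()                # (3) free the memory after each epoch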

Here is a minimal working example where I use an MLP model with a flexible configuration of hidden layers and numbers of neurons to measure the macro F1 score and cross-entropy loss on a 3-class problem:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
# from lightning.pytorch.callbacks import ModelCheckpoint  ## IMPORTANT - Do not mix imports from both packages; use either lightning.pytorch or pytorch_lightning (recommended)
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger

# DATA LOADER
class DataFrameDataset(Dataset):
    def __init__(self, dataframe, labels):
        self.data = torch.tensor(dataframe.values, dtype=torch.float32)
        self.labels = torch.tensor(labels.values, dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]


# MODEL AND MODEL HOOKS
class MLP(pl.LightningModule):
    def __init__(self, input_dim, hidden_layers, output_dim):
        super().__init__()
        self.layers = nn.ModuleList()
        in_dim = input_dim

        # --> HERE STEP 1 <--
        # ATTRIBUTES TO SAVE BATCH OUTPUTS
        self.training_step_outputs = []   # save outputs of each batch to compute the metric over the whole epoch
        self.training_step_targets = []   # save targets of each batch to compute the metric over the whole epoch
        self.val_step_outputs = []        # save outputs of each batch to compute the metric over the whole epoch
        self.val_step_targets = []        # save targets of each batch to compute the metric over the whole epoch

        for hidden_dim in hidden_layers:
            self.layers.append(nn.Linear(in_dim, hidden_dim))
            in_dim = hidden_dim

        self.layers.append(nn.Linear(in_dim, output_dim))

    def forward(self, x):
        for layer in self.layers[:-1]:
            x = nn.functional.relu(layer(x))
        x = self.layers[-1](x)
        return x

    def training_step(self, batch, batch_idx):
        # Train loss
        x, y = batch
        y_hat = self(x)
        train_loss = criterion(y_hat, y)  # NOTE: criterion is the module-level loss defined further below

        # Train loss per batch in epoch
        self.log('train_loss', train_loss, on_step=False, on_epoch=True, prog_bar=True)

        
        # GET AND SAVE OUTPUTS AND TARGETS PER BATCH
        y_pred = y_hat.argmax(dim=1).cpu().numpy()
        y_true = y.cpu().numpy()
        
        # --> HERE STEP 2 <--
        self.training_step_outputs.extend(y_pred)
        self.training_step_targets.extend(y_true)

        return train_loss

    def on_train_epoch_end(self):
        ## Macro F1 over the whole epoch, from the outputs and targets saved per batch
        train_all_outputs = self.training_step_outputs
        train_all_targets = self.training_step_targets
        f1_macro_epoch = f1_score(train_all_targets, train_all_outputs, average='macro')  # f1_score(y_true, y_pred)
        self.log("training_f1_epoch", f1_macro_epoch, on_step=False, on_epoch=True, prog_bar=True)

        # free up the memory
        # --> HERE STEP 3 <--
        self.training_step_outputs.clear()
        self.training_step_targets.clear()


    def validation_step(self, batch, batch_idx):
        # Val loss
        x, y = batch
        y_hat = self(x)
        val_loss = criterion(y_hat, y)

        # Val loss per batch in epoch
        self.log('val_loss', val_loss, on_step=False, on_epoch=True, prog_bar=True)

        # GET AND SAVE OUTPUTS AND TARGETS PER BATCH
        y_pred = y_hat.argmax(dim=1).cpu().numpy()
        y_true = y.cpu().numpy()
        
        # --> HERE STEP 2 <--
        self.val_step_outputs.extend(y_pred)
        self.val_step_targets.extend(y_true)

        return val_loss

    def on_validation_epoch_end(self):
        ## Macro F1 over the whole epoch, from the outputs and targets saved per batch
        val_all_outputs = self.val_step_outputs
        val_all_targets = self.val_step_targets
        val_f1_macro_epoch = f1_score(val_all_targets, val_all_outputs, average='macro')  # f1_score(y_true, y_pred)
        self.log("val_f1_epoch", val_f1_macro_epoch, on_step=False, on_epoch=True, prog_bar=True)


        # free up the memory
        # --> HERE STEP 3 <--
        self.val_step_outputs.clear()
        self.val_step_targets.clear()

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001, betas=(0.9, 0.999))
        return optimizer 

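As a side note (my own suggestion, not part of the original answer): instead of accumulating predictions in Python lists by hand, you can let torchmetrics handle the per-batch accumulation and the epoch-level reduction, which also takes care of synchronization across devices. A compact sketch of the idea, assuming torchmetrics is installed:

import torch
import torch.nn as nn
import pytorch_lightning as pl
from torchmetrics.classification import MulticlassF1Score

class MLPWithMetrics(pl.LightningModule):
    def __init__(self, input_dim, output_dim=3):
        super().__init__()
        self.net = nn.Linear(input_dim, output_dim)
        self.criterion = nn.CrossEntropyLoss()
        self.val_f1 = MulticlassF1Score(num_classes=output_dim, average='macro')

    def forward(self, x):
        return self.net(x)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        self.val_f1(y_hat, y)  # updates the metric's internal state for this batch
        # logging the metric object lets Lightning compute and reset it at epoch end
        self.log('val_f1_epoch', self.val_f1, on_step=False, on_epoch=True, prog_bar=True)
        return self.criterion(y_hat, y)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())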

Now you can create and train the model (e.g., two hidden layers with 100 and 50 neurons respectively):


# MODEL CREATION
# Assuming df is your dataframe
df = pd.read_csv("insert_path")

# Split your data into features and labels
features = df.drop(columns=['LABEL'])
labels = df['LABEL']


# Split your data into training and validation sets
features_train, features_val, labels_train, labels_val = train_test_split(features , labels, test_size=0.3, random_state=42)

# Create your custom datasets
train_dataset = DataFrameDataset(features_train, labels_train)
val_dataset = DataFrameDataset(features_val, labels_val)

# Create your Data Loaders
train_loader = DataLoader(train_dataset, batch_size=200, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=200, shuffle=False)


# Define the model (Flexible configuration)
model_MLP = MLP(input_dim=features_train.shape[1], hidden_layers=[100, 50], output_dim=3)

# TRAIN
# Define the Lightning trainer with logger callback
logger = CSVLogger(
    save_dir="outputs/",
    name="my_exp_name",
    flush_logs_every_n_steps=500,
)


checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    filename="MLP-{epoch:02d}",
    dirpath="outputs/",
)


# Criterion (module-level; the model hooks above reference it, so it must be defined before trainer.fit)
class_weights = torch.Tensor([0.35, 0.35, 0.3])
criterion = nn.CrossEntropyLoss(weight=class_weights, reduction='mean')



epochs = 20
accelerator = "gpu" if torch.cuda.is_available() else "cpu"

trainer = pl.Trainer(logger=logger,
                     max_epochs=epochs,
                     callbacks=[checkpoint_callback],
                     accelerator=accelerator)

# Train the model
trainer.fit(model_MLP, train_loader, val_loader)
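Once training finishes, the ModelCheckpoint callback exposes the path of the best checkpoint via best_model_path, and you can reload it; since this MLP does not call save_hyperparameters, the constructor arguments have to be passed again (the values below assume the configuration above):

best_model = MLP.load_from_checkpoint(
    checkpoint_callback.best_model_path,
    input_dim=features_train.shape[1],
    hidden_layers=[100, 50],
    output_dim=3,
)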
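Finally, the CSVLogger configured above writes every logged metric to disk, so you can inspect the per-epoch curves with pandas (the version_0 folder is an assumption; Lightning increments the version number on each run):

import pandas as pd

# CSVLogger writes to <save_dir>/<name>/version_<n>/metrics.csv
metrics = pd.read_csv("outputs/my_exp_name/version_0/metrics.csv")
print(metrics[['epoch', 'val_loss', 'val_f1_epoch']].dropna().tail())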