Luc*_*toe 5 python machine-learning conv-neural-network pytorch pytorch-lightning
我试图使用 PyTorch 和 PyTorch Lightning 制作多输入模型,但我不明白为什么训练器卡在 epoch 0。我试图将此代码从 TensorFlow 迁移到 PyTorch,但 PyTorch 学习曲线是有点陡,我不知道从这里该去哪里。
RC_train_config = config.init_dataset_config(
'RC',
'GI4E',
'label',
16,
lr = 0.001,
epochs = 500,
train_ratio = 0.8
Run Code Online (Sandbox Code Playgroud)
模型的配置,包括超参数和使用的数据集。它也用于数据选择,因为不同的数据集需要不同的处理方法。
class RCDataset(Dataset):
def __init__(self, config_dataset):
super().__init__()
self.config_dataset = config_dataset
# Image-handling
if self.config_dataset['dataset'] == 'all':
pass
elif self.config_dataset['dataset'] == 'BIOID':
if self.config_dataset['mode'] == 'label':
pass
elif self.config_dataset['mode'] == 'filter':
pass
elif self.config_dataset['dataset'] == 'GI4E':
if self.config_dataset['mode'] == 'label':
image1_noteye_paths = glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/0/noteye/*')
image1_eye_paths = glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/0/left/*')
image1_eye_paths += glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/0/right/*')
image2_noteye_paths = glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/1/noteye/*')
image2_eye_paths = glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/1/left/*')
image2_eye_paths += glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/1/right/*')
image3_noteye_paths = glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/2/noteye/*')
image3_eye_paths = glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/2/left/*')
image3_eye_paths += glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/2/right/*')
count_eye = len(glob(C.WORKING_DATASETS['GI4E']['images_label'] + '/2/left/*'))
elif self.config_dataset['mode'] == 'filter':
image1_noteye_paths = glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/0/noteye/*')
image1_eye_paths = glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/0/left/*')
image1_eye_paths += glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/0/right/*')
image2_noteye_paths = glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/1/noteye/*')
image2_eye_paths = glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/1/left/*')
image2_eye_paths += glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/1/right/*')
image3_noteye_paths = glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/2/noteye/*')
image3_eye_paths = glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/2/left/*')
image3_eye_paths += glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/2/right/*')
count_eye = len(glob(C.WORKING_DATASETS['GI4E']['images_filter'] + '/2/left/*'))
self.image1_paths = image1_noteye_paths + image1_eye_paths
self.image2_paths = image2_noteye_paths + image2_eye_paths
self.image3_paths = image3_noteye_paths + image3_eye_paths
# Label-handling
label_noteye1 = torch.zeros(len(image1_noteye_paths))
label_noteye2 = torch.zeros(len(image2_noteye_paths))
label_noteye3 = torch.zeros(len(image3_noteye_paths))
label_left = torch.ones(count_eye)
label_right = torch.full([count_eye], 2)
self.labels1 = torch.concat((label_noteye1, label_left, label_right))
self.labels2 = torch.concat((label_noteye2, label_left, label_right))
self.labels3 = torch.concat((label_noteye3, label_left, label_right))
def __getitem__(self, idx):
image1 = None
image2 = None
image3 = None
with Image.open(self.image1_paths[idx]) as img:
image1(img)
with Image.open(self.image2_paths[idx]) as img:
image2(img)
with Image.open(self.image3_paths[idx]) as img:
image3(img)
image1 = transforms.ToTensor()
image2 = transforms.ToTensor()
image3 = transforms.ToTensor()
return image1, image2, image3, self.labels1[idx], self.labels2[idx], self.labels3[idx]
def __len__(self):
return len(self.image1_paths)
Run Code Online (Sandbox Code Playgroud)
PyTorch 基础数据集
class RCDataModule(pl.LightningDataModule):
def __init__(self, config_dataset: dict, mode: str):
super().__init__()
dataset = RCDataset(config_dataset)
self.config_dataset = config_dataset
self.data_train = []
self.data_val = []
self.data_test = []
if mode == 'train':
self.data_train, self.data_val = random_split(
dataset,
[round(len(dataset) * config_dataset['train_ratio']),
round(len(dataset) * (config_dataset['testval_ratio'] * 2))]
)
elif mode == 'predict':
self.data_test = dataset
def train_dataloader(self):
return DataLoader(
self.data_train,
batch_size=self.config_dataset['batch_size'],
num_workers=12
)
def val_dataloader(self):
return DataLoader(
self.data_val,
batch_size=self.config_dataset['batch_size'],
num_workers=12
)
def predict_dataloader(self):
return DataLoader(
self.data_test,
batch_size=self.config_dataset['batch_size'],
num_workers=12
)
Run Code Online (Sandbox Code Playgroud)
闪电包装纸
class RCBase(nn.Module):
def __init__(self):
super().__init__()
#RC1
self.RC1 = nn.Sequential(
nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Flatten(),
nn.Linear(64, 64),
nn.ReLU(),
nn.Linear(64, 3),
nn.Softmax(1)
)
#RC2
self.RC2 = nn.Sequential(
nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Flatten(),
nn.Linear(64, 64),
nn.ReLU(),
nn.Linear(64, 3),
nn.Softmax(1)
)
#RC3
self.RC3 = nn.Sequential(
nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Flatten(),
nn.Linear(64, 64),
nn.ReLU(),
nn.Linear(64, 3),
nn.Softmax(1)
)
def forward(self, img1, img2, img3):
output1 = self.RC1(img1)
output2 = self.RC2(img2)
output3 = self.RC3(img3)
return output1, output2, output
Run Code Online (Sandbox Code Playgroud)
基础 PyTorch 模型
class RCPL(pl.LightningModule):
def __init__(self, config_dataset: dict):
super().__init__()
self.RC_base = RCBase()
self.config_dataset = config_dataset
def forward(self, img1, img2, img3):
output1, output2, output3 = self.RC_base(img1, img2, img3)
return output1, output2, output3
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=self.config_dataset['lr'])
def training_step(self, batch, batch_idx):
img1, img2, img3, lbl1, lbl2, lbl3 = batch
output1, output2, output3 = self.RC_base(img1, img2, img3)
train_loss1 = F.cross_entropy(output1, lbl1)
train_loss2 = F.cross_entropy(output2, lbl2)
train_loss3 = F.cross_entropy(output3, lbl3)
avg_loss = (train_loss1 + train_loss2 + train_loss3) / 3
self.log('avg_train_loss', avg_loss, on_epoch = True)
self.log('train_loss1', train_loss1, on_epoch = True)
self.log('train_loss2', train_loss2, on_epoch = True)
self.log('train_loss3', train_loss3, on_epoch = True)
return avg_loss, train_loss1, train_loss2, train_loss3
def validation_step(self, batch, batch_idx):
img1, img2, img3, lbl1, lbl2, lbl3 = batch
output1, output2, output3 = self.RC_base(img1, img2, img3)
val_loss1 = F.cross_entropy(output1, lbl1)
val_loss2 = F.cross_entropy(output2, lbl2)
val_loss3 = F.cross_entropy(output3, lbl3)
avg_val_loss = (val_loss1 + val_loss2 + val_loss3) / 3
self.log('avg_val_loss', avg_val_loss, on_epoch = True)
self.log('val_loss1', val_loss1, on_epoch = True)
self.log('val_loss2', val_loss2, on_epoch = True)
self.log('val_loss3', val_loss3, on_epoch = True)
def predict_step(self, batch, batch_idx):
img1, img2, img3, lbl1, lbl2, lbl3 = batch
return self(img1, img2, img3)
Run Code Online (Sandbox Code Playgroud)
PyTorch 模型包装器。
从 TensorFlow 到 PyTorch 有点困难,因为在 TensorFlow 中使用一切都更加自动化和直观。这里没有产生错误输出,所以我不确定这有什么问题。
输出(为了便于阅读而简化):
LOCAL_RANK:0 - CUDA_VISIBLE_DEVICES:[0]
纪元 0: 0%| | 0/782 [00:00<?, ?it/s]