ValueError:num_samples 应该是正整数值,但得到 num_samples=0

Cas*_*aJr 4 python path dataset deep-learning pytorch

我的数据组织如下: /dataset/train_or_validation/neg_or_pos_class/images.png 因此,在训练或验证中,我有 2 个文件夹,1 个用于负数,1 个用于正数。我有标题错误ValueError: num_samples should be a positive integer value, but got num_samples=0,因为基本上我在 /dataset/train_or_validation 内,但随后我需要访问文件夹 neg 或 pos。图像采用以下格式:MCUCXR_0000_1.png 用于正类图像,而 MCUCXR_0000_0.png 用于负类图像。我正在考虑从文件夹中提取所有图像,以便拥有 /dataset/train_or_validation/images.png,但在这种情况下,我如何指定哪个类?或者,如何迭代正/负文件夹?这是我的代码:

"""Montgomery Shard Descriptor."""

import logging
import os
from typing import List
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from pathlib import Path

import numpy as np
import requests

from openfl.interface.interactive_api.shard_descriptor import ShardDataset
from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor

from torchvision import transforms

# Compose transformations
train_transform = transforms.Compose([
  transforms.RandomHorizontalFlip(),
  transforms.RandomVerticalFlip(),
  transforms.Resize((512, 512)),
  transforms.ToTensor(),
])

test_transform = transforms.Compose([
  transforms.Resize((512, 512)),
  transforms.ToTensor(),
])

logger = logging.getLogger(__name__)


class MontgomeryShardDataset(ShardDataset):
    """Montgomery Shard dataset class."""

    def __init__(self, dataset_dir: Path, dataset_type: str,):
        """Initialize MontgomeryDataset."""
        self.data_type = dataset_type
        self.dataset_dir = dataset_dir
        print(self.dataset_dir)
        self.imgs_path = list(dataset_dir.glob('*.png'))
        
    def __getitem__(self, index: int):
        """Return an item by the index."""
        img_path = self.imgs_path[index]
        img = Image.open(img_path)
        return img

    def __len__(self):
        """Return the len of the dataset."""
        return len(self.imgs_path)


class MontgomeryShardDescriptor(ShardDescriptor):
    """Montgomery Shard descriptor class."""

    def __init__(
            self,
            data_folder: str = 'montgomery_data',
            **kwargs
    ):
        """Initialize MontgomeryShardDescriptor."""
        #print("Path at terminal when executing this file")
        print(os.getcwd() + "\n")
        #print(self.common_data_folder)
        self.data_folder = data_folder 
        self.dataset_dir = Path.cwd() / data_folder
        trainset, testset = self.get_data()
        print("IO SONO" + "\n")
        print(self.dataset_dir)
        self.data_by_type = {
            'train': self.dataset_dir / 'TRAIN',
            'val': self.dataset_dir / 'TEST'
        }

    def get_shard_dataset_types(self) -> List[str]:
        """Get available shard dataset types."""
        return list(self.data_by_type)

    def get_dataset(self, dataset_type='train'):
        """Return a shard dataset by type."""
        print("Path at terminal when executing this file")
        print(os.getcwd() + "\n")
        #os.chdir("/home/lmancuso/openfl/openfl-tutorials/interactive_api/OPENLAB/envoy")
        if dataset_type not in self.data_by_type:
            raise Exception(f'Wrong dataset type: {dataset_type}')
        return MontgomeryShardDataset(
            dataset_dir=self.data_by_type[dataset_type],
            dataset_type=dataset_type,
        )

    @property
    def sample_shape(self):
        """Return the sample shape info."""
        return ['3', '512', '512']

    @property
    def target_shape(self):
        """Return the target shape info."""
        return ['3', '512', '512']

    @property
    def dataset_description(self) -> str:
        """Return the dataset description."""
        return (f'Montgomery dataset, shard number')

    def get_data(self):
        root_dir = "montgomery_data"
        #train_set = ImageFolder(os.path.join(root_dir, "TRAIN"), transform=train_transform)
        #test_set = ImageFolder(os.path.join(root_dir, "TEST"), transform=test_transform)
        train_set = os.path.join(root_dir, "TRAIN")
        test_set = os.path.join(root_dir, "TEST")

        print('Montgomery data was loaded!')
        return train_set, test_set
Run Code Online (Sandbox Code Playgroud)

我正在使用英特尔开发的联邦学习框架 OpenFL。正如您所看到的,我还尝试使用 ImageFolder,因为我认为它在这种情况下很有用。

编辑完整的回溯:

new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device)
           ERROR    Collaborator failed with error: num_samples should be a positive integer value, but got num_samples=0:                           envoy.py:93
                    Traceback (most recent call last):
                      File "/home/lmancuso/openfl/openfl/component/envoy/envoy.py", line 91, in run
                        self._run_collaborator()
                      File "/home/lmancuso/openfl/openfl/component/envoy/envoy.py", line 164, in _run_collaborator
                        col.run()
                      File "/home/lmancuso/openfl/openfl/component/collaborator/collaborator.py", line 145, in run
                        self.do_task(task, round_number)
                      File "/home/lmancuso/openfl/openfl/component/collaborator/collaborator.py", line 259, in do_task
                        **kwargs)
                      File "/home/lmancuso/openfl/openfl/federated/task/task_runner.py", line 117, in collaborator_adapted_task
                        loader = self.data_loader.get_train_loader()
                      File "/tmp/ipykernel_8572/1777129341.py", line 35, in get_train_loader
                      File "/home/lmancuso/bruno/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 262, in __init__
                        sampler = RandomSampler(dataset, generator=generator)  # type: ignore
                      File "/home/lmancuso/bruno/lib/python3.7/site-packages/torch/utils/data/sampler.py", line 104, in __init__
                        "value, but got num_samples={}".format(self.num_samples))
                    ValueError: num_samples should be a positive integer value, but got num_samples=0
           INFO     Send WaitExperiment request                                                                                            director_client.py:80
           INFO     WaitExperiment response has received                                                                                   director_client.py:82
Run Code Online (Sandbox Code Playgroud)

Om *_*ogi 6

问题是数据集是空的。数据路径可能是错误的,或者预处理可能会导致问题,最终导致数据集对象中没有对象。