-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathdataset.py
More file actions
executable file
·72 lines (51 loc) · 2.04 KB
/
dataset.py
File metadata and controls
executable file
·72 lines (51 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import pickle
from collections import namedtuple
import torch
from torch.utils.data import Dataset
from torchvision import datasets
import lmdb
# Record with three fields -- `top`, `bottom` and `filename` -- the same
# attributes LMDBDataset.__getitem__ reads from every row it unpickles.
# NOTE(review): presumably this is the type that was pickled into the LMDB
# store, in which case renaming or moving it would break unpickling of
# existing data -- confirm against the code that writes the store.
CodeRow = namedtuple('CodeRow', ['top', 'bottom', 'filename'])
class NamedDataset(Dataset):
    """Wrap an indexable dataset so that every item also carries its index.

    The wrapped item is expanded into a list and the index that was used
    for the lookup is appended as the final element.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return the wrapped item's elements as a list, followed by ``index``."""
        item = [*self.dataset[index]]
        item.append(index)
        return item
class ImageFileDataset(datasets.ImageFolder):
    """``ImageFolder`` variant whose items also carry a ``class_dir/file`` name."""

    def __getitem__(self, index):
        """Return ``(sample, target, filename)`` for ``index``.

        ``sample`` and ``target`` come unchanged from the parent class.
        ``filename`` is the name of the directory that directly contains the
        sample joined with the sample's base file name.
        """
        sample, target = super().__getitem__(index)

        full_path, _unused_target = self.samples[index]
        parent_dir = os.path.dirname(full_path)
        relative_name = os.path.join(
            os.path.basename(parent_dir),
            os.path.basename(full_path),
        )

        return sample, target, relative_name
class LMDBDataset(Dataset):
    """Read-only dataset backed by an LMDB store of pickled code rows.

    The store must hold a ``length`` entry (the item count as UTF-8 decimal
    text) plus one pickled row per item, keyed by the item index rendered as
    UTF-8 decimal text. Each row must expose ``top``, ``bottom`` and
    ``filename`` attributes.

    Args:
        path: Location of the LMDB environment, passed to ``lmdb.open``.
        architecture: Either ``'vqvae'`` or ``'vqvae2'``; selects which row
            fields ``__getitem__`` returns.

    Raises:
        ValueError: If ``architecture`` is not one of the two accepted names.
        IOError: If the environment cannot be opened or it has no ``length``
            entry.
    """

    def __init__(self, path, architecture):
        if architecture not in ('vqvae', 'vqvae2'):
            raise ValueError('Valid architectures are vqvae and vqvae2. Got: {}'.format(architecture))
        self.architecture = architecture

        self.env = lmdb.open(
            path,
            max_readers=32,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )

        if not self.env:
            raise IOError('Cannot open lmdb dataset', path)

        with self.env.begin(write=False) as txn:
            raw_length = txn.get('length'.encode('utf-8'))

        # A missing key comes back as None; report it clearly instead of
        # failing with AttributeError on ``None.decode``.
        if raw_length is None:
            raise IOError('lmdb dataset has no length entry', path)

        self.length = int(raw_length.decode('utf-8'))

    def __len__(self):
        """Return the item count read from the store's ``length`` entry."""
        return self.length

    def __getitem__(self, index):
        """Return ``(first, bottom, filename)`` for the row stored at ``index``.

        For ``'vqvae2'`` the first element is built from ``row.top``. For
        ``'vqvae'`` it is built from ``row.bottom``, so the bottom code
        appears in both tensor positions (presumably so both architectures
        yield the same 3-tuple layout -- confirm with the consumers).

        Raises:
            IndexError: If no row is stored under ``index``.
        """
        key = str(index).encode('utf-8')

        with self.env.begin(write=False) as txn:
            payload = txn.get(key)

            # Signal an absent row with IndexError so out-of-range access
            # behaves like a sequence (and plain iteration can terminate)
            # rather than crashing inside pickle with a TypeError.
            if payload is None:
                raise IndexError('index {} not found in lmdb dataset'.format(index))

            # SECURITY: pickle.loads can execute arbitrary code; only open
            # LMDB stores that come from a trusted source.
            row = pickle.loads(payload)

        if self.architecture == 'vqvae':
            return torch.from_numpy(row.bottom), torch.from_numpy(row.bottom), row.filename
        elif self.architecture == 'vqvae2':
            return torch.from_numpy(row.top), torch.from_numpy(row.bottom), row.filename