- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- @Author: Yue Wang
- @Contact: yuewangx@mit.edu
- @File: data.py
- @Time: 2018/10/13 6:21 PM
-
- Modified by
- @Author: An Tao
- @Contact: ta19@mails.tsinghua.edu.cn
- @Time: 2020/2/27 9:32 PM
-
- Modified by
- @Author: Dinghao Yang
- @Contact: dinghaoyang@gmail.com
- @Time: 2020/9/28 7:29 PM
-
- Modified by
- @Author: Yu Deng
- @Contact: dengy02@pcl.ac.cn
- @Time: 2022/7/6 17:20
- """
-
-
- import os
- import glob
- import h5py
- import numpy as np
- import tensorflow as tf
-
-
- def download_modelnet40():
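-     """Download and unzip the ModelNet40 HDF5 archive (modelnet40_ply_hdf5_2048)
-     into the data/ directory next to this file, if it is not already present."""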
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- DATA_DIR = os.path.join(BASE_DIR, 'data')
-
- if not os.path.exists(DATA_DIR):
- os.mkdir(DATA_DIR)
- if not os.path.exists(os.path.join(DATA_DIR, 'modelnet40_ply_hdf5_2048')):
- www = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
- zipfile = os.path.basename(www)
- os.system('wget %s; unzip %s' % (www, zipfile))
- os.system('mv %s %s' % (zipfile[:-4], DATA_DIR))
- os.system('rm %s' % (zipfile))
-
-
- def load_data_cls(partition):
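-     """Load every modelnet40*hdf5_2048 HDF5 file for the given partition ('train' or 'test')
-     and return the concatenated point clouds (float32) and labels (int64) as numpy arrays."""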
- download_modelnet40()
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- DATA_DIR = os.path.join(BASE_DIR, 'data')
- all_data = []
- all_label = []
- print(os.path.join(DATA_DIR, 'modelnet40*hdf5_2048', '*%s*.h5'%partition))
- for h5_name in glob.glob(os.path.join(DATA_DIR, 'modelnet40*hdf5_2048', '*%s*.h5'%partition)):
-         f = h5py.File(h5_name, 'r')  # read-only is enough here; nothing is written back
- data = f['data'][:].astype('float32')
- label = f['label'][:].astype('int64')
- f.close()
- all_data.append(data)
- all_label.append(label)
- all_data = np.concatenate(all_data, axis=0)
- all_label = np.concatenate(all_label, axis=0)
- return all_data, all_label
-
-
- def load_data_cls_lle(partition):
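-     """Load precomputed LLE-augmented point clouds from data/modelnet40_<partition>_add_lle.h5.
-     The file is assumed to have been generated offline; judging by the shapes used elsewhere in
-     this module, each point carries xyz plus two extra manifold channels (5 in total)."""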
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- DATA_DIR = os.path.join(BASE_DIR, 'data')
-     f = h5py.File(os.path.join(DATA_DIR, 'modelnet40_%s_add_lle.h5' % partition), 'r')  # read-only
- data = f['data'][:].astype('float32')
- label = f['label'][:].astype('int64')
- f.close()
- # print("load_data_cls_lle data shape: ", data.shape)
- return data, label
-
-
- def load_data_cls_ltsa(partition):
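-     """Same as load_data_cls_lle, but reads the LTSA-augmented file modelnet40_<partition>_add_ltsa.h5."""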
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- DATA_DIR = os.path.join(BASE_DIR, 'data')
-     f = h5py.File(os.path.join(DATA_DIR, 'modelnet40_%s_add_ltsa.h5' % partition), 'r')  # read-only
- data = f['data'][:].astype('float32')
- label = f['label'][:].astype('int64')
- f.close()
- return data, label
-
-
- def load_data_cls_isomap(partition):
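-     """Same as load_data_cls_lle, but reads the Isomap-augmented file modelnet40_<partition>_add_isomap.h5."""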
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- DATA_DIR = os.path.join(BASE_DIR, 'data')
-     f = h5py.File(os.path.join(DATA_DIR, 'modelnet40_%s_add_isomap.h5' % partition), 'r')  # read-only
- data = f['data'][:].astype('float32')
- label = f['label'][:].astype('int64')
- f.close()
- return data, label
-
-
- def load_data_cls_se(partition):
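-     """Same as load_data_cls_lle, but reads the SE (presumably spectral embedding) augmented file
-     modelnet40_<partition>_add_se.h5."""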
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- DATA_DIR = os.path.join(BASE_DIR, 'data')
-     f = h5py.File(os.path.join(DATA_DIR, 'modelnet40_%s_add_se.h5' % partition), 'r')  # read-only
- data = f['data'][:].astype('float32')
- label = f['label'][:].astype('int64')
- f.close()
- return data, label
-
-
- def translate_pointcloud(pointcloud):
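-     """Augment an (num_points, 3) point cloud with a random per-axis scale drawn from
-     [2/3, 3/2] and a random per-axis shift drawn from [-0.2, 0.2]."""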
- xyz1 = np.random.uniform(low=2./3., high=3./2., size=[3])
- xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[3])
- # print("xyz1 shape", xyz1.shape)
- translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32')
- # translated_pointcloud = np.transpose(translated_pointcloud, [0, 2, 1])
- # print("translated_pointcloud shape: " , translated_pointcloud.shape)
-
- return translated_pointcloud
-
-
- def translate_pointcloud_manifold(pointcloud):
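-     """Same augmentation as translate_pointcloud, but for 5-channel points
-     (xyz plus the two extra manifold coordinates)."""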
- xyz1 = np.random.uniform(low=2./3., high=3./2., size=[5])
- xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[5])
-
- translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32')
- # translated_pointcloud = np.transpose(translated_pointcloud, [0, 2, 1])
- # print("translated_pointcloud shape: " , translated_pointcloud.shape)
-
- return translated_pointcloud
-
-
- class ModelNet40():
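-     """ModelNet40 xyz point clouds wrapped in a tf.data pipeline: each cloud is truncated to
-     num_points, augmented with translate_pointcloud when training, zipped with its label,
-     shuffled, batched (the last partial batch is dropped only when training) and prefetched.
-     Iterate over the .data attribute to get (data, label) batches."""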
- def __init__(self, num_points, batch_size=32, partition='train'):
- self.data_ori, self.label_ori = load_data_cls(partition)
- # print("self.data shape", self.data_ori.shape)
- self.num_points = num_points
- self.partition = partition
- self.batch_size = batch_size
- self.if_train = (self.partition == 'train')
- self.data = tf.data.Dataset.from_tensor_slices(self.data_ori)
- self.label = tf.data.Dataset.from_tensor_slices(self.label_ori)
-
- if self.if_train:
- self.data = self.data.map(lambda x: tf.py_function(translate_pointcloud, [x[:self.num_points]], tf.float32))
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
-             # print("shape self.data: ", np.array(list(self.data.as_numpy_iterator())).shape)  # (9840, 1024, 3)
-         else:
-             # when not training, just truncate each cloud to num_points: <MapDataset shapes: (1024, 3), types: tf.float32>
- self.data = self.data.map(lambda x: x[:self.num_points])
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
-
- self.data = tf.data.Dataset.zip((self.data, self.label))
- # self.data = self.data.cache()
- self.data = self.data.shuffle(buffer_size=self.data_ori.shape[0], reshuffle_each_iteration=True)
- # self.data = self.data.cache()
-         self.data = self.data.batch(batch_size=self.batch_size, drop_remainder=self.if_train)  # drop the final batch if it has fewer than batch_size samples (training only)
-         self.data = self.data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-         # self.data = self.data.repeat(1)  # the repeat count would have to be worked out from the dataset size and batch size
-
- def __len__(self):
- return self.data_ori.shape[0]
-
-
- class ModelNet40_LLE():
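-     """Same tf.data pipeline as ModelNet40, built from the 5-channel LLE-augmented data."""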
- def __init__(self, num_points, batch_size=32, partition='train'):
- self.data_ori, self.label_ori = load_data_cls_lle(partition)
- # print("self.data shape", self.data_ori.shape)
- self.num_points = num_points
- self.partition = partition
- self.batch_size = batch_size
- self.if_train = (self.partition == 'train')
- self.data = tf.data.Dataset.from_tensor_slices(self.data_ori)
- self.label = tf.data.Dataset.from_tensor_slices(self.label_ori)
-
- if self.if_train:
- self.data = self.data.map(lambda x: tf.py_function(translate_pointcloud_manifold, [x[:self.num_points]], tf.float32))
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
- # print("shape self.data: ", np.array(list(self.data.as_numpy_iterator())).shape) # (9840, 1024, 5)
- else:
-             # when not training, just truncate each cloud to num_points: <MapDataset shapes: (1024, 5), types: tf.float32>
- self.data = self.data.map(lambda x: x[:self.num_points])
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
-
- self.data = tf.data.Dataset.zip((self.data, self.label))
- # self.data = self.data.cache()
- self.data = self.data.shuffle(buffer_size=self.data_ori.shape[0], reshuffle_each_iteration=True)
- # self.data = self.data.cache()
-         self.data = self.data.batch(batch_size=self.batch_size, drop_remainder=self.if_train)  # drop the final batch if it has fewer than batch_size samples (training only)
-         self.data = self.data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-         # self.data = self.data.repeat()  # the repeat count would have to be worked out from the dataset size and batch size
-
- def __len__(self):
- return self.data_ori.shape[0]
-
-
- class ModelNet40_LTSA():
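-     """Same tf.data pipeline as ModelNet40, built from the 5-channel LTSA-augmented data."""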
- def __init__(self, num_points, batch_size=32, partition='train'):
- self.data_ori, self.label_ori = load_data_cls_ltsa(partition)
- # print("self.data shape", self.data_ori.shape)
- self.num_points = num_points
- self.partition = partition
- self.batch_size = batch_size
- self.if_train = (self.partition == 'train')
- self.data = tf.data.Dataset.from_tensor_slices(self.data_ori)
- self.label = tf.data.Dataset.from_tensor_slices(self.label_ori)
-
- if self.if_train:
- self.data = self.data.map(lambda x: tf.py_function(translate_pointcloud_manifold, [x[:self.num_points]], tf.float32))
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
- # print("shape self.data: ", np.array(list(self.data.as_numpy_iterator())).shape) # (9840, 1024, 5)
- else:
-             # when not training, just truncate each cloud to num_points: <MapDataset shapes: (1024, 5), types: tf.float32>
- self.data = self.data.map(lambda x: x[:self.num_points])
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
-
- self.data = tf.data.Dataset.zip((self.data, self.label))
- # self.data = self.data.cache()
- self.data = self.data.shuffle(buffer_size=self.data_ori.shape[0], reshuffle_each_iteration=True)
- # self.data = self.data.cache()
-         self.data = self.data.batch(batch_size=self.batch_size, drop_remainder=self.if_train)  # drop the final batch if it has fewer than batch_size samples (training only)
-         self.data = self.data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-         # self.data = self.data.repeat()  # the repeat count would have to be worked out from the dataset size and batch size
-
-
- def __len__(self):
- return self.data_ori.shape[0]
-
-
- class ModelNet40_ISOMAP():
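-     """Same tf.data pipeline as ModelNet40, built from the 5-channel Isomap-augmented data."""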
- def __init__(self, num_points, batch_size=32, partition='train'):
- self.data_ori, self.label_ori = load_data_cls_isomap(partition)
- # print("self.data shape", self.data_ori.shape)
- self.num_points = num_points
- self.partition = partition
- self.batch_size = batch_size
- self.if_train = (self.partition == 'train')
- self.data = tf.data.Dataset.from_tensor_slices(self.data_ori)
- self.label = tf.data.Dataset.from_tensor_slices(self.label_ori)
-
- if self.if_train:
- self.data = self.data.map(lambda x: tf.py_function(translate_pointcloud_manifold, [x[:self.num_points]], tf.float32))
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
- # print("shape self.data: ", np.array(list(self.data.as_numpy_iterator())).shape) # (9840, 1024, 5)
- else:
-             # when not training, just truncate each cloud to num_points: <MapDataset shapes: (1024, 5), types: tf.float32>
- self.data = self.data.map(lambda x: x[:self.num_points])
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
-
- self.data = tf.data.Dataset.zip((self.data, self.label))
- # self.data = self.data.cache()
- self.data = self.data.shuffle(buffer_size=self.data_ori.shape[0], reshuffle_each_iteration=True)
- # self.data = self.data.cache()
-         self.data = self.data.batch(batch_size=self.batch_size, drop_remainder=self.if_train)  # drop the final batch if it has fewer than batch_size samples (training only)
-         self.data = self.data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-         # self.data = self.data.repeat()  # the repeat count would have to be worked out from the dataset size and batch size
-
-
- def __len__(self):
- return self.data_ori.shape[0]
-
-
- class ModelNet40_SE():
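-     """Same tf.data pipeline as ModelNet40, built from the 5-channel SE-augmented data."""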
- def __init__(self, num_points, batch_size=32, partition='train'):
- self.data_ori, self.label_ori = load_data_cls_se(partition)
- # print("self.data shape", self.data_ori.shape)
- self.num_points = num_points
- self.partition = partition
- self.batch_size = batch_size
- self.if_train = (self.partition == 'train')
- self.data = tf.data.Dataset.from_tensor_slices(self.data_ori)
- self.label = tf.data.Dataset.from_tensor_slices(self.label_ori)
-
- if self.if_train:
- self.data = self.data.map(lambda x: tf.py_function(translate_pointcloud_manifold, [x[:self.num_points]], tf.float32))
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
- # print("shape self.data: ", np.array(list(self.data.as_numpy_iterator())).shape) # (9840, 1024, 5)
- else:
-             # when not training, just truncate each cloud to num_points: <MapDataset shapes: (1024, 5), types: tf.float32>
- self.data = self.data.map(lambda x: x[:self.num_points])
- # print("len self.data: ", len(list(self.data.as_numpy_iterator())))
-
- self.data = tf.data.Dataset.zip((self.data, self.label))
- # self.data = self.data.cache()
- self.data = self.data.shuffle(buffer_size=self.data_ori.shape[0], reshuffle_each_iteration=True)
- # self.data = self.data.cache()
-         self.data = self.data.batch(batch_size=self.batch_size, drop_remainder=self.if_train)  # drop the final batch if it has fewer than batch_size samples (training only)
-         self.data = self.data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-         # self.data = self.data.repeat()  # the repeat count would have to be worked out from the dataset size and batch size
-
-
- def __len__(self):
- return self.data_ori.shape[0]
-
-
- if __name__ == '__main__':
-     import argparse
- parser = argparse.ArgumentParser(description='Point Cloud Recognition')
- parser.add_argument('--model', type=str, default='pm_nnml', metavar='N',
- choices=['pm_lle', 'pm_nnml'],
- help='Model to use, [pointmanifold_lle, pointmanifold_nnml]')
-     parser.add_argument('--batch_size', type=int, default=32, metavar='batch_size',
-                         help='Size of batch')
-     parser.add_argument('--test_batch_size', type=int, default=16, metavar='batch_size',
-                         help='Size of test batch')
-     parser.add_argument('--epochs', type=int, default=350, metavar='N',
-                         help='number of epochs to train')
- parser.add_argument('--use_sgd', type=bool, default=True,
- help='Use SGD')
- parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
- help='learning rate (default: 0.001, 0.1 if using sgd)')
- parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
- help='SGD momentum (default: 0.9)')
- parser.add_argument('--scheduler', type=str, default='cos', metavar='N',
- choices=['cos', 'step'],
- help='Scheduler to use, [cos, step]')
-     parser.add_argument('--no_cuda', type=bool, default=False,
-                         help='disables CUDA training')
- parser.add_argument('--seed', type=int, default=1, metavar='S',
- help='random seed (default: 1)')
- parser.add_argument('--eval', type=bool, default=False,
- help='evaluate the model')
- parser.add_argument('--num_points', type=int, default=1024,
- help='num of points to use')
- parser.add_argument('--dropout', type=float, default=0.5,
- help='initial dropout rate')
- parser.add_argument('--emb_dims', type=int, default=1024, metavar='N',
- help='Dimension of embeddings')
- parser.add_argument('--k', type=int, default=20, metavar='N',
- help='Num of nearest neighbors to use')
- parser.add_argument('--model_path', type=str, default='', metavar='N',
- help='Pretrained model path')
- parser.add_argument('--hyper_times', type=int, default=1, metavar='N',
- help='The time of model size')
- args = parser.parse_args()
-
-
- # partition='train'
- # data, label = load_data_cls_lle(partition=partition)
- # print("data shape", data.shape)
- # print("label shape", label.shape)
- # xyz1 = np.random.uniform(low=2./3., high=3./2., size=[5])
- # xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[5])
- # print("xyz1 shape", xyz1.shape)
- # print("xyz2 shape", xyz2.shape)
- # translated_pointcloud = np.add(np.multiply(data, xyz1), xyz2).astype('float32')
- # print("==== translated_pointcloud =====", translated_pointcloud.shape)
- # print()
-
- print(len(ModelNet40_LLE(partition='train', num_points=args.num_points))) # 9840
- print(len(ModelNet40_LLE(partition='test', num_points=args.num_points))) # 2468
- print(len(ModelNet40(partition='train', num_points=args.num_points))) # 9840
- print(len(ModelNet40(partition='test', num_points=args.num_points))) # 2468
-
-
- # dataset_nnml = ModelNet40(1024, batch_size=args.batch_size, partition='train')
- # dataset_lle = ModelNet40_LLE(1024, batch_size=args.batch_size, partition='train')
- # print("dataset ", dataset_nnml.data)
- # print("dataset_lle ", dataset_lle.data)
- # # nnml train
- # for idx, (data, label) in enumerate(dataset_nnml.data):
- # print("data shape", data.shape) # train: [32, 1024, 3]
- # print("label shape", label.shape) # train: [32, 1]
- # label = tf.squeeze(label, -1) # train: [32]
- # print("label shape", label.shape)
- # data = tf.transpose(data, (0, 2, 1)) # train: [32, 3, 1024]
- # print("data per shape", data.shape)
- # batch_size = data.shape[0]
- # print("batch_size", args.batch_size, batch_size)
- # # break
- # print("idx", idx) # train: 306
- # print("data per shape", data.shape)
- # print()
-
- # # lle train
- # for idx, (data, label) in enumerate(dataset_lle.data):
- # print("data shape", data.shape) # train: [32, 1024, 5]
- # print("label shape", label.shape) # train: [32, 1]
- # label = tf.squeeze(label, -1) # train: [32]
- # print("label shape", label.shape)
- # data = tf.transpose(data, (0, 2, 1)) # train: [32, 5, 1024]
- # print("data per shape", data.shape)
- # batch_size = data.shape[0]
- # print("batch_size", args.batch_size, batch_size)
- # # break
- # print("idx", idx) # train: 306
- # print("data per shape", data.shape)
- # print()
-
- # print('==================================================================')
-
- # dataset_nnml = ModelNet40(1024, batch_size=args.test_batch_size, partition='test')
- # dataset_lle = ModelNet40_LLE(1024, batch_size=args.test_batch_size, partition='test')
- # print("dataset ", dataset_nnml.data)
- # print("dataset_lle ", dataset_lle.data)
- # print("len data nnml", len(dataset_nnml.data))
- # print("len data lle", len(dataset_lle.data))
-
- # # nnml test
- # for idx, (data, label) in enumerate(dataset_nnml.data):
- # print("data shape", data.shape) # test: [16, 1024, 3], where last batchsize: [4, 1024, 3]
- # print("label shape", label.shape) # test: [16, 1], where last batchsize: [4, 1]
- # label = tf.squeeze(label, -1) # test: [16,], where last batchsize: [4,]
- # print("label shape", label.shape)
- # data = tf.transpose(data, (0, 2, 1)) # test: [16, 3, 1024], where last batchsize: [4, 3, 1024]
- # print("data per shape", data.shape)
- # batch_size = data.shape[0]
- # print("batch_size", args.test_batch_size, batch_size)
- # # break
- # print("idx", idx) # test: 154
- # print("data per shape", data.shape)
-
- # # lle test
- # for idx, (data, label) in enumerate(dataset_lle.data):
- # print("data shape", data.shape) # test: [16, 1024, 5], where last batchsize: [4, 1024, 5]
- # print("label shape", label.shape) # test: [16, 1], where last batchsize: [4, 1]
- # label = tf.squeeze(label, -1) # test: [16,], where last batchsize: [4,]
- # print("label shape", label.shape)
- # data = tf.transpose(data, (0, 2, 1)) # test: [16, 5, 1024], where last batchsize: [4, 5, 1024]
- # print("data per shape", data.shape)
- # batch_size = data.shape[0]
- # print("batch_size", args.test_batch_size, batch_size)
- # # break
- # print("idx", idx) # test: 154
- # print("data per shape", data.shape)