1. References
3D Fully Convolutional Network for Vehicle Detection in Point Cloud
2. Model Implementation
'''
Ref: 3D Fully Convolutional Network for Vehicle Detection in Point Cloud (Baidu Inc.)
Author: HSW
Date: 2018-05-02
'''
import sys
import numpy as np
import tensorflow as tf
from prepare_data2 import *
from baidu_cnn_3d import *

KITTI_TRAIN_DATA_CNT = 7481
KITTI_TEST_DATA_CNT = 7518

# create 3D-CNN model
def create_graph(sess, modelType=0, voxel_shape=(400, 400, 20), activation=tf.nn.relu, is_train=True):
    '''
    Inputs:
        sess: TensorFlow Session object
        voxel_shape: voxel shape of the network input layer
        activation: activation function
        is_train: build the graph in training mode or not
    Outputs:
        voxel, model, phase_train
    '''
    voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])
    phase_train = tf.placeholder(tf.bool, name="phase_train") if is_train else None
    with tf.variable_scope("3D_CNN_Model") as scope:
        model = Full_CNN_3D_Model()
        model.cnn3d_graph(voxel, modelType=modelType, activation=activation, phase_train=is_train)
    if is_train:
        # the scope name must match the variable_scope above ("3D_CNN_Model")
        initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_Model")
        sess.run(tf.variables_initializer(initialized_var))
    return voxel, model, phase_train

# read batch data
def read_batch_data(batch_size, data_set_dir, objectType="Car", split="training",
                    resolution=(0.2, 0.2, 0.2), scale=0.25,
                    limitX=(0, 80), limitY=(-40, 40), limitZ=(-2.5, 1.5)):
    '''
    Inputs:
        batch_size: number of samples per batch
        data_set_dir: KITTI dataset root directory
        objectType: default is "Car"
        split: default is "training"
        resolution: voxel resolution
        scale: outputSize / inputSize
        limitX, limitY, limitZ: detection range
    Outputs:
        a generator yielding (voxel, g_obj, g_cord) batches for training,
        or voxel batches for testing
    '''
    kitti_3DVoxel = kitti_3DVoxel_interface(data_set_dir, objectType=objectType, split=split,
                                            scale=scale, resolution=resolution,
                                            limitX=limitX, limitY=limitY, limitZ=limitZ)
    TRAIN_PROCESSED_IDX = 0
    TEST_PROCESSED_IDX = 0
    if split == "training":
        while TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT:
            batch_voxel = []
            batch_g_obj = []
            batch_g_cord = []
            idx = 0
            while idx < batch_size and TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT:
                print(TRAIN_PROCESSED_IDX)
                voxel, g_obj, g_cord = kitti_3DVoxel.read_kitti_data(TRAIN_PROCESSED_IDX)
                TRAIN_PROCESSED_IDX += 1
                if voxel is None:
                    continue
                idx += 1
                batch_voxel.append(voxel)
                batch_g_obj.append(g_obj)
                batch_g_cord.append(g_cord)
            yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], \
                  np.array(batch_g_obj, dtype=np.float32), \
                  np.array(batch_g_cord, dtype=np.float32)
    elif split == "testing":
        while TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT:
            batch_voxel = []
            idx = 0
            while idx < batch_size and TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT:
                # read samples sequentially by the running test index
                voxel = kitti_3DVoxel.read_kitti_data(TEST_PROCESSED_IDX)
                TEST_PROCESSED_IDX += 1
                if voxel is None:
                    continue
                idx += 1
                batch_voxel.append(voxel)
            yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis]

# train 3D-CNN model
def train(batch_num, data_set_dir, modelType=0, objectType="Car", resolution=(0.2, 0.2, 0.2),
          scale=0.25, lr=0.01, limitX=(0, 80), limitY=(-40, 40), limitZ=(-2.5, 1.5), epoch=101):
    '''
    Inputs:
        batch_num: batch size
        data_set_dir: KITTI dataset root directory
        modelType: which network structure to build
        objectType: object class, e.g. "Car"
        resolution: voxel resolution
        scale: outputSize / inputSize
        lr: learning rate
        limitX, limitY, limitZ: detection range
        epoch: number of training epochs
    Outputs:
        None
    '''
    batch_size = batch_num
    training_epochs = epoch
    sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
    sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
    sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
    voxel_shape = (sizeX, sizeY, sizeZ)
    with tf.Session() as sess:
        voxel, model, phase_train = create_graph(sess, modelType=modelType, voxel_shape=voxel_shape,
                                                 activation=tf.nn.relu, is_train=True)
        saver = tf.train.Saver()
        total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y_pred = \
            model.loss_Fun(lossType=0, cord_loss_weight=0.02)
        optimizer = model.create_optimizer(total_loss, optType="Adam", learnRate=lr)  # learning rate from the argument
        init = tf.global_variables_initializer()
        sess.run(init)
        for epoch in range(training_epochs):
            batchCnt = 0
            for (batch_voxel, batch_g_obj, batch_g_cord) in read_batch_data(
                    batch_size, data_set_dir, objectType=objectType, split="training",
                    resolution=resolution, scale=scale, limitX=limitX, limitY=limitY, limitZ=limitZ):
                feed = {voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True}
                sess.run(optimizer, feed_dict=feed)
                cord_cost = sess.run(cord_loss, feed_dict=feed)
                obj_cost = sess.run(is_obj_loss, feed_dict=feed)
                non_obj_cost = sess.run(non_obj_loss, feed_dict=feed)
                print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), ",", "cord_cost = ", "{:.9f}".format(cord_cost))
                print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), ",", "obj_cost = ", "{:.9f}".format(obj_cost))
                print("Epoch: ", (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), ",", "non_obj_cost = ", "{:.9f}".format(non_obj_cost))
                batchCnt += 1
            if (epoch > 0) and (epoch % 10 == 0):
                saver.save(sess, "velodyne_kitti_train_" + str(epoch) + ".ckpt")
    print("Training Finished!")

# test 3D-CNN model
def test(batch_num, data_set_dir, modelType=0, objectType="Car", resolution=(0.2, 0.2, 0.2),
         scale=0.25, limitX=(0, 80), limitY=(-40, 40), limitZ=(-2.5, 1.5)):
    '''
    Inputs:
        batch_num: batch size
        data_set_dir: KITTI dataset root directory
        resolution: voxel resolution
        scale: outputSize / inputSize
        limitX, limitY, limitZ: detection range
    Outputs:
        None
    '''
    sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
    sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
    sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
    voxel_shape = (sizeX, sizeY, sizeZ)
    batch_size = batch_num
    # read_batch_data is a generator; take the first batch of test voxels
    batch_voxel = next(read_batch_data(batch_size, data_set_dir, objectType=objectType, split="testing",
                                       resolution=resolution, scale=scale,
                                       limitX=limitX, limitY=limitY, limitZ=limitZ))
    # the generator already yields shape (batch, sizeX, sizeY, sizeZ, 1)
    batch_voxel_x = batch_voxel
    with tf.Session() as sess:
        is_train = False
        voxel, model, phase_train = create_graph(sess, modelType=modelType, voxel_shape=voxel_shape,
                                                 activation=tf.nn.relu, is_train=False)
        new_saver = tf.train.import_meta_graph("velodyne_kitti_train_40.ckpt.meta")
        last_model = "./velodyne_kitti_train_40.ckpt"
        new_saver.restore(sess, last_model)
        objectness = model.objectness
        cordinate = model.cordinate
        y_pred = model.y
        objectness = sess.run(objectness, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
        cordinate = sess.run(cordinate, feed_dict={voxel: batch_voxel_x})[0]
        y_pred = sess.run(y_pred, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
        # keep voxels whose objectness score exceeds the threshold
        idx = np.where(y_pred >= 0.995)
        spheres = np.vstack((idx[0], np.vstack((idx[1], idx[2])))).transpose()
        # sphere_to_center is defined in prepare_data2
        centers = sphere_to_center(spheres, scale=scale, resolution=resolution,
                                   limitX=limitX, limitY=limitY, limitZ=limitZ)
        corners = cordinate[idx].reshape(-1, 8, 3) + centers[:, np.newaxis]
        print(centers)
        print(corners)


if __name__ == "__main__":
    batch_num = 3
    data_set_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
    modelType = 1
    objectType = "Car"
    resolution = (0.2, 0.2, 0.2)
    scale = 0.25
    lr = 0.001
    limitX = (0, 80)
    limitY = (-40, 40)
    limitZ = (-2.5, 1.5)
    epoch = 101
    train(batch_num, data_set_dir=data_set_dir, modelType=modelType, objectType=objectType,
          resolution=resolution, scale=scale, lr=lr, limitX=limitX, limitY=limitY, limitZ=limitZ)
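For reference, the input and output grid sizes implied by the default parameters can be computed directly. The following is a small sketch (not part of the original script) using the default resolution (0.2 m), detection range, and scale = 0.25:

# Minimal sketch: voxel grid sizes implied by the defaults above.
resolution = (0.2, 0.2, 0.2)
limitX, limitY, limitZ = (0, 80), (-40, 40), (-2.5, 1.5)
scale = 0.25

# input grid fed to the network
input_shape = tuple(int(round((hi - lo) / r)) for (lo, hi), r in zip((limitX, limitY, limitZ), resolution))
# output grid where the objectness / corner maps live
output_shape = tuple(int(round(s * scale)) for s in input_shape)

print(input_shape, output_shape)   # (400, 400, 20) (100, 100, 5)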
2.1 Network Model
'''
Ref: 3D Fully Convolutional Network for Vehicle Detection in Point Cloud (Baidu Inc.)
Author: HSW
Date: 2018-05-02
'''
import numpy as np
import tensorflow as tf


class Full_CNN_3D_Model(object):
    '''Fully convolutional 3D CNN model'''

    def __init__(self):
        pass

    def cnn3d_graph(self, voxel, modelType=0, activation=tf.nn.relu, phase_train=True):
        if modelType == 0:
            # Modified 3D-CNN. Not recommended: it downsamples by 1/8, which introduces
            # large quantization errors at prediction time.
            self.layer1 = self.conv3d_layer(voxel, 1, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
            self.layer2 = self.conv3d_layer(self.layer1, 16, 32, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
            self.layer3 = self.conv3d_layer(self.layer2, 32, 64, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
            self.layer4 = self.conv3d_layer(self.layer3, 64, 64, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, phase_train=phase_train)
            self.objectness = self.conv3D_to_output(self.layer4, 64, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None)
            self.cordinate = self.conv3D_to_output(self.layer4, 64, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None)
            self.y = tf.nn.softmax(self.objectness, dim=-1)
        elif modelType == 1:
            # network structure from the paper: downsample by 1/4, i.e. outputSize / inputSize = 0.25
            self.layer1 = self.conv3d_layer(voxel, 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
            self.layer2 = self.conv3d_layer(self.layer1, 10, 20, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
            self.layer3 = self.conv3d_layer(self.layer2, 20, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
            base_shape = self.layer2.get_shape().as_list()
            obj_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2]
            cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24]
            # deconvolve back to layer2's spatial resolution (1/4 of the input)
            self.objectness = self.deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)
            self.cordinate = self.deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None)
            self.y = tf.nn.softmax(self.objectness, dim=-1)

    # batch normalization
    def batch_norm(self, inputs, phase_train=True, decay=0.9, eps=1e-5):
        '''
        Inputs:
            inputs: output of the previous layer
            phase_train: True for training, False for testing
        Outputs:
            normalized data for the next layer
        '''
        gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
        beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
        pop_mean = tf.get_variable("pop_mean", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
        pop_var = tf.get_variable("pop_var", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
        axes = range(len(inputs.get_shape()) - 1)
        if phase_train == True:
            batch_mean, batch_var = tf.nn.moments(inputs, axes=[0, 1, 2, 3])
            train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
            train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
            with tf.control_dependencies([train_mean, train_var]):
                return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps)
        else:
            return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps)

    # 3D conv layer
    def conv3d_layer(self, inputs, inputs_dims, outputs_dims, height, width, length, stride,
                     activation=tf.nn.relu, padding="SAME", name="", phase_train=True):
        '''
        Inputs:
            inputs: previous layer output
            inputs_dims: previous layer output channels
            outputs_dims: current layer output channels
            [length, height, width]: conv3d kernel size
            stride: conv3d kernel stride along the length/height/width axes
            activation: activation function, relu by default
            padding: conv3d 'padding' parameter
        Outputs:
            3D conv layer output
        '''
        with tf.variable_scope("conv3D" + name):
            # conv3d kernel
            kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims],
                                     dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01))
            # conv3d bias
            bias = tf.get_variable("bias", shape=[outputs_dims], dtype=tf.float32,
                                   initializer=tf.constant_initializer(0.0))
            conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)
            bias = tf.nn.bias_add(conv, bias)
            if activation:
                bias = activation(bias, name="activation")
            bias = self.batch_norm(bias, phase_train)
        return bias

    # 3D conv to output (classification) layer
    def conv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride,
                         activation=tf.nn.relu, padding="SAME", name="", phase_train=True):
        '''
        Inputs:
            inputs: previous layer output
            inputs_dims: previous layer output channels
            outputs_dims: current layer output channels
            stride: conv3d kernel stride along the length/height/width axes
            activation: activation function, relu by default
            padding: conv3d 'padding' parameter
        Outputs:
            conv output
        '''
        with tf.variable_scope("conv3D" + name):
            kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims],
                                     dtype=tf.float32, initializer=tf.constant_initializer(0.01))
            conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)
        return conv

    # 3D deconv to output (classification) layer
    def deconv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride,
                           output_shape, activation=tf.nn.relu, padding="SAME", name="", phase_train=True):
        '''
        Inputs:
            inputs: previous layer output
            inputs_dims: previous layer output channels
            outputs_dims: current layer output channels
            stride: deconv kernel stride along the length/height/width axes
            output_shape: deconv output shape
            activation: activation function, relu by default
            padding: 'padding' parameter
        Outputs:
            deconv output
        '''
        with tf.variable_scope("deconv3D" + name):
            kernel = tf.get_variable("weights", shape=[length, height, width, outputs_dims, inputs_dims],
                                     dtype=tf.float32, initializer=tf.constant_initializer(0.01))
            deconv = tf.nn.conv3d_transpose(inputs, kernel, output_shape, stride, padding="SAME")
        return deconv

    # define loss
    def loss_Fun(self, lossType=0, cord_loss_weight=0.02):
        '''
        Inputs:
            lossType: selects the loss type
            cord_loss_weight: weight of the corner regression loss, default 0.02
        Outputs:
            total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y
        '''
        if lossType == 0:
            g_obj = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list()[:4])
            g_cord = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list())
            non_g_obj = tf.subtract(tf.ones_like(g_obj, dtype=tf.float32), g_obj)
            elosion = 0.00001
            y = self.y
            # objectness loss on voxels that contain an object
            is_obj_loss = -tf.reduce_sum(tf.multiply(g_obj, tf.log(y[:, :, :, :, 0] + elosion)))
            # objectness loss on voxels that do not contain an object
            non_obj_loss = tf.reduce_sum(tf.multiply(non_g_obj, tf.log(y[:, :, :, :, 0] + elosion)))
            cross_entropy = tf.add(is_obj_loss, non_obj_loss)
            obj_loss = cross_entropy
            # corner regression loss, counted only on object voxels
            cord_diff = tf.multiply(g_obj, tf.reduce_sum(tf.square(tf.subtract(self.cordinate, g_cord)), 4))
            cord_loss = tf.multiply(tf.reduce_sum(cord_diff), cord_loss_weight)
            return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y

    # create optimizer
    def create_optimizer(self, all_loss, optType="Adam", learnRate=0.001):
        '''
        Inputs:
            all_loss: total loss of the graph
            learnRate: learning rate
        Outputs:
            optimizer
        '''
        if optType == "Adam":
            opt = tf.train.AdamOptimizer(learnRate)
            optimizer = opt.minimize(all_loss)
        return optimizer
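To make the 1/4 downsampling of modelType == 1 concrete, the sketch below (an illustration, not part of the model file) walks the spatial shape through the three stride-2 convolutions and the stride-2 deconvolution, assuming SAME padding so each conv reduces a dimension by ceiling division:

import math

def same_conv_out(size, stride=2):
    # SAME padding: output = ceil(input / stride)
    return math.ceil(size / stride)

shape = (400, 400, 20)                          # default input voxel grid
l1 = tuple(same_conv_out(s) for s in shape)     # layer1: (200, 200, 10), 10 channels
l2 = tuple(same_conv_out(s) for s in l1)        # layer2: (100, 100, 5),  20 channels
l3 = tuple(same_conv_out(s) for s in l2)        # layer3: (50, 50, 3),    30 channels
# the deconvolutions upsample layer3 back to layer2's spatial shape:
#   objectness: (100, 100, 5, 2), cordinate: (100, 100, 5, 24)
print(l1, l2, l3)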
2.2 Data Preprocessing
'''
Prepare KITTI data for 3D object detection
Ref: 3D Fully Convolutional Network for Vehicle Detection in Point Cloud
Author: Shiwen He
Date: 28 April 2018
'''
import numpy as np
from kitti_object import kitti_object as kittiReader
import kitti_util

# lidar data => 3D grid voxel
# filter lidar data by camera FoV
def filter_camera_fov(pc):
    '''
    Inputs:
        pc: n x 3
    Outputs:
        filter_pc: m x 3, m <= n
    Notices:
        FoV: from -45 degrees to 45 degrees
    '''
    logic_fov = np.logical_and((pc[:, 1] < pc[:, 0] - 0.27), (-pc[:, 1] < pc[:, 0] - 0.27))
    filter_pc = pc[logic_fov]
    return filter_pc

# filter lidar data by detection range
def filter_lidar_range(pc, limitX, limitY, limitZ):
    '''
    Inputs:
        pc: n x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        filter_pc: m x 3, m <= n
    '''
    logic_x = np.logical_and(pc[:, 0] >= limitX[0], pc[:, 0] < limitX[1])
    logic_y = np.logical_and(pc[:, 1] >= limitY[0], pc[:, 1] < limitY[1])
    logic_z = np.logical_and(pc[:, 2] >= limitZ[0], pc[:, 2] < limitZ[1])
    logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))
    filter_pc = pc[:, :3][logic_xyz]
    return filter_pc

# filter centers + corners
def filter_center_corners(centers, corners, boxsizes, limitX, limitY, limitZ):
    '''
    Inputs:
        centers: n x 3
        corners: n x 8 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        filter_centers: m x 3, m <= n
        filter_corners: m x 8 x 3, m <= n
        filter_boxsizes: m x 3, m <= n
    '''
    logic_x = np.logical_and(centers[:, 0] >= limitX[0], centers[:, 0] < limitX[1])
    logic_y = np.logical_and(centers[:, 1] >= limitY[0], centers[:, 1] < limitY[1])
    logic_z = np.logical_and(centers[:, 2] >= limitZ[0], centers[:, 2] < limitZ[1])
    logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))
    filter_centers_1 = centers[logic_xyz, :]
    filter_corners_1 = corners[logic_xyz, :, :]
    filter_boxsizes_1 = boxsizes[logic_xyz, :]
    shape_centers = filter_centers_1.shape
    filter_centers = np.zeros([shape_centers[0], 3])
    filter_corners = np.zeros([shape_centers[0], 8, 3])
    filter_boxsizes = np.zeros([shape_centers[0], 3])
    idx = 0
    for idx2 in range(shape_centers[0]):
        # keep a box only if all 8 corners fall inside the detection range
        logic_x = np.logical_and(filter_corners_1[idx2, :, 0] >= limitX[0], filter_corners_1[idx2, :, 0] < limitX[1])
        logic_y = np.logical_and(filter_corners_1[idx2, :, 1] >= limitY[0], filter_corners_1[idx2, :, 1] < limitY[1])
        logic_z = np.logical_and(filter_corners_1[idx2, :, 2] >= limitZ[0], filter_corners_1[idx2, :, 2] < limitZ[1])
        logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))
        if logic_xyz.all():
            filter_centers[idx, :3] = filter_centers_1[idx2, :]
            filter_corners[idx, :8, :3] = filter_corners_1[idx2, :, :]
            filter_boxsizes[idx, :3] = filter_boxsizes_1[idx2, :]
            idx += 1
    if idx > 0:
        return filter_centers[:idx, :], filter_corners[:idx, :, :], filter_boxsizes[:idx, :]
    else:
        return None, None, None


def filter_label(object3Ds, objectType='Car'):
    '''
    Inputs:
        object3Ds: list of 3D object labels
        objectType: class name to keep
    Outputs:
        centers, sizes, rotations of the kept objects
    '''
    idx = 0
    data = np.zeros([50, 7]).astype(np.float32)
    for iter in object3Ds:
        if iter.type == "DontCare":
            continue
        if iter.type == objectType:
            # position
            data[idx, 0] = iter.t[0]
            data[idx, 1] = iter.t[1]
            data[idx, 2] = iter.t[2]
            # size
            data[idx, 3] = iter.h
            data[idx, 4] = iter.w
            data[idx, 5] = iter.l
            # rotation
            data[idx, 6] = iter.ry
            idx += 1
    if idx > 0:
        return data[:idx, :3], data[:idx, 3:6], data[:idx, 6]
    else:
        return None, None, None


def proj_to_velo(calib_data):
    '''
    Inputs:
        calib_data: KITTI calibration object
    Outputs:
        projection matrix from the camera coordinate system to the velodyne coordinate system
    '''
    rect = calib_data.R0          # calib_data["R0_rect"].reshape(3, 3)
    velo_to_cam = calib_data.V2C  # calib_data["Tr_velo_to_cam"].reshape(3, 4)
    inv_rect = np.linalg.inv(rect)
    inv_velo_to_cam = np.linalg.pinv(velo_to_cam[:, :3])
    return np.dot(inv_velo_to_cam, inv_rect)

# corners_3d
def compute_3d_corners(centers, sizes, rotates):
    '''
    Inputs:
        centers: n x 3
        sizes: n x 3
        rotates: n x 1
    Outputs:
        corners_3d: n x 8 x 3 array in lidar coordinates
    '''
    corners = []
    for place, rotate, sz in zip(centers, rotates, sizes):
        x, y, z = place
        h, w, l = sz
        if l > 10:
            continue
        corner = np.array([
            [x - l / 2., y - w / 2., z],
            [x + l / 2., y - w / 2., z],
            [x - l / 2., y + w / 2., z],
            [x - l / 2., y - w / 2., z + h],
            [x - l / 2., y + w / 2., z + h],
            [x + l / 2., y + w / 2., z],
            [x + l / 2., y - w / 2., z + h],
            [x + l / 2., y + w / 2., z + h],
        ])
        corner -= np.array([x, y, z])
        rotate_matrix = np.array([
            [np.cos(rotate), -np.sin(rotate), 0],
            [np.sin(rotate), np.cos(rotate), 0],
            [0, 0, 1],
        ])
        a = np.dot(corner, rotate_matrix.transpose())
        a += np.array([x, y, z])
        corners.append(a)
    corners_3d = np.array(corners)
    return corners_3d

# lidar data to 3D grid voxel
def lidar_to_binary_voxel(pc, resolution, limitX, limitY, limitZ):
    '''
    Inputs:
        pc: n x 3
        resolution: 1 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        voxel: binary occupancy grid of shape inputSize
    '''
    voxel_pc = np.zeros_like(pc).astype(np.int32)
    # compute the voxel index of every point in the 3D grid
    voxel_pc[:, 0] = ((pc[:, 0] - limitX[0]) / resolution[0]).astype(np.int32)
    voxel_pc[:, 1] = ((pc[:, 1] - limitY[0]) / resolution[1]).astype(np.int32)
    voxel_pc[:, 2] = ((pc[:, 2] - limitZ[0]) / resolution[2]).astype(np.int32)
    # 3D grid
    voxel = np.zeros((int(round((limitX[1] - limitX[0]) / resolution[0])),
                      int(round((limitY[1] - limitY[0]) / resolution[1])),
                      int(round((limitZ[1] - limitZ[0]) / resolution[2]))))
    # mark occupied cells
    voxel[voxel_pc[:, 0], voxel_pc[:, 1], voxel_pc[:, 2]] = 1
    return voxel

# label center to 3D grid voxel center (sphere)
def center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ):
    '''
    Inputs:
        centers: n x 3
        boxsize: n x 3
        scale: 1 x 1, = outputSize / inputSize
        resolution: 1 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        spheres: n x 3 voxel indices in the output grid
    '''
    # move from the 3D box's bottom-face center to its geometric center
    move_center = centers.copy()
    print("centers")
    print(centers)
    print("boxsize")
    print(boxsize)
    move_center[:, 2] = centers[:, 2] + boxsize[:, 0] / 2
    # compute the label center's position in the (downsampled) 3D grid
    spheres = np.zeros_like(move_center).astype(np.int32)
    spheres[:, 0] = ((move_center[:, 0] - limitX[0]) / resolution[0] * scale).astype(np.int32)
    spheres[:, 1] = ((move_center[:, 1] - limitY[0]) / resolution[1] * scale).astype(np.int32)
    spheres[:, 2] = ((move_center[:, 2] - limitZ[0]) / resolution[2] * scale).astype(np.int32)
    print("move_center")
    print(move_center)
    print("spheres")
    print(spheres)
    return spheres

# 3D grid voxel center (sphere) to label center
def sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ):
    '''
    Inputs:
        spheres: n x 3
        scale: 1 x 1, = outputSize / inputSize
        resolution: 1 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        centers: n x 3
    '''
    centers = np.zeros_like(spheres).astype(np.float32)
    centers[:, 0] = spheres[:, 0] * resolution[0] / scale + limitX[0]
    centers[:, 1] = spheres[:, 1] * resolution[1] / scale + limitY[0]
    centers[:, 2] = spheres[:, 2] * resolution[2] / scale + limitZ[0]
    return centers

# label corners to 3D grid voxel: corners - centers
def corners_to_train(spheres, corners, scale, resolution, limitX, limitY, limitZ):
    '''
    Inputs:
        spheres: n x 3
        corners: n x 8 x 3
        scale: 1 x 1, = outputSize / inputSize
        resolution: 1 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        train_corners: n x 8 x 3 corner offsets relative to the voxel center
    '''
    # 3D grid voxel center => label center
    centers = sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ)
    train_corners = np.zeros_like(corners).astype(np.float32)
    # corner offsets used as the regression target
    for index, (corner, center) in enumerate(zip(corners, centers)):
        train_corners[index] = corner - center
    return train_corners

# create center and corner targets for training
def create_train_label(centers, corners, boxsize, scale, resolution, limitX, limitY, limitZ):
    '''
    Inputs:
        centers: n x 3
        corners: n x 8 x 3
        boxsize: n x 3
        scale: 1 x 1, = outputSize / inputSize
        resolution: 1 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        train_centers: n x 3
        train_corners: n x 8 x 3
    '''
    train_centers = center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ)
    train_corners = corners_to_train(train_centers, corners, scale, resolution, limitX, limitY, limitZ)
    return train_centers, train_corners


def create_obj_map(train_centers, scale, resolution, limitX, limitY, limitZ):
    '''
    Inputs:
        train_centers: n x 3
        scale: 1 x 1, = outputSize / inputSize
        resolution: 1 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        obj_map: objectness label grid of shape scale * inputSize
    '''
    # output 3D grid
    sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))
    sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))
    sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))
    obj_map = np.zeros([sizeX, sizeY, sizeZ])
    # objectness map: cells that contain a label center are set to 1
    obj_map[train_centers[:, 0], train_centers[:, 1], train_centers[:, 2]] = 1
    return obj_map


def create_cord_map(train_centers, train_corners, scale, resolution, limitX, limitY, limitZ):
    '''
    Inputs:
        train_centers: n x 3
        train_corners: n x 8 x 3
        scale: 1 x 1, = outputSize / inputSize
        resolution: 1 x 3
        limitX, limitY, limitZ: 1 x 2
    Outputs:
        cord_map: corner regression label grid of shape scale * inputSize x 24
    '''
    # reshape train_corners: n x 8 x 3 => n x 24
    corners = train_corners.reshape(train_corners.shape[0], -1)
    # output 3D grid
    sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))
    sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))
    sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))
    sizeD = 24
    cord_map = np.zeros([sizeX, sizeY, sizeZ, sizeD])
    cord_map[train_centers[:, 0], train_centers[:, 1], train_centers[:, 2]] = corners
    return cord_map

# kitti data interface
class kitti_3DVoxel_interface(object):
    def __init__(self, root_dir, objectType='Car', split='training', scale=0.25,
                 resolution=(0.2, 0.2, 0.2), limitX=(0, 80), limitY=(-40, 40), limitZ=(-2.5, 1.5)):
        '''
        Inputs:
            case 1, root_dir: train/val data dir, with the layout:
                root_dir->training->velodyne
                root_dir->training->calib
                root_dir->training->label_2
            case 2, root_dir: test data dir, with the layout:
                root_dir->testing->velodyne
                root_dir->testing->calib
        Outputs:
            None
        '''
        self.root_dir = root_dir
        self.split = split
        self.object = kittiReader(self.root_dir, self.split)
        self.objectType = objectType
        self.scale = scale
        self.resolution = resolution
        self.limitX = limitX
        self.limitY = limitY
        self.limitZ = limitZ

    def read_kitti_data(self, idx=0):
        '''
        Inputs:
            idx: training or testing sample index
        Outputs:
            voxel: inputSize
            obj_map: scale * inputSize
            cord_map: scale * inputSize
        '''
        kitti_Object3Ds = None
        kitti_Lidar = None
        kitti_Calib = None
        if self.split == 'training':
            # read lidar data + lidar label + calib data
            kitti_Object3Ds = self.object.get_label_objects(idx)
            kitti_Lidar = self.object.get_lidar(idx)
            kitti_Calib = self.object.get_calibration(idx)
            # lidar data filter
            filter_fov = filter_camera_fov(kitti_Lidar)
            filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)
            # label filter
            centers, boxsizes, rotates = filter_label(kitti_Object3Ds, self.objectType)
            if centers is None:
                return None, None, None
            # label centers: transform from camera coordinates to velodyne coordinates
            if not (kitti_Calib is None):
                proj_velo = proj_to_velo(kitti_Calib)[:, :3]
                centers = np.dot(centers, proj_velo.transpose())[:, :3]
            # label corners
            corners = compute_3d_corners(centers, boxsizes, rotates)
            # filter centers + corners
            filter_centers, filter_corners, boxsizes = filter_center_corners(
                centers, corners, boxsizes, self.limitX, self.limitY, self.limitZ)
            if not (filter_centers is None):
                # training targets
                train_centers, train_corners = create_train_label(
                    filter_centers, filter_corners, boxsizes, self.scale, self.resolution,
                    self.limitX, self.limitY, self.limitZ)
                # obj_map / cord_map / voxel
                obj_map = create_obj_map(train_centers, self.scale, self.resolution,
                                         self.limitX, self.limitY, self.limitZ)
                cord_map = create_cord_map(train_centers, train_corners, self.scale, self.resolution,
                                           self.limitX, self.limitY, self.limitZ)
                voxel = lidar_to_binary_voxel(filter_range, self.resolution,
                                              self.limitX, self.limitY, self.limitZ)
                return voxel, obj_map, cord_map
            else:
                return None, None, None
        elif self.split == 'testing':
            # read lidar data + calib data
            kitti_Lidar = self.object.get_lidar(idx)
            kitti_Calib = self.object.get_calibration(idx)
            # lidar data filter
            filter_fov = filter_camera_fov(kitti_Lidar)
            filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)
            voxel = lidar_to_binary_voxel(filter_range, self.resolution,
                                          self.limitX, self.limitY, self.limitZ)
            return voxel


if __name__ == '__main__':
    data_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
    kitti_3DVoxel = kitti_3DVoxel_interface(data_dir, objectType='Car', split='training', scale=0.25,
                                            resolution=(0.2, 0.2, 0.2),
                                            limitX=(0, 80), limitY=(-40, 40), limitZ=(-2.5, 1.5))
    sampleIdx = 195
    voxel, obj_map, cord_map = kitti_3DVoxel.read_kitti_data(sampleIdx)
    if not (voxel is None):
        print(voxel.shape)
        print(obj_map.shape)
        print(cord_map.shape)
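A quick round trip through center_to_sphere / sphere_to_center shows how much quantization error the 1/4-resolution output grid introduces. The numbers below are hypothetical, chosen only to illustrate the mapping:

# Hypothetical example (not from KITTI): one car whose box bottom-center sits at
# (10.0, -2.0, -0.8) in velodyne coordinates, with box height 1.5 m.
import numpy as np

centers = np.array([[10.0, -2.0, -0.8]])
boxsize = np.array([[1.5, 1.6, 3.9]])      # (h, w, l), hypothetical values
scale, resolution = 0.25, (0.2, 0.2, 0.2)
limitX, limitY, limitZ = (0, 80), (-40, 40), (-2.5, 1.5)

spheres = center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ)
# -> [[12 47 3]] : index of the geometric center (z moved up by h/2) in the 100x100x5 output grid
recovered = sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ)
# -> [[ 9.6 -2.4 -0.1]] : up to one output cell (0.8 m) of quantization error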
2.3 KITTI Data Loading Helpers
''' Helper class and functions for loading KITTI objects
Author: Charles R. Qi
Date: September 2017
'''
from __future__ import print_function

import os
import sys
import numpy as np
import cv2
from PIL import Image

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(os.path.join(ROOT_DIR, 'mayavi'))
import kitti_util as utils

try:
    raw_input          # Python 2
except NameError:
    raw_input = input  # Python 3

# 3D static data
class kitti_object(object):
    '''Load and parse object data into a usable format.'''

    def __init__(self, root_dir, split='training'):
        '''root_dir contains training and testing folders'''
        self.root_dir = root_dir
        self.split = split
        self.split_dir = os.path.join(root_dir, split)
        if split == 'training':
            self.num_samples = 7481
        elif split == 'testing':
            self.num_samples = 7518
        else:
            print('Unknown split: %s' % (split))
            exit(-1)
        # data dirs
        self.image_dir = os.path.join(self.split_dir, 'image_2')
        self.calib_dir = os.path.join(self.split_dir, 'calib')
        self.lidar_dir = os.path.join(self.split_dir, 'velodyne')
        self.label_dir = os.path.join(self.split_dir, 'label_2')

    def __len__(self):
        return self.num_samples

    # read image: return image
    def get_image(self, idx):
        assert(idx < self.num_samples)
        img_filename = os.path.join(self.image_dir, '%06d.png' % (idx))
        return utils.load_image(img_filename)

    # read lidar: return n x 4
    def get_lidar(self, idx):
        assert(idx < self.num_samples)
        lidar_filename = os.path.join(self.lidar_dir, '%06d.bin' % (idx))
        return utils.load_velo_scan(lidar_filename)

    # read calib file
    def get_calibration(self, idx):
        assert(idx < self.num_samples)
        calib_filename = os.path.join(self.calib_dir, '%06d.txt' % (idx))
        return utils.Calibration(calib_filename)

    # read label
    def get_label_objects(self, idx):
        assert(idx < self.num_samples and self.split == 'training')
        label_filename = os.path.join(self.label_dir, '%06d.txt' % (idx))
        return utils.read_label(label_filename)

    # read depth map
    def get_depth_map(self, idx):
        pass

    # read top-down image
    def get_top_down(self, idx):
        pass


class kitti_object_video(object):
    ''' Load data for KITTI videos '''

    def __init__(self, img_dir, lidar_dir, calib_dir):
        self.calib = utils.Calibration(calib_dir, from_video=True)
        self.img_dir = img_dir
        self.lidar_dir = lidar_dir
        self.img_filenames = sorted([os.path.join(img_dir, filename)
                                     for filename in os.listdir(img_dir)])
        self.lidar_filenames = sorted([os.path.join(lidar_dir, filename)
                                       for filename in os.listdir(lidar_dir)])
        print(len(self.img_filenames))
        print(len(self.lidar_filenames))
        # assert(len(self.img_filenames) == len(self.lidar_filenames))
        self.num_samples = len(self.img_filenames)

    def __len__(self):
        return self.num_samples

    def get_image(self, idx):
        assert(idx < self.num_samples)
        img_filename = self.img_filenames[idx]
        return utils.load_image(img_filename)

    def get_lidar(self, idx):
        assert(idx < self.num_samples)
        lidar_filename = self.lidar_filenames[idx]
        return utils.load_velo_scan(lidar_filename)

    def get_calibration(self, unused):
        return self.calib


def viz_kitti_video():
    video_path = os.path.join(ROOT_DIR, 'dataset/2011_09_26/')
    dataset = kitti_object_video(
        os.path.join(video_path, '2011_09_26_drive_0023_sync/image_02/data'),
        os.path.join(video_path, '2011_09_26_drive_0023_sync/velodyne_points/data'),
        video_path)
    print(len(dataset))
    for i in range(len(dataset)):
        img = dataset.get_image(0)
        pc = dataset.get_lidar(0)
        Image.fromarray(img).show()
        draw_lidar(pc)
        raw_input()
        pc[:, 0:3] = dataset.get_calibration().project_velo_to_rect(pc[:, 0:3])
        draw_lidar(pc)
        raw_input()
    return


def show_image_with_boxes(img, objects, calib, show3d=True):
    ''' Show image with 2D bounding boxes '''
    img1 = np.copy(img)  # for 2d bbox
    img2 = np.copy(img)  # for 3d bbox
    for obj in objects:
        if obj.type == 'DontCare':
            continue
        cv2.rectangle(img1, (int(obj.xmin), int(obj.ymin)),
                      (int(obj.xmax), int(obj.ymax)), (0, 255, 0), 2)
        box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)
        img2 = utils.draw_projected_box3d(img2, box3d_pts_2d)
    Image.fromarray(img1).show()
    if show3d:
        Image.fromarray(img2).show()


def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,
                           return_more=False, clip_distance=2.0):
    ''' Filter lidar points, keep those in image FOV '''
    pts_2d = calib.project_velo_to_image(pc_velo)
    fov_inds = (pts_2d[:, 0] < xmax) & (pts_2d[:, 0] >= xmin) & \
               (pts_2d[:, 1] < ymax) & (pts_2d[:, 1] >= ymin)
    fov_inds = fov_inds & (pc_velo[:, 0] > clip_distance)
    imgfov_pc_velo = pc_velo[fov_inds, :]
    if return_more:
        return imgfov_pc_velo, pts_2d, fov_inds
    else:
        return imgfov_pc_velo


def show_lidar_with_boxes(pc_velo, objects, calib, img_fov=False, img_width=None, img_height=None):
    ''' Show all LiDAR points. Draw 3d box in LiDAR point cloud (in velo coord system) '''
    if 'mlab' not in sys.modules:
        import mayavi.mlab as mlab
    from viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3d
    print(('All point num: ', pc_velo.shape[0]))
    fig = mlab.figure(figure=None, bgcolor=(0, 0, 0), fgcolor=None, engine=None, size=(1000, 500))
    if img_fov:
        pc_velo = get_lidar_in_image_fov(pc_velo, calib, 0, 0, img_width, img_height)
        print(('FOV point num: ', pc_velo.shape[0]))
    draw_lidar(pc_velo, fig=fig)
    for obj in objects:
        if obj.type == 'DontCare':
            continue
        # Draw 3d bounding box
        box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)
        box3d_pts_3d_velo = calib.project_rect_to_velo(box3d_pts_3d)
        # Draw heading arrow
        ori3d_pts_2d, ori3d_pts_3d = utils.compute_orientation_3d(obj, calib.P)
        ori3d_pts_3d_velo = calib.project_rect_to_velo(ori3d_pts_3d)
        x1, y1, z1 = ori3d_pts_3d_velo[0, :]
        x2, y2, z2 = ori3d_pts_3d_velo[1, :]
        draw_gt_boxes3d([box3d_pts_3d_velo], fig=fig)
        mlab.plot3d([x1, x2], [y1, y2], [z1, z2], color=(0.5, 0.5, 0.5),
                    tube_radius=None, line_width=1, figure=fig)
    mlab.show(1)


def show_lidar_on_image(pc_velo, img, calib, img_width, img_height):
    ''' Project LiDAR points to image '''
    imgfov_pc_velo, pts_2d, fov_inds = get_lidar_in_image_fov(pc_velo, calib, 0, 0,
                                                              img_width, img_height, True)
    imgfov_pts_2d = pts_2d[fov_inds, :]
    imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo)
    import matplotlib.pyplot as plt
    cmap = plt.cm.get_cmap('hsv', 256)
    cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
    for i in range(imgfov_pts_2d.shape[0]):
        depth = imgfov_pc_rect[i, 2]
        color = cmap[int(640.0 / depth), :]
        cv2.circle(img, (int(np.round(imgfov_pts_2d[i, 0])), int(np.round(imgfov_pts_2d[i, 1]))),
                   2, color=tuple(color), thickness=-1)
    Image.fromarray(img).show()
    return img


def dataset_viz():
    dataset = kitti_object(os.path.join(ROOT_DIR, 'dataset/KITTI/object'))
    for data_idx in range(len(dataset)):
        # Load data from dataset
        objects = dataset.get_label_objects(data_idx)
        objects[0].print_object()
        img = dataset.get_image(data_idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_height, img_width, img_channel = img.shape
        print(('Image shape: ', img.shape))
        pc_velo = dataset.get_lidar(data_idx)[:, 0:3]
        calib = dataset.get_calibration(data_idx)
        # Draw 2d and 3d boxes on image
        show_image_with_boxes(img, objects, calib, False)
        raw_input()
        # Show all LiDAR points. Draw 3d box in LiDAR point cloud
        show_lidar_with_boxes(pc_velo, objects, calib, True, img_width, img_height)
        raw_input()


if __name__ == '__main__':
    import mayavi.mlab as mlab
    from viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3d
    dataset_viz()
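A minimal usage sketch for the reader class above; the root_dir path is an assumption, adjust it to your KITTI layout (training/{velodyne, calib, label_2, image_2}):

# Minimal sketch, assuming a standard KITTI object-detection directory layout.
root_dir = '/path/to/KITTI/object'          # hypothetical path
dataset = kitti_object(root_dir, split='training')
idx = 0
pc = dataset.get_lidar(idx)                 # (n, 4): x, y, z, reflectance
labels = dataset.get_label_objects(idx)    # list of Object3d
calib = dataset.get_calibration(idx)        # Calibration object
print(pc.shape, len(labels))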
""" Helper methods for loading and parsing KITTI data.Author: Charles R. Qi
Date: September 2017
"""
from __future__ import print_functionimport numpy as np
import cv2
import os


class Object3d(object):
    ''' 3d object label '''

    def __init__(self, label_file_line):
        data = label_file_line.split(' ')
        data[1:] = [float(x) for x in data[1:]]
        # extract label, truncation, occlusion
        self.type = data[0]   # 'Car', 'Pedestrian', ...
        self.truncation = data[1]  # truncated pixel ratio [0..1]
        self.occlusion = int(data[2])  # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
        self.alpha = data[3]  # object observation angle [-pi..pi]
        # extract 2d bounding box in 0-based coordinates
        self.xmin = data[4]  # left
        self.ymin = data[5]  # top
        self.xmax = data[6]  # right
        self.ymax = data[7]  # bottom
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
        # extract 3d bounding box information
        self.h = data[8]   # box height
        self.w = data[9]   # box width
        self.l = data[10]  # box length (in meters)
        self.t = (data[11], data[12], data[13])  # location (x,y,z) in camera coord.
        self.ry = data[14]  # yaw angle (around Y-axis in camera coordinates) [-pi..pi]

    def print_object(self):
        print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' %
              (self.type, self.truncation, self.occlusion, self.alpha))
        print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' %
              (self.xmin, self.ymin, self.xmax, self.ymax))
        print('3d bbox h,w,l: %f, %f, %f' %
              (self.h, self.w, self.l))
        print('3d bbox location, ry: (%f, %f, %f), %f' %
              (self.t[0], self.t[1], self.t[2], self.ry))


class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in <label>.txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in <lidar>.bin are in Velodyne coord.

        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref

        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]

        image2 coord:
         ----> x-axis (u)
        |
        v y-axis (v)

        velodyne coord:
        front x, left y, up z

        rect/ref camera coord:
        right x, down y, front z

        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf

        TODO(rqi): do matrix multiplication only once for each projection.
    '''

    def __init__(self, calib_filepath, from_video=False):
        if from_video:
            calibs = self.read_calib_from_video(calib_filepath)
        else:
            calibs = self.read_calib_file(calib_filepath)
        # Projection matrix from rect camera coord to image2 coord
        self.P = calibs['P2']
        self.P = np.reshape(self.P, [3, 4])
        # Rigid transform from Velodyne coord to reference camera coord
        self.V2C = calibs['Tr_velo_to_cam']
        self.V2C = np.reshape(self.V2C, [3, 4])
        self.C2V = inverse_rigid_trans(self.V2C)
        # Rotation from reference camera coord to rect camera coord
        self.R0 = calibs['R0_rect']
        self.R0 = np.reshape(self.R0, [3, 3])
        # Camera intrinsics and extrinsics
        self.c_u = self.P[0, 2]
        self.c_v = self.P[1, 2]
        self.f_u = self.P[0, 0]
        self.f_v = self.P[1, 1]
        self.b_x = self.P[0, 3] / (-self.f_u)  # relative
        self.b_y = self.P[1, 3] / (-self.f_v)

    def read_calib_file(self, filepath):
        ''' Read in a calibration file and parse into a dictionary.
        Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
        '''
        data = {}
        with open(filepath, 'r') as f:
            for line in f.readlines():
                line = line.rstrip()
                if len(line) == 0:
                    continue
                key, value = line.split(':', 1)
                # The only non-float values in these files are dates, which
                # we don't care about anyway
                try:
                    data[key] = np.array([float(x) for x in value.split()])
                except ValueError:
                    pass
        return data

    def read_calib_from_video(self, calib_root_dir):
        ''' Read calibration for camera 2 from video calib files.
            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir
        '''
        data = {}
        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
        Tr_velo_to_cam = np.zeros((3, 4))
        Tr_velo_to_cam[0:3, 0:3] = np.reshape(velo2cam['R'], [3, 3])
        Tr_velo_to_cam[:, 3] = velo2cam['T']
        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])
        data['R0_rect'] = cam2cam['R_rect_00']
        data['P2'] = cam2cam['P_rect_02']
        return data

    def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian
            Output: nx4 points in Homogeneous by appending 1
        '''
        n = pts_3d.shape[0]
        pts_3d_hom = np.hstack((pts_3d, np.ones((n, 1))))
        return pts_3d_hom

    # ===========================
    # ------- 3d to 3d ----------
    # ===========================
    def project_velo_to_ref(self, pts_3d_velo):
        pts_3d_velo = self.cart2hom(pts_3d_velo)  # nx4
        return np.dot(pts_3d_velo, np.transpose(self.V2C))

    def project_ref_to_velo(self, pts_3d_ref):
        pts_3d_ref = self.cart2hom(pts_3d_ref)  # nx4
        return np.dot(pts_3d_ref, np.transpose(self.C2V))

    def project_rect_to_ref(self, pts_3d_rect):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))

    def project_ref_to_rect(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))

    def project_rect_to_velo(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
        return self.project_ref_to_velo(pts_3d_ref)

    def project_velo_to_rect(self, pts_3d_velo):
        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
        return self.project_ref_to_rect(pts_3d_ref)

    # ===========================
    # ------- 3d to 2d ----------
    # ===========================
    def project_rect_to_image(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom(pts_3d_rect)
        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P))  # nx3
        pts_2d[:, 0] /= pts_2d[:, 2]
        pts_2d[:, 1] /= pts_2d[:, 2]
        return pts_2d[:, 0:2]

    def project_velo_to_image(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
        return self.project_rect_to_image(pts_3d_rect)

    # ===========================
    # ------- 2d to 3d ----------
    # ===========================
    def project_image_to_rect(self, uv_depth):
        ''' Input: nx3 first two channels are uv, 3rd channel
                   is depth in rect camera coord.
            Output: nx3 points in rect camera coord.
        '''
        n = uv_depth.shape[0]
        x = ((uv_depth[:, 0] - self.c_u) * uv_depth[:, 2]) / self.f_u + self.b_x
        y = ((uv_depth[:, 1] - self.c_v) * uv_depth[:, 2]) / self.f_v + self.b_y
        pts_3d_rect = np.zeros((n, 3))
        pts_3d_rect[:, 0] = x
        pts_3d_rect[:, 1] = y
        pts_3d_rect[:, 2] = uv_depth[:, 2]
        return pts_3d_rect

    def project_image_to_velo(self, uv_depth):
        pts_3d_rect = self.project_image_to_rect(uv_depth)
        return self.project_rect_to_velo(pts_3d_rect)


def rotx(t):
    ''' 3D Rotation about the x-axis. '''
    c = np.cos(t)
    s = np.sin(t)
    return np.array([[1, 0, 0],
                     [0, c, -s],
                     [0, s, c]])


def roty(t):
    ''' Rotation about the y-axis. '''
    c = np.cos(t)
    s = np.sin(t)
    return np.array([[c, 0, s],
                     [0, 1, 0],
                     [-s, 0, c]])


def rotz(t):
    ''' Rotation about the z-axis. '''
    c = np.cos(t)
    s = np.sin(t)
    return np.array([[c, -s, 0],
                     [s, c, 0],
                     [0, 0, 1]])


def transform_from_rot_trans(R, t):
    ''' Transformation matrix from rotation matrix and translation vector. '''
    R = R.reshape(3, 3)
    t = t.reshape(3, 1)
    return np.vstack((np.hstack([R, t]), [0, 0, 0, 1]))


def inverse_rigid_trans(Tr):
    ''' Inverse a rigid body transform matrix (3x4 as [R|t])
        [R'|-R't; 0|1]
    '''
    inv_Tr = np.zeros_like(Tr)  # 3x4
    inv_Tr[0:3, 0:3] = np.transpose(Tr[0:3, 0:3])
    inv_Tr[0:3, 3] = np.dot(-np.transpose(Tr[0:3, 0:3]), Tr[0:3, 3])
    return inv_Tr


def read_label(label_filename):
    lines = [line.rstrip() for line in open(label_filename)]
    objects = [Object3d(line) for line in lines]
    return objects


def load_image(img_filename):
    return cv2.imread(img_filename)


def load_velo_scan(velo_filename):
    scan = np.fromfile(velo_filename, dtype=np.float32)
    scan = scan.reshape((-1, 4))
    return scan


def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.
        Usage: pts_2d = projectToImage(pts_3d, P)
          input:  pts_3d: nx3 matrix
                  P:      3x4 projection matrix
          output: pts_2d: nx2 matrix
        P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
        => normalize projected_pts_2d(2xn)
        <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)
        => normalize projected_pts_2d(nx2)
    '''
    n = pts_3d.shape[0]
    pts_3d_extend = np.hstack((pts_3d, np.ones((n, 1))))
    print(('pts_3d_extend shape: ', pts_3d_extend.shape))
    pts_2d = np.dot(pts_3d_extend, np.transpose(P))  # nx3
    pts_2d[:, 0] /= pts_2d[:, 2]
    pts_2d[:, 1] /= pts_2d[:, 2]
    return pts_2d[:, 0:2]

# corners_2d + corners_3d
def compute_box_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        bounding box into the image plane.
        Returns:
            corners_2d: (8,2) array in left image coord.
            corners_3d: (8,3) array in rect camera coord.
    '''
    # compute rotational matrix around yaw axis
    R = roty(obj.ry)
    # 3d bounding box dimensions
    l = obj.l
    w = obj.w
    h = obj.h
    # 3d bounding box corners
    x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
    # rotate and translate 3d bounding box
    corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners]))
    corners_3d[0, :] = corners_3d[0, :] + obj.t[0]
    corners_3d[1, :] = corners_3d[1, :] + obj.t[1]
    corners_3d[2, :] = corners_3d[2, :] + obj.t[2]
    # only draw 3d bounding box for objs in front of the camera
    if np.any(corners_3d[2, :] < 0.1):
        corners_2d = None
        return corners_2d, np.transpose(corners_3d)
    # project the 3d bounding box into the image plane
    corners_2d = project_to_image(np.transpose(corners_3d), P)
    return corners_2d, np.transpose(corners_3d)


def compute_orientation_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        object orientation vector into the image plane.
        Returns:
            orientation_2d: (2,2) array in left image coord.
            orientation_3d: (2,3) array in rect camera coord.
    '''
    # compute rotational matrix around yaw axis
    R = roty(obj.ry)
    # orientation in object coordinate system
    orientation_3d = np.array([[0.0, obj.l], [0, 0], [0, 0]])
    # rotate and translate in camera coordinate system, project in image
    orientation_3d = np.dot(R, orientation_3d)
    orientation_3d[0, :] = orientation_3d[0, :] + obj.t[0]
    orientation_3d[1, :] = orientation_3d[1, :] + obj.t[1]
    orientation_3d[2, :] = orientation_3d[2, :] + obj.t[2]
    # vector behind image plane?
    if np.any(orientation_3d[2, :] < 0.1):
        orientation_2d = None
        return orientation_2d, np.transpose(orientation_3d)
    # project orientation into the image plane
    orientation_2d = project_to_image(np.transpose(orientation_3d), P)
    return orientation_2d, np.transpose(orientation_3d)


def draw_projected_box3d(image, qs, color=(255, 255, 255), thickness=2):
    ''' Draw 3d bounding box in image
        qs: (8,3) array of vertices for the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
    '''
    qs = qs.astype(np.int32)
    for k in range(0, 4):
        # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
        i, j = k, (k + 1) % 4
        # use LINE_AA for opencv3
        cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.CV_AA)
        i, j = k + 4, (k + 1) % 4 + 4
        cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.CV_AA)
        i, j = k, k + 4
        cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness, cv2.CV_AA)
    return image
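As a sanity check on the Calibration class, the chain velodyne → rect camera → image can be exercised on a single point. This is only a sketch; the calibration file path is hypothetical:

# Hypothetical usage of the Calibration class defined above.
import numpy as np

calib = Calibration('/path/to/KITTI/object/training/calib/000000.txt')  # assumed path
pt_velo = np.array([[10.0, 0.0, -1.0]])          # one point in velodyne coordinates
pt_rect = calib.project_velo_to_rect(pt_velo)    # rect camera coordinates
pt_img = calib.project_rect_to_image(pt_rect)    # (u, v) pixel coordinates
pt_back = calib.project_image_to_rect(np.hstack([pt_img, pt_rect[:, 2:3]]))  # should match pt_rect
print(pt_rect, pt_img, pt_back)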
3. The code passes a test run and the model is still training; my hardware is limited, so training is fairly slow.