diff --git a/pcdet/datasets/__init__.py b/pcdet/datasets/__init__.py index 47c3900bf..ddf62d65c 100644 --- a/pcdet/datasets/__init__.py +++ b/pcdet/datasets/__init__.py @@ -8,23 +8,37 @@ from .dataset import DatasetTemplate from .kitti.kitti_dataset import KittiDataset from .nuscenes.nuscenes_dataset import NuScenesDataset +from .nuscenes_cars.nuscenes_dataset import NuScenesCarsDataset from .waymo.waymo_dataset import WaymoDataset from .pandaset.pandaset_dataset import PandasetDataset from .lyft.lyft_dataset import LyftDataset from .once.once_dataset import ONCEDataset from .argo2.argo2_dataset import Argo2Dataset -from .custom.custom_dataset import CustomDataset +from .our_data.custom_dataset import CustomDataset +#from .custom.custom_dataset import CustomDataset +from .truckscenes.truckscenes_dataset import TruckScenesDataset +from .truckscenes_multiradar.truckscenes_dataset import TruckScenesMultiRadarDataset +from .truckscenes_multiradar_cars.truckscenes_dataset import TruckScenesMultiRadarCarsDataset +from .truckscenes_cars.truckscenes_dataset import TruckScenesCarsDataset +from .truckscenes_multiradar_cars_comp.truckscenes_dataset import TruckScenesMultiRadarCarsCompDataset + __all__ = { 'DatasetTemplate': DatasetTemplate, 'KittiDataset': KittiDataset, 'NuScenesDataset': NuScenesDataset, + 'NuScenesCarsDataset': NuScenesCarsDataset, 'WaymoDataset': WaymoDataset, 'PandasetDataset': PandasetDataset, 'LyftDataset': LyftDataset, 'ONCEDataset': ONCEDataset, 'CustomDataset': CustomDataset, - 'Argo2Dataset': Argo2Dataset + 'Argo2Dataset': Argo2Dataset, + 'TruckScenesDataset': TruckScenesDataset, + 'TruckScenesMultiRadarDataset': TruckScenesMultiRadarDataset, + 'TruckScenesMultiRadarCarsDataset': TruckScenesMultiRadarCarsDataset, + 'TruckScenesCarsDataset': TruckScenesCarsDataset, + 'TruckScenesMultiRadarCarsCompDataset': TruckScenesMultiRadarCarsCompDataset } diff --git a/pcdet/datasets/nuscenes/nuscenes_dataset.py b/pcdet/datasets/nuscenes/nuscenes_dataset.py index 0f7000562..7c14f487d 100644 --- a/pcdet/datasets/nuscenes/nuscenes_dataset.py +++ b/pcdet/datasets/nuscenes/nuscenes_dataset.py @@ -10,6 +10,7 @@ from ..dataset import DatasetTemplate from pyquaternion import Quaternion from PIL import Image +from nuscenes.utils.data_classes import RadarPointCloud class NuScenesDataset(DatasetTemplate): @@ -101,20 +102,43 @@ def remove_ego_points(points, center_radius=1.0): def get_lidar_with_sweeps(self, index, max_sweeps=1): info = self.infos[index] lidar_path = self.root_path / info['lidar_path'] - points = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4] + + pc = RadarPointCloud.from_file(str(lidar_path)) + points_raw = pc.points.T # Shape becomes (N, 18) + + # Extract [x, y, z, rcs, vx_comp, vy_comp] + # nuScenes radar indices: 0=x, 1=y, 2=z, 5=rcs, 8=vx_comp, 9=vy_comp + points = points_raw[:, [0, 1, 2, 5, 8, 9]] sweep_points_list = [points] sweep_times_list = [np.zeros((points.shape[0], 1))] for k in np.random.choice(len(info['sweeps']), max_sweeps - 1, replace=False): - points_sweep, times_sweep = self.get_sweep(info['sweeps'][k]) - sweep_points_list.append(points_sweep) - sweep_times_list.append(times_sweep) + is_valid_sweep = True + if len(info['sweeps']) == 0: + is_valid_sweep = False + else: + sweep = info['sweeps'][k] + sweep_path = self.root_path / sweep['lidar_path'] - points = np.concatenate(sweep_points_list, axis=0) - times = np.concatenate(sweep_times_list, axis=0).astype(points.dtype) + sweep_pc = RadarPointCloud.from_file(str(sweep_path)) + sweep_points_raw = sweep_pc.points.T + sweep_points = sweep_points_raw[:, [0, 1, 2, 5, 8, 9]] + + # If there is no matrix (e.g., first frame of a scene) -> skip the transformation + if sweep['transform_matrix'] is not None: + sweep_points[:, :3] = sweep_points[:, :3] @ sweep['transform_matrix'][:3, :3].T + sweep_points[:, :3] += sweep['transform_matrix'][:3, 3] + + sweep_points_list.append(sweep_points) + sweep_times_list.append(sweep['time_lag'] * np.ones((sweep_points.shape[0], 1))) + points = np.concatenate(sweep_points_list, axis=0) + times = np.concatenate(sweep_times_list, axis=0) + + # Final shape: (N, 7) -> [x, y, z, rcs, vx_comp, vy_comp, time] points = np.concatenate((points, times), axis=1) + return points def crop_image(self, input_dict): @@ -222,6 +246,8 @@ def __getitem__(self, index): info = copy.deepcopy(self.infos[index]) points = self.get_lidar_with_sweeps(index, max_sweeps=self.dataset_cfg.MAX_SWEEPS) + #print(f"DEBUG - PATH : {info['lidar_path']}, POINTS SHAPE: {points.shape}") + input_dict = { 'points': points, 'frame_id': Path(info['lidar_path']).stem, @@ -230,7 +256,32 @@ def __getitem__(self, index): if 'gt_boxes' in info: if self.dataset_cfg.get('FILTER_MIN_POINTS_IN_GT', False): - mask = (info['num_lidar_pts'] > self.dataset_cfg.FILTER_MIN_POINTS_IN_GT - 1) + import torch + from pcdet.ops.roiaware_pool3d import roiaware_pool3d_utils + + gt_boxes = info['gt_boxes'] + + # Filter out empty gt_boxes + if len(gt_boxes) == 0: + mask = np.zeros(0, dtype=bool) + else: + boxes_tensor = torch.from_numpy(gt_boxes[:, :7]).float().reshape(-1, 7) + points_tensor = torch.from_numpy(points[:, :3]).float().reshape(-1, 3) + + try: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(boxes_tensor, points_tensor).numpy() + except AssertionError: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(points_tensor, boxes_tensor).numpy() + + if point_indices.ndim == 2: + point_counts = point_indices.sum(axis=1) + else: + point_counts = np.zeros(gt_boxes.shape[0], dtype=np.int32) + valid_indices = point_indices[point_indices != -1] + unique_boxes, counts = np.unique(valid_indices, return_counts=True) + point_counts[unique_boxes] = counts + + mask = point_counts >= self.dataset_cfg.FILTER_MIN_POINTS_IN_GT else: mask = None @@ -261,8 +312,8 @@ def evaluation(self, det_annos, class_names, **kwargs): nusc_annos = nuscenes_utils.transform_det_annos_to_nusc_annos(det_annos, nusc) nusc_annos['meta'] = { 'use_camera': False, - 'use_lidar': True, - 'use_radar': False, + 'use_lidar': False, + 'use_radar': True, 'use_map': False, 'use_external': False, } diff --git a/pcdet/datasets/nuscenes/nuscenes_utils.py b/pcdet/datasets/nuscenes/nuscenes_utils.py index 4e2421d15..01e21a925 100644 --- a/pcdet/datasets/nuscenes/nuscenes_utils.py +++ b/pcdet/datasets/nuscenes/nuscenes_utils.py @@ -161,7 +161,7 @@ def get_available_scenes(nusc): scene_token = scene['token'] scene_rec = nusc.get('scene', scene_token) sample_rec = nusc.get('sample', scene_rec['first_sample_token']) - sd_rec = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP']) + sd_rec = nusc.get('sample_data', sample_rec['data']['RADAR_FRONT']) has_more_frames = True scene_not_exist = False while has_more_frames: @@ -314,8 +314,8 @@ def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, m val_nusc_infos = [] progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True) - ref_chan = 'LIDAR_TOP' # The radar channel from which we track back n sweeps to aggregate the point cloud. - chan = 'LIDAR_TOP' # The reference channel of the current sample_rec that the point clouds are mapped to. + ref_chan = 'RADAR_FRONT' # The radar channel from which we track back n sweeps to aggregate the point cloud. + chan = 'RADAR_FRONT' # The reference channel of the current sample_rec that the point clouds are mapped to. for index, sample in enumerate(nusc.sample): progress_bar.update() @@ -440,7 +440,7 @@ def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, m # the filtering gives 0.5~1 map improvement num_lidar_pts = np.array([anno['num_lidar_pts'] for anno in annotations]) num_radar_pts = np.array([anno['num_radar_pts'] for anno in annotations]) - mask = (num_lidar_pts + num_radar_pts > 0) + mask = (num_radar_pts > -1) locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3) dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]] # wlh == > dxdydz (lwh) @@ -488,7 +488,7 @@ def boxes_lidar_to_nusenes(det_info): def lidar_nusc_box_to_global(nusc, boxes, sample_token): s_record = nusc.get('sample', sample_token) - sample_data_token = s_record['data']['LIDAR_TOP'] + sample_data_token = s_record['data']['RADAR_FRONT'] sd_record = nusc.get('sample_data', sample_data_token) cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token']) diff --git a/pcdet/datasets/our_data/base_point_processor.py b/pcdet/datasets/our_data/base_point_processor.py new file mode 100644 index 000000000..ba79445f0 --- /dev/null +++ b/pcdet/datasets/our_data/base_point_processor.py @@ -0,0 +1,375 @@ +import numpy as np +from abc import abstractmethod + +use_SNR = False +ALIGN_RCS_DISTRIBUTION = False + +RCS_MEAN = -5.23 +RCS_STD = 15.30 + +class BasePointProcessor: + def __init__(self, radar_offset_tx, radar_offset_ty, radar_offset_yaw, n_frames, COMP_POINTS_MOTION = False): + self.radar_offset_tx = radar_offset_tx + self.radar_offset_ty = radar_offset_ty + self.radar_offset_yaw = np.deg2rad(radar_offset_yaw) + self.COMP_POINTS_MOTION = COMP_POINTS_MOTION + + self.vel_x = 0.0 + self.vel_y = 0.0 + self.vel_yaw = 0.0 + self.img = None + + self.bins = [] + self.means = [] + self.stds = [] + self.new_pc_arrived = False + + self.timestamp_last_frame_left = 0 + self.timestamp_last_frame_right = 0 + + self.timestamp_current_odom = 0 + self.timestamp_last_odom = 0 + + self.previous_vel_x = 0.0 + self.previous_vel_y = 0.0 + self.previous_vel_yaw = 0.0 + + self.n_frames = n_frames + self.points_per_frame = [] + self.timestamp_last_frame = 0 + self.dt = 0 + self.multiframe_points = np.empty((0, 7), dtype=np.float32) # [x, y, z, intensity, time, frame_id, velocity] + + def add_timestamp(self, timestamp): + if self.timestamp_last_frame != 0: + self.dt = (timestamp - self.timestamp_last_frame) * 1e-9 # Convert nanoseconds to seconds + + #print(f"New frame timestamp: {timestamp}, dt from last frame: {self.dt:.3f} seconds") + + self.timestamp_last_frame = timestamp + + def add_auxiliar_cloud(self, points, timestamp, shift_x = 0.0, shift_y = 0.0, yaw = 0.0, v_x = None, v_y = None, v_yaw = None): + # print("**** DEBUG *********") + # print(f"Point 0 : {points[0]}, shift x, y, yaw {shift_x, shift_y, yaw}") + yaw_in_radians = np.deg2rad(yaw) + # print(f"Yaw in radians {yaw_in_radians}") + + total_shift_x = shift_x + self.radar_offset_tx + total_shift_y = shift_y + self.radar_offset_ty + total_shift_yaw = yaw_in_radians + self.radar_offset_yaw + + processed_points = self.processPointsSingleFrame(points, timestamp, total_shift_x, total_shift_y, total_shift_yaw, v_x, v_y, v_yaw) + # print(f"Point 0 after processing: {processed_points[0]}") + + #Adding time factor that shift the points of the auxiliary cloud compared to the main cloud + dt_aux = (timestamp - self.timestamp_last_frame) * 1e-9 # Convert nanoseconds to seconds + + if v_x is not None and v_y is not None and v_yaw is not None: + current_v_x, current_v_y, current_v_yaw = v_x, v_y, v_yaw + else: + current_v_x, current_v_y, current_v_yaw = self.calculate_interpolated_velocity(timestamp) + + additional_shift_x = dt_aux * (current_v_x - current_v_yaw * total_shift_y) + additional_shift_y = dt_aux * (current_v_y + current_v_yaw * total_shift_x) + additional_shift_yaw = current_v_yaw * dt_aux + + # print(f"Auxiliary cloud timestamp: {timestamp}, main cloud timestamp: {self.timestamp_last_frame}, dt_aux: {dt_aux:.3f} seconds") + # print(f"Current velocity (x, y, yaw): {current_v_x:.2f} m/s, {current_v_y:.2f} m/s, {np.rad2deg(current_v_yaw):.2f} deg/s") + # print(f"Additional shift for auxiliary cloud due to velocity: {additional_shift_x:.2f} m, {additional_shift_y:.2f} m, {np.rad2deg(additional_shift_yaw):.2f} deg") + + shift_x -= additional_shift_x + shift_y += additional_shift_y + yaw_in_radians += additional_shift_yaw + + # print(f"Total shift applied to auxiliary cloud: {shift_x:.2f} m, {shift_y:.2f} m, {np.rad2deg(yaw_in_radians):.2f} deg") + + rotated_points = self.rotate_points(processed_points, shift_x, shift_y, yaw_in_radians) + # print(f"Point 0 after rotation: {rotated_points[0]}") + + n_points_before = self.multiframe_points.shape[0] + self.points_per_frame[-1] += rotated_points.shape[0] + + self.multiframe_points = np.vstack([self.multiframe_points, rotated_points]) + + #print(f"Added {rotated_points.shape[0]} points from auxiliary cloud. Total points before: {n_points_before}, after: {self.multiframe_points.shape[0]}") + + #print(f"Tot points in vector: {sum(self.points_per_frame)}") + #print(f"Current multiframe points shape: {self.multiframe_points.shape}") + + return + + + def processPoints(self, points, vx = None, vy=None, v_yaw=None): + self.new_pc_arrived = False + + if vx is None or vy is None or v_yaw is None: + vx, vy, v_yaw = self.calculate_interpolated_velocity(self.timestamp_last_frame) + + processed_points = self.processPointsSingleFrame(points, self.timestamp_last_frame, self.radar_offset_tx, self.radar_offset_ty, self.radar_offset_yaw, vx, vy, v_yaw) + + + if len(self.points_per_frame) >= self.n_frames: + self.multiframe_points = self.multiframe_points[self.points_per_frame[0]:, :] + self.points_per_frame.pop(0) # Remove the oldest frame + + self.points_per_frame.append(len(processed_points)) + + self.multiframe_points = self.transposeFrame(self.multiframe_points, vx, vy, v_yaw) + self.multiframe_points[:, 6] = self.updateTimestamp(self.multiframe_points[:, 6]) + + if self.COMP_POINTS_MOTION: + self.multiframe_points = self.compensate_points_motion(self.multiframe_points, self.dt) + + self.multiframe_points = np.vstack([self.multiframe_points, processed_points]) + + + # print(f"Tot points in vector: {sum(self.points_per_frame)}") + # print(f"Current multiframe points shape: {self.multiframe_points.shape}") + # if len(self.points_per_frame) == 5: + # print(f"Sample of points frame 5: {self.multiframe_points[0:2, :]}") + # print(f"Sample of points frame 4: {self.multiframe_points[self.points_per_frame[0]:self.points_per_frame[0]+2, :]}") + # print(f"Sample of points frame 3: {self.multiframe_points[self.points_per_frame[0]+self.points_per_frame[1]:self.points_per_frame[0]+self.points_per_frame[1]+2, :]}") + # print(f"Sample of points frame 2: {self.multiframe_points[self.points_per_frame[0]+self.points_per_frame[1]+self.points_per_frame[2]:self.points_per_frame[0]+self.points_per_frame[1]+self.points_per_frame[2]+2, :]}") + # print(f"Sample of points frame 1: {self.multiframe_points[self.points_per_frame[0]+self.points_per_frame[1]+self.points_per_frame[2]+self.points_per_frame[3]:self.points_per_frame[0]+self.points_per_frame[1]+self.points_per_frame[2]+self.points_per_frame[3]+2, :]}") + + return self.multiframe_points + + def transposeFrame(self, points, vx, vy, v_yaw): + d_theta = -v_yaw * self.dt + cos_yaw = np.cos(d_theta) + sin_yaw = np.sin(d_theta) + + tx = self.radar_offset_tx + ty = self.radar_offset_ty + + # 1. Move points from the Old Radar bumper to the Old IMU center + p_imu_x = points[:, 0] + tx + p_imu_y = points[:, 1] + ty + + vx = vx - v_yaw * self.radar_offset_ty + vy = vy + v_yaw * self.radar_offset_tx + + # 2. Translate the points backwards by the IMU's movement + p_shifted_x = p_imu_x - vx * self.dt + p_shifted_y = p_imu_y + vy * self.dt + + # 3. Rotate the points around the new IMU center (Left-Handed coordinates) + p_rotated_x = p_shifted_x * cos_yaw - p_shifted_y * sin_yaw + p_rotated_y = p_shifted_x * sin_yaw + p_shifted_y * cos_yaw + + # 4. Move the points from the New IMU center back out to the New Radar bumper + points[:, 0] = p_rotated_x - tx + points[:, 1] = p_rotated_y - ty + + # 5. Rotate velocities + # (Velocities are directional vectors, they do not suffer from Lever Arm translation) + points = self.rotate_velocities(points, d_theta) + + return points + + def add_random_z(self,points): + + N = points.shape[0] + + # If the frame is completely empty, just return it + if N == 0: + return points + + # ========================================== + # TRICK 1: THE Z-AXIS INFLATION + # ========================================== + + points[:, 2] = np.random.uniform(0.3, 1.5, size=N) + + return points + + def snr_to_fake_rcs(self, points, snr_mean=None, snr_std=None): + VOD_RCS_MEAN = -12.0 + VOD_RCS_STD = 12.0 + + # Extract your raw SNR column + snr = points[:, 3] + + # If not provided SNR stats, calculate them on the fly for this frame + if snr_mean is None: + snr_mean = np.mean(snr) + if snr_std is None: + snr_std = np.std(snr) + 1e-6 # Add tiny epsilon to prevent division by zero + + # Standardize SNR, then scale it to VoD's RCS distribution + fake_rcs = ((snr - snr_mean) / snr_std) * VOD_RCS_STD + VOD_RCS_MEAN + + # Overwrite the SNR column with our fake RCS values + points[:, 3] = fake_rcs + + return points + + def convert_intensity_to_rcs(self, rcs_norm): + MAX_RCS = 100 + MIN_RCS = -100 + + rcs = rcs_norm * (MAX_RCS - MIN_RCS) + MIN_RCS + + #Overall RCS mean across frames: -5.23, Overall RCS std across frames: 15.30 + + + # filtered_rcs = rcs[rcs != 0] # Filter + # self.bins.append(np.histogram(filtered_rcs, range=(-60, 40), bins=1000)[0]) + + # #debug + + # rcs_mean = np.mean(filtered_rcs) + # rcs_std = np.std(filtered_rcs) + + # self.means.append(rcs_mean) + # self.stds.append(rcs_std) + + if ALIGN_RCS_DISTRIBUTION: + + rcs = self.alignRCSDistribution(rcs) + + # print("RCS stats - mean: {:.2f}, std: {:.2f}".format(np.mean(rcs), np.std(rcs))) + # print("Sample RCS values: ", rcs[:10]) + + return rcs + + def filter_invalid_points(self, points): + #valid_points = points[(np.abs(points[:, 0]) > 1.0) & (np.abs(points[:, 0]) < 51.2) & (np.abs(points[:, 1]) > 1.0) & (np.abs(points[:, 1]) < 25.6)] # Filter out points with x=0 (assuming these are invalid) + valid_points = points[(np.abs(points[:, 0]) > 1.0) & (np.abs(points[:, 1]) > 1.0)] + + valid_points = self.filter_valid_speed_points(valid_points) + + return valid_points + + def print_bins(self): + if self.bins: + overall_histogram = np.sum(self.bins, axis=0) + print(f"Overall RCS histogram (sum of all frames): {overall_histogram}") + print(f"Overall RCS mean across frames: {np.mean(self.means):.2f}, Overall RCS std across frames: {np.mean(self.stds):.2f}") + print(f"Average std of RCS across frames: {np.mean(self.stds):.2f}") + + def calculate_interpolated_velocity(self, timestamp_pc): + if self.timestamp_last_odom == 0: + return self.vel_x, self.vel_y, self.vel_yaw # No previous velocity, return current as is + + interpolated_vel_x = self.interpolate1d(self.timestamp_last_odom, self.timestamp_current_odom, timestamp_pc, self.previous_vel_x, self.vel_x) + interpolated_vel_y = self.interpolate1d(self.timestamp_last_odom, self.timestamp_current_odom, timestamp_pc, self.previous_vel_y, self.vel_y) + interpolated_vel_yaw = self.interpolate1d(self.timestamp_last_odom, self.timestamp_current_odom, timestamp_pc, self.previous_vel_yaw, self.vel_yaw) + + return interpolated_vel_x, interpolated_vel_y, interpolated_vel_yaw + + def interpolate1d(self, x0, x1, xt, y0, y1): + if x1 - x0 == 0: + return y0 # Avoid division by zero, return y0 as fallback + return y0 + (y1 - y0) * ((xt - x0) / (x1 - x0)) + + def add_odometry(self, vel_x, vel_y, vel_yaw, timestamp): + if self.timestamp_last_odom == 0: #First odometry message fill both previous and current + self.previous_vel_x = vel_x + self.previous_vel_y = vel_y + self.previous_vel_yaw = vel_yaw + self.timestamp_last_odom = timestamp + else: + self.previous_vel_x = self.vel_x + self.previous_vel_y = self.vel_y + self.previous_vel_yaw = self.vel_yaw + self.timestamp_last_odom = self.timestamp_current_odom + + self.vel_x = vel_x + self.vel_y = vel_y + self.vel_yaw = vel_yaw + + self.timestamp_current_odom = timestamp + + def calculate_compensated_velocity(self, points, shift_x, shift_y, shift_yaw, v_x, v_y, v_yaw, timestamp_pc): + """ + Calculates the absolute compensated radial velocity for radar points. + + Args: + points: (N, 7) numpy array where columns are [x, y, z, intensity, RCS, max_v_r, v_r, v_r_comp, time] + timestamp_pc: Timestamp of the point cloud + + Returns: + v_comp: (N,) numpy array of compensated velocities + """ + x = points[:, 0] + y = points[:, 1] + z = points[:, 2] + + # print("points shape: ", points.shape) + + v_meas = points[:, 6] # The raw Doppler velocity from the radar + + if v_x is None or v_y is None or v_yaw is None: + v_x, v_y, omega_z = self.calculate_interpolated_velocity(timestamp_pc) + # print(f"Interpolated velocity used for compensation: v_x={v_x:.2f} m/s, v_y={v_y:.2f} m/s, v_yaw={omega_z:.2f} rad/s") + else: + v_x, v_y, omega_z = v_x, v_y, v_yaw + # print(f"Interpolated velocity used for compensation from parameters: v_x={v_x:.2f} m/s, v_y={v_y:.2f} m/s, v_yaw={omega_z:.2f} rad/s") + + t_x, t_y, yaw = shift_x, shift_y, shift_yaw + + # radar sensor's physical velocity + v_sens_x = v_x - (omega_z * t_y) + v_sens_y = v_y + (omega_z * t_x) + + # Rotate velocity into the radar's local coordinate frame + cos_y = np.cos(yaw) + sin_y = np.sin(yaw) + v_rad_x = v_sens_x * cos_y + v_sens_y * sin_y + v_rad_y = -v_sens_x * sin_y + v_sens_y * cos_y + + # Distance from radar to point + dist = np.sqrt(x**2 + y**2 + z**2) + + # Avoid division by zero for points exactly at (0,0,0) + dist = np.clip(dist, a_min=1e-6, a_max=None) + + self.u_x = x / dist + self.u_y = y / dist + + # Ego velocity + v_ego_los = (v_rad_x * self.u_x) + (v_rad_y * self.u_y) + + # Compensate the raw measurement + v_comp = v_meas + v_ego_los + + # print("EGO VELOCITY (X, Y, YAW): ", v_x, v_y, omega_z) + # print("MEAN RADIAL VELOCITY BEFORE COMPENSATION: ", np.mean(v_meas)) + # print("MEAN EGO LOS VELOCITY: ", np.mean(v_ego_los)) + # print("MEAN RADIAL VELOCITY AFTER COMPENSATION: ", np.mean(v_comp)) + + return v_comp + + @abstractmethod + def processPointsSingleFrame(self, points, timestamp_pc, shift_x, shift_y, shift_yaw, vx, vy, v_yaw): + """Must be implemented by child classes (VoD vs NuScenes)""" + pass + + @abstractmethod + def rotate_points(self, points, shift_x, shift_y, yaw): + """Must be implemented by child classes (VoD vs NuScenes)""" + pass + + @abstractmethod + def alignRCSDistribution(self, rcs): + """Must be implemented by child classes (VoD vs NuScenes)""" + pass + + @abstractmethod + def updateTimestamp(self, timestamp): + """Must be implemented by child classes (VoD vs NuScenes)""" + pass + + @abstractmethod + def filter_valid_speed_points(self, points): + """Must be implemented by child classes (VoD vs NuScenes)""" + pass + + @abstractmethod + def compensate_points_motion(self, points, dt): + pass + + @abstractmethod + def rotate_velocities(self, points, yaw): + pass diff --git a/pcdet/datasets/our_data/custom_dataset.py b/pcdet/datasets/our_data/custom_dataset.py new file mode 100644 index 000000000..9e0a7ce76 --- /dev/null +++ b/pcdet/datasets/our_data/custom_dataset.py @@ -0,0 +1,413 @@ +import copy +import pickle +import os +import json +from pypcd4 import PointCloud +import torch + +import numpy as np + +from ...ops.roiaware_pool3d import roiaware_pool3d_utils +from ...utils import box_utils, common_utils +from ..dataset import DatasetTemplate +from .point_processor_nuscenes import PointProcessorNuscenes + + +class CustomDataset(DatasetTemplate): + def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): + """ + Args: + root_path: + dataset_cfg: + class_names: + training: + logger: + """ + super().__init__( + dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger + ) + self.mode_with_labels = self.dataset_cfg.get('MODE_WITH_LABELS', ["train", "val", "test"]) + + self.auxiliary_radar = self.dataset_cfg.get('AUXILIARY_RADAR', ["right", "left"]) + self.COMP_POINTS_MOTIONS = self.dataset_cfg.get('COMP_POINTS_MOTIONS', False) + + print(f"Front radar will be used for motion compensation: {self.COMP_POINTS_MOTIONS}") + print(f"Auxiliary radars: {self.auxiliary_radar}") + #self.auxiliary_radar = self.dataset_cfg.get('AUXILIARY_RADAR', []) + self.auxiliary_radar_calib = {} + self.split = self.dataset_cfg.DATA_SPLIT[self.mode] + + self.radar_infos = {} + self.indexes_str = [] + + self.include_data(self.mode) + self.map_class_to_kitti = {"car": "Car", "bicycle": "Cyclist", "pedestrian": "Pedestrian", "Car": "Car", "Pedestrian": "Pedestrian", "Cyclist": "Cyclist", "Truck": "Truck", "Bus": "Bus", "Motorcycle": "Motorcycle", "Bicycle": "Bicycle", "Traffic_cone": "DontCare", "Barrier": "DontCare"} + + self.point_processor = PointProcessorNuscenes( + radar_offset_tx=self.front_radar_calib['shift_from_odom_x'], + radar_offset_ty=self.front_radar_calib['shift_from_odom_y'], + radar_offset_yaw=self.front_radar_calib['shift_from_odom_yaw'], + n_frames=self.dataset_cfg.MAX_SWEEPS, + COMP_POINTS_MOTION=self.COMP_POINTS_MOTIONS + ) + + def include_data(self, mode): + self.logger.info('Loading Custom dataset.') + with open(self.root_path / mode / 'metadata/calibration.json', 'r') as f: + calib_data = json.load(f) + self.front_radar_calib = calib_data['front'] + + for radar in self.auxiliary_radar: + self.auxiliary_radar_calib[radar] = calib_data[radar] + + with open(self.root_path / mode / 'metadata/radar_front_linked_interpolated.json', 'r') as f: + radar_front_data = json.load(f) + self.radar_infos['front'] = radar_front_data + for item in radar_front_data: + key = list(item.keys())[0] + self.indexes_str.append(key) + + for radar in self.auxiliary_radar: + with open(self.root_path / mode / f'metadata/radar_{radar}_interpolated.json', 'r') as f: + radar_data = json.load(f) + self.radar_infos[radar] = radar_data + + if self.mode in self.mode_with_labels: + self.labels_idx = [] + files = os.listdir(self.root_path / self.mode / 'label') + for file in files: + if file.endswith('.json'): + idx = file.split('.')[0] + self.labels_idx.append(idx) + + + print(f"Front radar calibration: {self.front_radar_calib}") + print(f"Auxiliary radar calibration: {self.auxiliary_radar_calib}") + print(f"Front radar infos length: {len(self.radar_infos['front'])}") + for radar in self.auxiliary_radar: + print(f"{radar.capitalize()} radar infos length: {len(self.radar_infos[radar])}") + + def get_label(self, idx): + label_file = self.root_path / self.mode / 'label' / ('%s.json' % str(idx).zfill(6)) + gt_boxes = [] + gt_names = [] + with open(label_file, 'r') as f: + label_data = json.load(f) + for item in label_data: + gt_boxes.append([item['psr']['position']['x'], item['psr']['position']['y'], item['psr']['position']['z'], item['psr']['scale']['x'], item['psr']['scale']['y'], item['psr']['scale']['z'], item['psr']['rotation']['z']]) + gt_names.append(item['obj_type']) + + return np.array(gt_boxes, dtype=np.float32), np.array(gt_names) + + def get_radar(self, idx, radar_name): + radar_file = self.root_path / self.mode / 'radar' / radar_name / f'{str(idx).zfill(6)}.pcd' + if not radar_file.exists(): + return None + pcd = PointCloud.from_path(str(radar_file)) + point_features = pcd.numpy() + #print(f"Loaded radar data from {radar_file}, shape: {point_features.shape}") + #print(f"Loaded radar data from {radar_file}, shape: {point_features.shape}") + return point_features + + def set_split(self, split): + super().__init__( + dataset_cfg=self.dataset_cfg, class_names=self.class_names, training=self.training, + root_path=self.root_path, logger=self.logger + ) + self.split = split + + split_dir = self.root_path / self.mode / 'ImageSets' / (self.split + '.txt') + self.sample_id_list = [x.strip() for x in open(split_dir).readlines()] if split_dir.exists() else None + + def __len__(self): + if self.mode in self.mode_with_labels: + return len(self.labels_idx) + return len(self.indexes_str) + + def get_velocity(self, radar_name, idx_): + key = list(self.radar_infos[radar_name][idx_].keys())[0] + vx_front = self.radar_infos[radar_name][idx_][key]["v_linear_x"] + vy_front = self.radar_infos[radar_name][idx_][key]["v_linear_y"] + v_yaw_front = self.radar_infos[radar_name][idx_][key]["v_angular_z"] + return vx_front, vy_front, v_yaw_front + + def get_aux_calib(self, radar_name): + shift_from_front_x = self.auxiliary_radar_calib[radar_name]['shift_from_front_x'] + shift_from_front_y = self.auxiliary_radar_calib[radar_name]['shift_from_front_y'] + shift_from_front_z = self.auxiliary_radar_calib[radar_name]['shift_from_front_z'] + shift_from_front_roll = self.auxiliary_radar_calib[radar_name]['shift_from_front_roll'] + shift_from_front_pitch = self.auxiliary_radar_calib[radar_name]['shift_from_front_pitch'] + shift_from_front_yaw = self.auxiliary_radar_calib[radar_name]['shift_from_front_yaw'] + return shift_from_front_x, shift_from_front_y, shift_from_front_z, shift_from_front_roll, shift_from_front_pitch, shift_from_front_yaw + + + def get_merged_sweep_radar(self, index): + # print(f"Processing index: {index}") + for i in range(self.dataset_cfg.MAX_SWEEPS, 0, -1): + idx_ = index - (i - 1) + if 0 <= idx_ < len(self.indexes_str): + points = self.get_radar(idx_, 'front') + if points is None: + continue + key = list(self.radar_infos['front'][idx_].keys())[0] + timestamp = self.radar_infos['front'][idx_][key]["timestamp"] + vx_front, vy_front, v_yaw_front = self.get_velocity('front', idx_) + + self.point_processor.add_timestamp(timestamp) + + self.point_processor.processPoints(points, vx_front, vy_front, v_yaw_front) + + for radar in self.auxiliary_radar: + # print(f"Processing auxiliary radar: {radar} for index: {idx_}") + idx_aux = self.radar_infos['front'][idx_][key][f"radar_{radar}"] + idx_aux = int(idx_aux) + points_aux = self.get_radar(idx_aux, radar) + if points_aux is None: + continue + # print(f"Loaded auxiliary radar data from {radar} radar, shape: {points_aux.shape}") + calib = self.auxiliary_radar_calib[radar] + + + key_aux = list(self.radar_infos[radar][idx_aux].keys())[0] + timestamp_aux = self.radar_infos[radar][idx_aux][key_aux]["timestamp"] + vx_aux, vy_aux, v_yaw_aux = self.get_velocity(radar, idx_aux) + # print(f"Auxiliary radar velocity for {radar} radar: vx={vx_aux:.2f} m/s, vy={vy_aux:.2f} m/s, v_yaw={v_yaw_aux:.2f} rad/s") + offset_x, offset_y, _, _, _, offset_yaw = self.get_aux_calib(radar) + self.point_processor.add_auxiliar_cloud(points_aux, timestamp_aux, offset_x, offset_y, offset_yaw, vx_aux, vy_aux, v_yaw_aux) + + return self.point_processor.multiframe_points + + def filter_gt_boxes(self, gt_boxes, gt_names, points): + boxes_tensor = torch.from_numpy(gt_boxes[:, :7]).float().reshape(-1, 7) + points_tensor = torch.from_numpy(points[:, :3]).float().reshape(-1, 3) + + try: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(boxes_tensor, points_tensor).numpy() + except AssertionError: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(points_tensor, boxes_tensor).numpy() + + if point_indices.ndim == 2: + point_counts = point_indices.sum(axis=1) + else: + point_counts = np.zeros(gt_boxes.shape[0], dtype=np.int32) + valid_indices = point_indices[point_indices != -1] + unique_boxes, counts = np.unique(valid_indices, return_counts=True) + point_counts[unique_boxes] = counts + + mask = point_counts >= self.dataset_cfg.FILTER_MIN_POINTS_IN_GT + return gt_boxes[mask], gt_names[mask] + + def __getitem__(self, index): + if self.mode in self.mode_with_labels: + index = int(self.labels_idx[index]) + + points = self.get_merged_sweep_radar(index) + + + input_dict = { + 'frame_id': str(index).zfill(6), + 'points': points + } + + if self.mode in self.mode_with_labels: + gt_boxes, gt_names = self.get_label(index) + # print(f"Loaded GT boxes for index {index}: {gt_boxes}") + + if self.dataset_cfg.get('FILTER_MIN_POINTS_IN_GT', False): + # print(f"Filtering {gt_boxes.shape[0]} GT boxes for index {index} with minimum points in GT: {self.dataset_cfg.FILTER_MIN_POINTS_IN_GT}") + gt_boxes, gt_names = self.filter_gt_boxes(gt_boxes, gt_names, points) + # print(f"After filtering, {gt_boxes.shape[0]} GT boxes remain for index {index}.") + + input_dict.update({ + 'gt_names': gt_names, + 'gt_boxes': gt_boxes + }) + + data_dict = self.prepare_data(data_dict=input_dict) + + return data_dict + + def evaluation(self, det_annos, class_names, **kwargs): + + def kitti_eval(eval_det_annos, eval_gt_annos, map_name_to_kitti): + from ..kitti.kitti_object_eval_python import eval as kitti_eval + from ..kitti import kitti_utils + + kitti_utils.transform_annotations_to_kitti_format(eval_det_annos, map_name_to_kitti=map_name_to_kitti) + kitti_utils.transform_annotations_to_kitti_format( + eval_gt_annos, map_name_to_kitti=map_name_to_kitti, + info_with_fakelidar=self.dataset_cfg.get('INFO_WITH_FAKELIDAR', False) + ) + kitti_class_names = [map_name_to_kitti[x] for x in class_names] + ap_result_str, ap_dict = kitti_eval.get_official_eval_result( + gt_annos=eval_gt_annos, dt_annos=eval_det_annos, current_classes=kitti_class_names + ) + return ap_result_str, ap_dict + + eval_det_annos = copy.deepcopy(det_annos) + eval_gt_annos = [] + for i in range(self.__len__()): + gt_boxes, gt_names = self.get_label(self.labels_idx[i]) + points = self.get_merged_sweep_radar(i) + gt_boxes, gt_names = self.filter_gt_boxes(gt_boxes, gt_names, points) + + data_dict = { + 'frame_id': str(self.labels_idx[i]).zfill(6), + 'gt_names': gt_names, + 'gt_boxes_lidar': gt_boxes + } + eval_gt_annos.append(data_dict) + + + + if kwargs['eval_metric'] == 'kitti': + ap_result_str, ap_dict = kitti_eval(eval_det_annos, eval_gt_annos, self.map_class_to_kitti) + else: + raise NotImplementedError + + return ap_result_str, ap_dict + + # NOT USED + def get_infos(self, class_names, num_workers=4, has_label=True, sample_id_list=None, num_features=4): + import concurrent.futures as futures + + def process_single_scene(sample_idx): + # print('%s sample_idx: %s' % (self.split, sample_idx)) + info = {} + pc_info = {'num_features': num_features, 'lidar_idx': sample_idx} + info['point_cloud'] = pc_info + + if has_label: + annotations = {} + gt_boxes_lidar, name = self.get_label(sample_idx) + annotations['name'] = name + annotations['gt_boxes_lidar'] = gt_boxes_lidar[:, :7] + info['annos'] = annotations + + return info + + sample_id_list = sample_id_list if sample_id_list is not None else self.sample_id_list + + # create a thread pool to improve the velocity + with futures.ThreadPoolExecutor(num_workers) as executor: + infos = executor.map(process_single_scene, sample_id_list) + return list(infos) + + # NOT USED + def create_groundtruth_database(self, info_path=None, used_classes=None, split='train'): + import torch + + database_save_path = Path(self.root_path) / ('gt_database' if split == 'train' else ('gt_database_%s' % split)) + db_info_save_path = Path(self.root_path) / ('custom_dbinfos_%s.pkl' % split) + + database_save_path.mkdir(parents=True, exist_ok=True) + all_db_infos = {} + + with open(info_path, 'rb') as f: + infos = pickle.load(f) + + for k in range(len(infos)): + # print('gt_database sample: %d/%d' % (k + 1, len(infos))) + info = infos[k] + sample_idx = info['point_cloud']['lidar_idx'] + points = self.get_radar(sample_idx, 'front') + annos = info['annos'] + names = annos['name'] + gt_boxes = annos['gt_boxes_lidar'] + + num_obj = gt_boxes.shape[0] + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu( + torch.from_numpy(points[:, 0:3]), torch.from_numpy(gt_boxes) + ).numpy() # (nboxes, npoints) + + for i in range(num_obj): + filename = '%s_%s_%d.bin' % (sample_idx, names[i], i) + filepath = database_save_path / filename + gt_points = points[point_indices[i] > 0] + + gt_points[:, :3] -= gt_boxes[i, :3] + with open(filepath, 'w') as f: + gt_points.tofile(f) + + if (used_classes is None) or names[i] in used_classes: + db_path = str(filepath.relative_to(self.root_path)) # gt_database/xxxxx.bin + db_info = {'name': names[i], 'path': db_path, 'gt_idx': i, + 'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0]} + if names[i] in all_db_infos: + all_db_infos[names[i]].append(db_info) + else: + all_db_infos[names[i]] = [db_info] + + # Output the num of all classes in database + for k, v in all_db_infos.items(): + print('Database %s: %d' % (k, len(v))) + + with open(db_info_save_path, 'wb') as f: + pickle.dump(all_db_infos, f) + + @staticmethod + def create_label_file_with_name_and_box(class_names, gt_names, gt_boxes, save_label_path): + with open(save_label_path, 'w') as f: + for idx in range(gt_boxes.shape[0]): + boxes = gt_boxes[idx] + name = gt_names[idx] + if name not in class_names: + continue + line = "{x} {y} {z} {l} {w} {h} {angle} {name}\n".format( + x=boxes[0], y=boxes[1], z=(boxes[2]), l=boxes[3], + w=boxes[4], h=boxes[5], angle=boxes[6], name=name + ) + f.write(line) + +# NOT USED +def create_custom_infos(dataset_cfg, class_names, data_path, save_path, workers=4): + dataset = CustomDataset( + dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, + training=False, logger=common_utils.create_logger() + ) + train_split, val_split = 'train', 'val' + num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list) + + train_filename = save_path / ('custom_infos_%s.pkl' % train_split) + val_filename = save_path / ('custom_infos_%s.pkl' % val_split) + + print('------------------------Start to generate data infos------------------------') + + dataset.set_split(train_split) + custom_infos_train = dataset.get_infos( + class_names, num_workers=workers, has_label=True, num_features=num_features + ) + with open(train_filename, 'wb') as f: + pickle.dump(custom_infos_train, f) + print('Custom info train file is saved to %s' % train_filename) + + dataset.set_split(val_split) + custom_infos_val = dataset.get_infos( + class_names, num_workers=workers, has_label=True, num_features=num_features + ) + with open(val_filename, 'wb') as f: + pickle.dump(custom_infos_val, f) + print('Custom info train file is saved to %s' % val_filename) + + print('------------------------Start create groundtruth database for data augmentation------------------------') + dataset.set_split(train_split) + dataset.create_groundtruth_database(train_filename, split=train_split) + print('------------------------Data preparation done------------------------') + + +if __name__ == '__main__': + import sys + + if sys.argv.__len__() > 1 and sys.argv[1] == 'create_custom_infos': + import yaml + from pathlib import Path + from easydict import EasyDict + + dataset_cfg = EasyDict(yaml.safe_load(open(sys.argv[2]))) + ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve() + create_custom_infos( + dataset_cfg=dataset_cfg, + class_names=['Vehicle', 'Pedestrian', 'Cyclist'], + data_path=ROOT_DIR / 'data' / 'custom', + save_path=ROOT_DIR / 'data' / 'custom', + ) diff --git a/pcdet/datasets/our_data/point_processor_nuscenes.py b/pcdet/datasets/our_data/point_processor_nuscenes.py new file mode 100644 index 000000000..290ba972e --- /dev/null +++ b/pcdet/datasets/our_data/point_processor_nuscenes.py @@ -0,0 +1,123 @@ +import numpy as np +from .base_point_processor import BasePointProcessor, RCS_MEAN, RCS_STD, use_SNR +#Mean RCS: 8.70, Std RCS: 7.04 + +# New logs: Overall RCS mean across frames: -6.38, Overall RCS std across frames: 8.14 +NUSCENE_RCS_MEAN = 8.70 +NUSCENE_RCS_STD = 7.04 + +Z_OFFSET = 0.9 + +class PointProcessorNuscenes(BasePointProcessor): + def __init__(self, radar_offset_tx, radar_offset_ty, radar_offset_yaw, n_frames, is_rcs_normalized=False, COMP_POINTS_MOTION = False): + super().__init__(radar_offset_tx, radar_offset_ty, radar_offset_yaw, n_frames, COMP_POINTS_MOTION) + self.is_rcs_normalized = is_rcs_normalized + + + + def rotate_points(self, points, shift_x, shift_y, yaw): + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + + # 1. Rotate FIRST + x_rotated = points[:, 0] * cos_yaw - points[:, 1] * sin_yaw + y_rotated = points[:, 0] * sin_yaw + points[:, 1] * cos_yaw + + # 2. Add translation SECOND + points[:, 0] = x_rotated - shift_x + points[:, 1] = y_rotated + shift_y + + points = self.rotate_velocities(points, yaw) + + return points + + def rotate_velocities(self, points, yaw): + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + + vx = points[:, 4].copy() + vy = points[:, 5].copy() + + points[:, 4] = vx * cos_yaw - vy * sin_yaw + points[:, 5] = vx * sin_yaw + vy * cos_yaw + + return points + + + def calculate_compensated_velocity(self, points, shift_x, shift_y, shift_yaw, vx, vy, v_yaw, timestamp_pc): + """ + Calculates the absolute compensated radial velocity for radar points. + + Args: + points: (N, 7) numpy array where columns are [x, y, z, intensity, RCS, max_v_r, v_r, v_r_comp, time] + + Returns: + v_comp: (N,) numpy array of compensated velocities + """ + v_comp = super().calculate_compensated_velocity(points, shift_x, shift_y, shift_yaw, vx, vy, v_yaw, timestamp_pc) + v_comp_x = v_comp * self.u_x + v_comp_y = v_comp * self.u_y + + v_comp_list = np.column_stack([v_comp_x, v_comp_y]) + + + return v_comp_list + + + def processPointsSingleFrame(self, points, timestamp_pc, shift_x=0.0, shift_y=0.0, shift_yaw=0.0, vx=None, vy=None, v_yaw=None): + #### WATCH OUT: OFFSET ON Z DUE TO RADAR PLACEMENT, CHECK IF IT MATCHES THE ONE IN THE DATASET + #points[:, 2] -= Z_OFFSET # Adjust Z for radar mounting height + # points = points[np.logical_and(points[:, 2] >= -3, points[:, 2] <= 5)] + #points[:, 2] = 0 # Set Z to 0 for BEV processing + + v_comp = self.calculate_compensated_velocity(points, shift_x, shift_y, shift_yaw, vx, vy, v_yaw, timestamp_pc) + + #print("Speed: ", np.shape(radial_ambiguous_velocity)) + #v_comp=np.expand_dims(v_comp, axis=1) + if use_SNR: + snr = np.expand_dims(points[:,4], axis=1) + else: #RCS + snr = points[:,3] + if self.is_rcs_normalized: + snr = self.convert_intensity_to_rcs(snr) + + # #### + # self.bins.append(np.histogram(snr, range=(-30, 70), bins=100)[0]) + # rcs_mean = np.mean(snr) + # rcs_std = np.std(snr) + + # self.means.append(rcs_mean) + # self.stds.append(rcs_std) + # ### + + snr = np.expand_dims(snr, axis=1) + + time_vector = np.zeros((points.shape[0], 1), dtype=points.dtype) + processed_points = np.hstack([points[:, 0:3], snr, v_comp, time_vector]) + + processed_points = self.filter_invalid_points(processed_points) + # [x, y, z, snr, v_comp_x, v_comp_y, time] + + # print("Processed points shape: ", np.shape(processed_points)) + + return processed_points + + def alignRCSDistribution(self, rcs): + rcs = ((rcs - RCS_MEAN) / RCS_STD) * NUSCENE_RCS_STD + NUSCENE_RCS_MEAN + print("Aligned RCS stats - mean: {:.2f}, std: {:.2f}".format(np.mean(rcs), np.std(rcs))) + return rcs + + def updateTimestamp(self, timestamp): + return timestamp + self.dt + + def filter_valid_speed_points(self, points): + # Filter out points with unrealistic speeds (e.g., > 30 m/s) + speed = np.sqrt(points[:, -2]**2 + points[:, -3]**2) + valid_speed_points = points[speed < 60] + return valid_speed_points + + def compensate_points_motion(self, points, dt): + points[:, 0] = points[:, 0] + points[:, 4] * dt + points[:, 1] = points[:, 1] + points[:, 5] * dt + + return points diff --git a/pcdet/datasets/truckscenes/truckscenes_dataset.py b/pcdet/datasets/truckscenes/truckscenes_dataset.py new file mode 100644 index 000000000..a482aa302 --- /dev/null +++ b/pcdet/datasets/truckscenes/truckscenes_dataset.py @@ -0,0 +1,516 @@ +import copy +import pickle +from pathlib import Path + +import numpy as np +from tqdm import tqdm + +from ...ops.roiaware_pool3d import roiaware_pool3d_utils +from ...utils import common_utils +from ..dataset import DatasetTemplate +from pyquaternion import Quaternion +from PIL import Image +from truckscenes.utils.data_classes import RadarPointCloud + + +class TruckScenesDataset(DatasetTemplate): + def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): + root_path = (root_path if root_path is not None else Path(dataset_cfg.DATA_PATH)) / dataset_cfg.VERSION + super().__init__( + dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger + ) + self.infos = [] + self.camera_config = self.dataset_cfg.get('CAMERA_CONFIG', None) + if self.camera_config is not None: + self.use_camera = self.camera_config.get('USE_CAMERA', True) + self.camera_image_config = self.camera_config.IMAGE + else: + self.use_camera = False + + self.include_truckscenes_data(self.mode) + if self.training and self.dataset_cfg.get('BALANCED_RESAMPLING', False): + self.infos = self.balanced_infos_resampling(self.infos) + + def include_truckscenes_data(self, mode): + self.logger.info('Loading TruckScenes dataset') + truckscenes_infos = [] + + for info_path in self.dataset_cfg.INFO_PATH[mode]: + info_path = self.root_path / info_path + if not info_path.exists(): + continue + with open(info_path, 'rb') as f: + infos = pickle.load(f) + truckscenes_infos.extend(infos) + + self.infos.extend(truckscenes_infos) + self.logger.info('Total samples for TruckScenes dataset: %d' % (len(truckscenes_infos))) + + def balanced_infos_resampling(self, infos): + """ + Class-balanced sampling of nuScenes dataset from https://arxiv.org/abs/1908.09492 + """ + if self.class_names is None: + return infos + + cls_infos = {name: [] for name in self.class_names} + for info in infos: + for name in set(info['gt_names']): + if name in self.class_names: + cls_infos[name].append(info) + + duplicated_samples = sum([len(v) for _, v in cls_infos.items()]) + cls_dist = {k: len(v) / duplicated_samples for k, v in cls_infos.items()} + + sampled_infos = [] + + frac = 1.0 / len(self.class_names) + ratios = [frac / v for v in cls_dist.values()] + + for cur_cls_infos, ratio in zip(list(cls_infos.values()), ratios): + sampled_infos += np.random.choice( + cur_cls_infos, int(len(cur_cls_infos) * ratio) + ).tolist() + self.logger.info('Total samples after balanced resampling: %s' % (len(sampled_infos))) + + cls_infos_new = {name: [] for name in self.class_names} + for info in sampled_infos: + for name in set(info['gt_names']): + if name in self.class_names: + cls_infos_new[name].append(info) + + cls_dist_new = {k: len(v) / len(sampled_infos) for k, v in cls_infos_new.items()} + + return sampled_infos + + def get_sweep(self, sweep_info): + def remove_ego_points(points, center_radius=1.0): + mask = ~((np.abs(points[:, 0]) < center_radius) & (np.abs(points[:, 1]) < center_radius)) + return points[mask] + + lidar_path = self.root_path / sweep_info['lidar_path'] + points_sweep = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4] + points_sweep = remove_ego_points(points_sweep).T + if sweep_info['transform_matrix'] is not None: + num_points = points_sweep.shape[1] + points_sweep[:3, :] = sweep_info['transform_matrix'].dot( + np.vstack((points_sweep[:3, :], np.ones(num_points))))[:3, :] + + cur_times = sweep_info['time_lag'] * np.ones((1, points_sweep.shape[1])) + return points_sweep.T, cur_times.T + + def get_lidar_with_sweeps(self, index, max_sweeps=1): + info = self.infos[index] + lidar_path = self.root_path / info['lidar_path'] + + pc = RadarPointCloud.from_file(str(lidar_path)) + points = pc.points.T + + v_ego_local = np.zeros(3) + if len(info['sweeps']) > 0: + v_x, v_y, v_z = self.compensate_ego_motion(points, info['sweeps'][0]) + + points[:, 3] = v_x + points[:, 4] = v_y + points[:, 5] = v_z + + sweep_points_list = [points] + sweep_times_list = [np.zeros((points.shape[0], 1))] + + if max_sweeps > 1 and len(info['sweeps']) > 0: + #print(f"Sample {index} has {len(info['sweeps'])} sweeps, sampling up to {max_sweeps - 1} sweeps for augmentation.") + num_sweeps = len(info['sweeps']) + choices = np.random.choice(num_sweeps, max_sweeps - 1, replace=False) + + for k in choices: + sweep = info['sweeps'][k] + sweep_path = self.root_path / sweep['lidar_path'] + sweep_pc = RadarPointCloud.from_file(str(sweep_path)) + sweep_points = sweep_pc.points.T + + v_x, v_y, v_z = self.compensate_ego_motion(sweep_points, sweep) + + sweep_points[:, 3] = v_x + sweep_points[:, 4] = v_y + sweep_points[:, 5] = v_z + + if sweep['transform_matrix'] is not None: + sweep_points[:, :3] = sweep_points[:, :3] @ sweep['transform_matrix'][:3, :3].T + sweep_points[:, :3] += sweep['transform_matrix'][:3, 3] + sweep_points[:, 3:6] = sweep_points[:, 3:6] @ sweep['transform_matrix'][:3, :3].T + + sweep_points_list.append(sweep_points) + sweep_times_list.append(sweep['time_lag'] * np.ones((sweep_points.shape[0], 1))) + + all_points = np.concatenate(sweep_points_list, axis=0) + all_times = np.concatenate(sweep_times_list, axis=0) + + combined = np.concatenate((all_points, all_times), axis=1) + final_points = combined[:, [0, 1, 2, 6, 3, 4, 7]] + + return final_points.astype(np.float32) + + def compensate_ego_motion(self, points, info_sweep): + if info_sweep['transform_matrix'] is None or info_sweep['time_lag'] <= 0: + print(f"Warning: No valid transform matrix or non-positive time lag for ego motion compensation. Returning original velocities. {info_sweep}") + return points[:, 3], points[:, 4], points[:, 5] + + translation = info_sweep['transform_matrix'][:3, 3] + v_ego_local = -translation / info_sweep['time_lag'] + #print(f"Ego velocity (local): {v_ego_local}, time lag: {info_sweep['time_lag']}") + #print(f"Mean speed before compensation: {np.mean(points[:, 3])} {np.mean(points[:, 4])} {np.mean(points[:, 5])}") + + d = np.linalg.norm(points[:, :3], axis=1) + ux = points[:, 0] / (d[:] + 1e-8) + uy = points[:, 1] / (d[:] + 1e-8) + uz = points[:, 2] / (d[:] + 1e-8) + + v_ego_local_points = ux * v_ego_local[0] + uy * v_ego_local[1] + uz * v_ego_local[2] + v_points = ux * points[:, 3] + uy * points[:, 4] + uz * points[:, 5] + v_comp = v_points + v_ego_local_points + #print(f"Mean speed after compensation: {np.mean(ux * v_comp)} {np.mean(uy * v_comp)} {np.mean(uz * v_comp)}") + + return ux * v_comp, uy * v_comp, uz * v_comp + + def crop_image(self, input_dict): + W, H = input_dict["ori_shape"] + imgs = input_dict["camera_imgs"] + img_process_infos = [] + crop_images = [] + for img in imgs: + if self.training == True: + fH, fW = self.camera_image_config.FINAL_DIM + resize_lim = self.camera_image_config.RESIZE_LIM_TRAIN + resize = np.random.uniform(*resize_lim) + resize_dims = (int(W * resize), int(H * resize)) + newW, newH = resize_dims + crop_h = newH - fH + crop_w = int(np.random.uniform(0, max(0, newW - fW))) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) + else: + fH, fW = self.camera_image_config.FINAL_DIM + resize_lim = self.camera_image_config.RESIZE_LIM_TEST + resize = np.mean(resize_lim) + resize_dims = (int(W * resize), int(H * resize)) + newW, newH = resize_dims + crop_h = newH - fH + crop_w = int(max(0, newW - fW) / 2) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) + + # reisze and crop image + img = img.resize(resize_dims) + img = img.crop(crop) + crop_images.append(img) + img_process_infos.append([resize, crop, False, 0]) + + input_dict['img_process_infos'] = img_process_infos + input_dict['camera_imgs'] = crop_images + return input_dict + + def load_camera_info(self, input_dict, info): + input_dict["image_paths"] = [] + input_dict["lidar2camera"] = [] + input_dict["lidar2image"] = [] + input_dict["camera2ego"] = [] + input_dict["camera_intrinsics"] = [] + input_dict["camera2lidar"] = [] + + for _, camera_info in info["cams"].items(): + input_dict["image_paths"].append(camera_info["data_path"]) + + # lidar to camera transform + lidar2camera_r = np.linalg.inv(camera_info["sensor2lidar_rotation"]) + lidar2camera_t = ( + camera_info["sensor2lidar_translation"] @ lidar2camera_r.T + ) + lidar2camera_rt = np.eye(4).astype(np.float32) + lidar2camera_rt[:3, :3] = lidar2camera_r.T + lidar2camera_rt[3, :3] = -lidar2camera_t + input_dict["lidar2camera"].append(lidar2camera_rt.T) + + # camera intrinsics + camera_intrinsics = np.eye(4).astype(np.float32) + camera_intrinsics[:3, :3] = camera_info["camera_intrinsics"] + input_dict["camera_intrinsics"].append(camera_intrinsics) + + # lidar to image transform + lidar2image = camera_intrinsics @ lidar2camera_rt.T + input_dict["lidar2image"].append(lidar2image) + + # camera to ego transform + camera2ego = np.eye(4).astype(np.float32) + camera2ego[:3, :3] = Quaternion( + camera_info["sensor2ego_rotation"] + ).rotation_matrix + camera2ego[:3, 3] = camera_info["sensor2ego_translation"] + input_dict["camera2ego"].append(camera2ego) + + # camera to lidar transform + camera2lidar = np.eye(4).astype(np.float32) + camera2lidar[:3, :3] = camera_info["sensor2lidar_rotation"] + camera2lidar[:3, 3] = camera_info["sensor2lidar_translation"] + input_dict["camera2lidar"].append(camera2lidar) + # read image + filename = input_dict["image_paths"] + images = [] + for name in filename: + images.append(Image.open(str(self.root_path / name))) + + input_dict["camera_imgs"] = images + input_dict["ori_shape"] = images[0].size + + # resize and crop image + input_dict = self.crop_image(input_dict) + + return input_dict + + def __len__(self): + if self._merge_all_iters_to_one_epoch: + return len(self.infos) * self.total_epochs + + return len(self.infos) + + def __getitem__(self, index): + if self._merge_all_iters_to_one_epoch: + index = index % len(self.infos) + + info = copy.deepcopy(self.infos[index]) + points = self.get_lidar_with_sweeps(index, max_sweeps=self.dataset_cfg.MAX_SWEEPS) + + #print(f"DEBUG - PATH : {info['lidar_path']}, POINTS SHAPE: {points.shape}") + + input_dict = { + 'points': points, + 'frame_id': Path(info['lidar_path']).stem, + 'metadata': {'token': info['token']} + } + + if 'gt_boxes' in info: + if self.dataset_cfg.get('FILTER_MIN_POINTS_IN_GT', False): + import torch + from pcdet.ops.roiaware_pool3d import roiaware_pool3d_utils + + gt_boxes = info['gt_boxes'] + + # Filter out empty gt_boxes + if len(gt_boxes) == 0: + mask = np.zeros(0, dtype=bool) + else: + boxes_tensor = torch.from_numpy(gt_boxes[:, :7]).float().reshape(-1, 7) + points_tensor = torch.from_numpy(points[:, :3]).float().reshape(-1, 3) + + try: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(boxes_tensor, points_tensor).numpy() + except AssertionError: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(points_tensor, boxes_tensor).numpy() + + if point_indices.ndim == 2: + point_counts = point_indices.sum(axis=1) + else: + point_counts = np.zeros(gt_boxes.shape[0], dtype=np.int32) + valid_indices = point_indices[point_indices != -1] + unique_boxes, counts = np.unique(valid_indices, return_counts=True) + point_counts[unique_boxes] = counts + + mask = point_counts >= self.dataset_cfg.FILTER_MIN_POINTS_IN_GT + else: + mask = None + + input_dict.update({ + 'gt_names': info['gt_names'] if mask is None else info['gt_names'][mask], + 'gt_boxes': info['gt_boxes'] if mask is None else info['gt_boxes'][mask] + }) + if self.use_camera: + input_dict = self.load_camera_info(input_dict, info) + + data_dict = self.prepare_data(data_dict=input_dict) + + if self.dataset_cfg.get('SET_NAN_VELOCITY_TO_ZEROS', False) and 'gt_boxes' in info: + gt_boxes = data_dict['gt_boxes'] + gt_boxes[np.isnan(gt_boxes)] = 0 + data_dict['gt_boxes'] = gt_boxes + + if not self.dataset_cfg.PRED_VELOCITY and 'gt_boxes' in data_dict: + data_dict['gt_boxes'] = data_dict['gt_boxes'][:, [0, 1, 2, 3, 4, 5, 6, -1]] + + return data_dict + + def evaluation(self, det_annos, class_names, **kwargs): + import json + from truckscenes.truckscenes import TruckScenes + from . import truckscenes_utils + nusc = TruckScenes(version=self.dataset_cfg.VERSION, dataroot=str(self.root_path), verbose=True) + nusc_annos = truckscenes_utils.transform_det_annos_to_nusc_annos(det_annos, nusc) + nusc_annos['meta'] = { + 'use_camera': False, + 'use_lidar': False, + 'use_radar': True, + 'use_map': False, + 'use_external': False, + } + + output_path = Path(kwargs['output_path']) + output_path.mkdir(exist_ok=True, parents=True) + res_path = str(output_path / 'results_nusc.json') + with open(res_path, 'w') as f: + json.dump(nusc_annos, f) + + self.logger.info(f'The predictions of TruckScenes have been saved to {res_path}') + + if self.dataset_cfg.VERSION == 'v1.1-test': + return 'No ground-truth annotations for evaluation', {} + + from truckscenes.eval.detection.config import config_factory + from truckscenes.eval.detection.evaluate import TruckScenesEval + + eval_set_map = { + 'v1.1-mini': 'mini_val', + 'v1.1-trainval': 'val', + 'v1.1-test': 'test' + } + try: + eval_version = 'detection_cvpr_2024' + eval_config = config_factory(eval_version) + except: + eval_version = 'cvpr_2024' + eval_config = config_factory(eval_version) + + nusc_eval = TruckScenesEval( + nusc, + config=eval_config, + result_path=res_path, + eval_set=eval_set_map[self.dataset_cfg.VERSION], + output_dir=str(output_path), + verbose=True, + ) + metrics_summary = nusc_eval.main(plot_examples=0, render_curves=False) + + with open(output_path / 'metrics_summary.json', 'r') as f: + metrics = json.load(f) + + result_str, result_dict = truckscenes_utils.format_truckscenes_results(metrics, self.class_names, version=eval_version) + return result_str, result_dict + + def create_groundtruth_database(self, used_classes=None, max_sweeps=10): + import torch + + database_save_path = self.root_path / f'gt_database_{max_sweeps}sweeps_withvelo' + db_info_save_path = self.root_path / f'truckscenes_dbinfos_{max_sweeps}sweeps_withvelo.pkl' + + database_save_path.mkdir(parents=True, exist_ok=True) + all_db_infos = {} + + for idx in tqdm(range(len(self.infos))): + sample_idx = idx + info = self.infos[idx] + points = self.get_lidar_with_sweeps(idx, max_sweeps=max_sweeps) + gt_boxes = info['gt_boxes'] + gt_names = info['gt_names'] + + box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu( + torch.from_numpy(points[:, 0:3]).unsqueeze(dim=0).float().cuda(), + torch.from_numpy(gt_boxes[:, 0:7]).unsqueeze(dim=0).float().cuda() + ).long().squeeze(dim=0).cpu().numpy() + + for i in range(gt_boxes.shape[0]): + filename = '%s_%s_%d.bin' % (sample_idx, gt_names[i], i) + filepath = database_save_path / filename + gt_points = points[box_idxs_of_pts == i] + + gt_points[:, :3] -= gt_boxes[i, :3] + with open(filepath, 'w') as f: + gt_points.tofile(f) + + if (used_classes is None) or gt_names[i] in used_classes: + db_path = str(filepath.relative_to(self.root_path)) # gt_database/xxxxx.bin + db_info = {'name': gt_names[i], 'path': db_path, 'image_idx': sample_idx, 'gt_idx': i, + 'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0]} + if gt_names[i] in all_db_infos: + all_db_infos[gt_names[i]].append(db_info) + else: + all_db_infos[gt_names[i]] = [db_info] + for k, v in all_db_infos.items(): + print('Database %s: %d' % (k, len(v))) + + with open(db_info_save_path, 'wb') as f: + pickle.dump(all_db_infos, f) + + +def create_truckscenes_info(version, data_path, save_path, max_sweeps=10, with_cam=False): + from truckscenes.truckscenes import TruckScenes + from truckscenes.utils import splits + from . import truckscenes_utils + data_path = data_path / version + save_path = save_path / version + + assert version in ['v1.1-trainval', 'v1.1-test', 'v1.1-mini'] + if version == 'v1.1-trainval': + train_scenes = splits.train + val_scenes = splits.val + elif version == 'v1.1-test': + train_scenes = splits.test + val_scenes = [] + elif version == 'v1.1-mini': + train_scenes = splits.mini_train + val_scenes = splits.mini_val + else: + raise NotImplementedError + + nusc = TruckScenes(version=version, dataroot=data_path, verbose=True) + available_scenes = truckscenes_utils.get_available_scenes(nusc) + available_scene_names = [s['name'] for s in available_scenes] + train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes)) + val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes)) + train_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in train_scenes]) + val_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in val_scenes]) + + print('%s: train scene(%d), val scene(%d)' % (version, len(train_scenes), len(val_scenes))) + + train_nusc_infos, val_nusc_infos = truckscenes_utils.fill_trainval_infos( + data_path=data_path, nusc=nusc, train_scenes=train_scenes, val_scenes=val_scenes, + test='test' in version, max_sweeps=max_sweeps, with_cam=with_cam + ) + + if version == 'v1.1-test': + print('test sample: %d' % len(train_nusc_infos)) + with open(save_path / f'truckscenes_infos_{max_sweeps}sweeps_test.pkl', 'wb') as f: + pickle.dump(train_nusc_infos, f) + else: + print('train sample: %d, val sample: %d' % (len(train_nusc_infos), len(val_nusc_infos))) + with open(save_path / f'truckscenes_infos_{max_sweeps}sweeps_train.pkl', 'wb') as f: + pickle.dump(train_nusc_infos, f) + with open(save_path / f'truckscenes_infos_{max_sweeps}sweeps_val.pkl', 'wb') as f: + pickle.dump(val_nusc_infos, f) + + +if __name__ == '__main__': + import yaml + import argparse + from pathlib import Path + from easydict import EasyDict + + parser = argparse.ArgumentParser(description='arg parser') + parser.add_argument('--cfg_file', type=str, default=None, help='specify the config of dataset') + parser.add_argument('--func', type=str, default='create_truckscenes_infos', help='') + parser.add_argument('--version', type=str, default='v1.1-trainval', help='') + parser.add_argument('--with_cam', action='store_true', default=False, help='use camera or not') + args = parser.parse_args() + + if args.func == 'create_truckscenes_infos': + dataset_cfg = EasyDict(yaml.safe_load(open(args.cfg_file))) + ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve() + dataset_cfg.VERSION = args.version + create_truckscenes_info( + version=dataset_cfg.VERSION, + data_path=ROOT_DIR / 'data' / 'truckscenes', + save_path=ROOT_DIR / 'data' / 'truckscenes', + max_sweeps=dataset_cfg.MAX_SWEEPS, + with_cam=args.with_cam + ) + + truckscenes_dataset = TruckScenesDataset( + dataset_cfg=dataset_cfg, class_names=None, + root_path=ROOT_DIR / 'data' / 'truckscenes', + logger=common_utils.create_logger(), training=True + ) + truckscenes_dataset.create_groundtruth_database(max_sweeps=dataset_cfg.MAX_SWEEPS) diff --git a/pcdet/datasets/truckscenes/truckscenes_utils.py b/pcdet/datasets/truckscenes/truckscenes_utils.py new file mode 100644 index 000000000..84bf7b0ca --- /dev/null +++ b/pcdet/datasets/truckscenes/truckscenes_utils.py @@ -0,0 +1,631 @@ +""" +The NuScenes data pre-processing and evaluation is modified from +https://github.com/traveller59/second.pytorch and https://github.com/poodarchu/Det3D +""" + +import operator +from functools import reduce +from pathlib import Path + +import numpy as np +import tqdm +from truckscenes.utils.data_classes import Box +from truckscenes.utils.geometry_utils import transform_matrix +from pyquaternion import Quaternion + +DefaultAttribute = { + "car": "vehicle.moving", + "truck": "vehicle.moving", + "bus": "vehicle.moving", + "trailer": "vehicle.parked", + "other_vehicle": "vehicle.moving", + "motorcycle": "vehicle.moving", + "bicycle": "cycle.with_rider", + "pedestrian": "pedestrian.moving", + "traffic_cone": "traffic_sign.temporary", + "barrier": "", + "animal": "", + "traffic_sign": "traffic_sign.pole_mounted" + } + +# map_name_from_general_to_detection = { +# 'human.pedestrian.adult': 'pedestrian', +# 'human.pedestrian.child': 'pedestrian', +# 'human.pedestrian.wheelchair': 'ignore', +# 'human.pedestrian.stroller': 'ignore', +# 'human.pedestrian.personal_mobility': 'ignore', +# 'human.pedestrian.police_officer': 'pedestrian', +# 'human.pedestrian.construction_worker': 'pedestrian', +# 'animal': 'ignore', +# 'vehicle.car': 'car', +# 'vehicle.motorcycle': 'motorcycle', +# 'vehicle.bicycle': 'bicycle', +# 'vehicle.bus.bendy': 'bus', +# 'vehicle.bus.rigid': 'bus', +# 'vehicle.truck': 'truck', +# 'vehicle.construction': 'construction_vehicle', +# 'vehicle.emergency.ambulance': 'ignore', +# 'vehicle.emergency.police': 'ignore', +# 'vehicle.trailer': 'trailer', +# 'movable_object.barrier': 'barrier', +# 'movable_object.trafficcone': 'traffic_cone', +# 'movable_object.pushable_pullable': 'ignore', +# 'movable_object.debris': 'ignore', +# 'static_object.bicycle_rack': 'ignore', +# } + +map_name_from_general_to_detection = { + 'animal': 'animal', + 'human.pedestrian.adult': 'pedestrian', + 'human.pedestrian.child': 'pedestrian', + 'human.pedestrian.construction': 'pedestrian', + 'human.pedestrian.construction_worker': 'pedestrian', + 'human.pedestrian.stroller': 'ignore', + 'human.pedestrian.wheelchair': 'ignore', + 'human.pedestrian.personal_mobility': 'ignore', + 'human.pedestrian.police_officer': 'pedestrian', + 'movable_object.barrier': 'barrier', + 'movable_object.trafficcone': 'traffic_cone', + 'movable_object.pushable_pullable': 'ignore', + 'movable_object.debris': 'ignore', + 'static_object.traffic_sign': 'traffic_sign', + 'static_object.bicycle_rack': 'ignore', + 'vehicle.bicycle': 'bicycle', + 'vehicle.bus.bendy': 'bus', + 'vehicle.bus.rigid': 'bus', + 'vehicle.car': 'car', + 'vehicle.construction': 'other_vehicle', + 'vehicle.ego_trailer': 'ignore', + 'vehicle.motorcycle': 'motorcycle', + 'vehicle.other': 'other_vehicle', + 'vehicle.trailer': 'trailer', + 'vehicle.train': 'ignore', + 'vehicle.truck': 'truck', + 'vehicle.emergency.ambulance': 'ignore', + 'vehicle.emergency.police': 'ignore', +} + +cls_attr_dist = { + 'barrier': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'bicycle': { + 'cycle.with_rider': 2791, + 'cycle.without_rider': 8946, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'bus': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 9092, + 'vehicle.parked': 3294, + 'vehicle.stopped': 3881, + }, + 'car': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 114304, + 'vehicle.parked': 330133, + 'vehicle.stopped': 46898, + }, + 'construction_vehicle': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 882, + 'vehicle.parked': 11549, + 'vehicle.stopped': 2102, + }, + 'ignore': { + 'cycle.with_rider': 307, + 'cycle.without_rider': 73, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 165, + 'vehicle.parked': 400, + 'vehicle.stopped': 102, + }, + 'motorcycle': { + 'cycle.with_rider': 4233, + 'cycle.without_rider': 8326, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'pedestrian': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 157444, + 'pedestrian.sitting_lying_down': 13939, + 'pedestrian.standing': 46530, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'traffic_cone': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'trailer': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 3421, + 'vehicle.parked': 19224, + 'vehicle.stopped': 1895, + }, + 'truck': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 21339, + 'vehicle.parked': 55626, + 'vehicle.stopped': 11097, + }, +} + + +def get_available_scenes(nusc): + available_scenes = [] + print('total scene num:', len(nusc.scene)) + for scene in nusc.scene: + scene_token = scene['token'] + scene_rec = nusc.get('scene', scene_token) + sample_rec = nusc.get('sample', scene_rec['first_sample_token']) + sd_rec = nusc.get('sample_data', sample_rec['data']['RADAR_LEFT_FRONT']) + has_more_frames = True + scene_not_exist = False + while has_more_frames: + lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token']) + if not Path(lidar_path).exists(): + scene_not_exist = True + break + else: + break + # if not sd_rec['next'] == '': + # sd_rec = nusc.get('sample_data', sd_rec['next']) + # else: + # has_more_frames = False + if scene_not_exist: + continue + available_scenes.append(scene) + print('exist scene num:', len(available_scenes)) + return available_scenes + + +def get_sample_data(nusc, sample_data_token, selected_anntokens=None): + """ + Returns the data path as well as all annotations related to that sample_data. + Note that the boxes are transformed into the current sensor's coordinate frame. + Args: + nusc: + sample_data_token: Sample_data token. + selected_anntokens: If provided only return the selected annotation. + + Returns: + + """ + # Retrieve sensor & pose records + sd_record = nusc.get('sample_data', sample_data_token) + cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token']) + sensor_record = nusc.get('sensor', cs_record['sensor_token']) + pose_record = nusc.get('ego_pose', sd_record['ego_pose_token']) + + data_path = nusc.get_sample_data_path(sample_data_token) + + if sensor_record['modality'] == 'camera': + cam_intrinsic = np.array(cs_record['camera_intrinsic']) + imsize = (sd_record['width'], sd_record['height']) + else: + cam_intrinsic = imsize = None + + # Retrieve all sample annotations and map to sensor coordinate system. + if selected_anntokens is not None: + boxes = list(map(nusc.get_box, selected_anntokens)) + else: + boxes = nusc.get_boxes(sample_data_token) + + # Make list of Box objects including coord system transforms. + box_list = [] + for box in boxes: + box.velocity = nusc.box_velocity(box.token) + # Move box to ego vehicle coord system + box.translate(-np.array(pose_record['translation'])) + box.rotate(Quaternion(pose_record['rotation']).inverse) + + # Move box to sensor coord system + box.translate(-np.array(cs_record['translation'])) + box.rotate(Quaternion(cs_record['rotation']).inverse) + + box_list.append(box) + + return data_path, box_list, cam_intrinsic + + +def quaternion_yaw(q: Quaternion) -> float: + """ + Calculate the yaw angle from a quaternion. + Note that this only works for a quaternion that represents a box in lidar or global coordinate frame. + It does not work for a box in the camera frame. + :param q: Quaternion of interest. + :return: Yaw angle in radians. + """ + + # Project into xy plane. + v = np.dot(q.rotation_matrix, np.array([1, 0, 0])) + + # Measure yaw using arctan. + yaw = np.arctan2(v[1], v[0]) + + return yaw + + +def obtain_sensor2top( + nusc, sensor_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, sensor_type="lidar" +): + """Obtain the info with RT matric from general sensor to Top LiDAR. + + Args: + nusc (class): Dataset class in the nuScenes dataset. + sensor_token (str): Sample data token corresponding to the + specific sensor type. + l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3). + l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego + in shape (3, 3). + e2g_t (np.ndarray): Translation from ego to global in shape (1, 3). + e2g_r_mat (np.ndarray): Rotation matrix from ego to global + in shape (3, 3). + sensor_type (str): Sensor to calibrate. Default: 'lidar'. + + Returns: + sweep (dict): Sweep information after transformation. + """ + sd_rec = nusc.get("sample_data", sensor_token) + cs_record = nusc.get("calibrated_sensor", sd_rec["calibrated_sensor_token"]) + pose_record = nusc.get("ego_pose", sd_rec["ego_pose_token"]) + data_path = str(nusc.get_sample_data_path(sd_rec["token"])) + # if os.getcwd() in data_path: # path from lyftdataset is absolute path + # data_path = data_path.split(f"{os.getcwd()}/")[-1] # relative path + sweep = { + "data_path": data_path, + "type": sensor_type, + "sample_data_token": sd_rec["token"], + "sensor2ego_translation": cs_record["translation"], + "sensor2ego_rotation": cs_record["rotation"], + "ego2global_translation": pose_record["translation"], + "ego2global_rotation": pose_record["rotation"], + "timestamp": sd_rec["timestamp"], + } + l2e_r_s = sweep["sensor2ego_rotation"] + l2e_t_s = sweep["sensor2ego_translation"] + e2g_r_s = sweep["ego2global_rotation"] + e2g_t_s = sweep["ego2global_translation"] + + # obtain the RT from sensor to Top LiDAR + # sweep->ego->global->ego'->lidar + l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix + e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix + R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ ( + np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T + ) + T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ ( + np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T + ) + T -= ( + e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) + + l2e_t @ np.linalg.inv(l2e_r_mat).T + ).squeeze(0) + sweep["sensor2lidar_rotation"] = R.T # points @ R.T + T + sweep["sensor2lidar_translation"] = T + return sweep + + +def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10, with_cam=False): + train_nusc_infos = [] + val_nusc_infos = [] + progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True) + + ref_chan = 'RADAR_LEFT_FRONT' # The radar channel from which we track back n sweeps to aggregate the point cloud. + chan = 'RADAR_LEFT_FRONT' # The reference channel of the current sample_rec that the point clouds are mapped to. + + for index, sample in enumerate(nusc.sample): + progress_bar.update() + + ref_sd_token = sample['data'][ref_chan] + ref_sd_rec = nusc.get('sample_data', ref_sd_token) + ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token']) + ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token']) + ref_time = 1e-6 * ref_sd_rec['timestamp'] + + ref_lidar_path, ref_boxes, _ = get_sample_data(nusc, ref_sd_token) + + ref_cam_front_token = sample['data']['CAMERA_LEFT_FRONT'] + ref_cam_path, _, ref_cam_intrinsic = nusc.get_sample_data(ref_cam_front_token) + + # Homogeneous transform from ego car frame to reference frame + ref_from_car = transform_matrix( + ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True + ) + + # Homogeneous transformation matrix from global to _current_ ego car frame + car_from_global = transform_matrix( + ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True, + ) + + info = { + 'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(), + 'cam_front_path': Path(ref_cam_path).relative_to(data_path).__str__(), + 'cam_intrinsic': ref_cam_intrinsic, + 'token': sample['token'], + 'sweeps': [], + 'ref_from_car': ref_from_car, + 'car_from_global': car_from_global, + 'timestamp': ref_time, + } + if with_cam: + info['cams'] = dict() + l2e_r = ref_cs_rec["rotation"] + l2e_t = ref_cs_rec["translation"], + e2g_r = ref_pose_rec["rotation"] + e2g_t = ref_pose_rec["translation"] + l2e_r_mat = Quaternion(l2e_r).rotation_matrix + e2g_r_mat = Quaternion(e2g_r).rotation_matrix + + # obtain 6 image's information per frame + camera_types = [ + "CAMERA_LEFT_FRONT", + "CAMERA_RIGHT_FRONT", + "CAMERA_LEFT_BACK", + "CAMERA_RIGHT_BACK", + ] + for cam in camera_types: + cam_token = sample["data"][cam] + cam_path, _, camera_intrinsics = nusc.get_sample_data(cam_token) + cam_info = obtain_sensor2top( + nusc, cam_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, cam + ) + cam_info['data_path'] = Path(cam_info['data_path']).relative_to(data_path).__str__() + cam_info.update(camera_intrinsics=camera_intrinsics) + info["cams"].update({cam: cam_info}) + + + sample_data_token = sample['data'][chan] + curr_sd_rec = nusc.get('sample_data', sample_data_token) + sweeps = [] + while len(sweeps) < max_sweeps - 1: + if curr_sd_rec['prev'] == '': + if len(sweeps) == 0: + sweep = { + 'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(), + 'sample_data_token': curr_sd_rec['token'], + 'transform_matrix': None, + 'time_lag': curr_sd_rec['timestamp'] * 0, + } + sweeps.append(sweep) + else: + sweeps.append(sweeps[-1]) + else: + curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev']) + + # Get past pose + current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token']) + global_from_car = transform_matrix( + current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False, + ) + + # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. + current_cs_rec = nusc.get( + 'calibrated_sensor', curr_sd_rec['calibrated_sensor_token'] + ) + car_from_current = transform_matrix( + current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False, + ) + + tm = reduce(np.dot, [ref_from_car, car_from_global, global_from_car, car_from_current]) + + lidar_path = nusc.get_sample_data_path(curr_sd_rec['token']) + + time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp'] + + sweep = { + 'lidar_path': Path(lidar_path).relative_to(data_path).__str__(), + 'sample_data_token': curr_sd_rec['token'], + 'transform_matrix': tm, + 'global_from_car': global_from_car, + 'car_from_current': car_from_current, + 'time_lag': time_lag, + } + sweeps.append(sweep) + + info['sweeps'] = sweeps + + assert len(info['sweeps']) == max_sweeps - 1, \ + f"sweep {curr_sd_rec['token']} only has {len(info['sweeps'])} sweeps, " \ + f"you should duplicate to sweep num {max_sweeps - 1}" + + if not test: + annotations = [nusc.get('sample_annotation', token) for token in sample['anns']] + + # the filtering gives 0.5~1 map improvement + num_lidar_pts = np.array([anno['num_lidar_pts'] for anno in annotations]) + num_radar_pts = np.array([anno['num_radar_pts'] for anno in annotations]) + mask = (num_radar_pts > -1) + + locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3) + dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]] # wlh == > dxdydz (lwh) + velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3) + rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(-1, 1) + names = np.array([b.name for b in ref_boxes]) + tokens = np.array([b.token for b in ref_boxes]) + gt_boxes = np.concatenate([locs, dims, rots, velocity[:, :2]], axis=1) + + assert len(annotations) == len(gt_boxes) == len(velocity) + + info['gt_boxes'] = gt_boxes[mask, :] + info['gt_boxes_velocity'] = velocity[mask, :] + info['gt_names'] = np.array([map_name_from_general_to_detection[name] for name in names])[mask] + info['gt_boxes_token'] = tokens[mask] + info['num_lidar_pts'] = num_lidar_pts[mask] + info['num_radar_pts'] = num_radar_pts[mask] + + if sample['scene_token'] in train_scenes: + train_nusc_infos.append(info) + else: + val_nusc_infos.append(info) + + progress_bar.close() + return train_nusc_infos, val_nusc_infos + + +def boxes_lidar_to_nusenes(det_info): + boxes3d = det_info['boxes_lidar'] + scores = det_info['score'] + labels = det_info['pred_labels'] + + box_list = [] + for k in range(boxes3d.shape[0]): + quat = Quaternion(axis=[0, 0, 1], radians=boxes3d[k, 6]) + velocity = (*boxes3d[k, 7:9], 0.0) if boxes3d.shape[1] == 9 else (0.0, 0.0, 0.0) + box = Box( + boxes3d[k, :3], + boxes3d[k, [4, 3, 5]], # wlh + quat, label=labels[k], score=scores[k], velocity=velocity, + ) + box_list.append(box) + return box_list + + +def lidar_nusc_box_to_global(nusc, boxes, sample_token): + s_record = nusc.get('sample', sample_token) + sample_data_token = s_record['data']['RADAR_LEFT_FRONT'] + + sd_record = nusc.get('sample_data', sample_data_token) + cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token']) + sensor_record = nusc.get('sensor', cs_record['sensor_token']) + pose_record = nusc.get('ego_pose', sd_record['ego_pose_token']) + + data_path = nusc.get_sample_data_path(sample_data_token) + box_list = [] + for box in boxes: + # Move box to ego vehicle coord system + box.rotate(Quaternion(cs_record['rotation'])) + box.translate(np.array(cs_record['translation'])) + # Move box to global coord system + box.rotate(Quaternion(pose_record['rotation'])) + box.translate(np.array(pose_record['translation'])) + box_list.append(box) + return box_list + + +def transform_det_annos_to_nusc_annos(det_annos, nusc): + nusc_annos = { + 'results': {}, + 'meta': None, + } + + for det in det_annos: + annos = [] + box_list = boxes_lidar_to_nusenes(det) + box_list = lidar_nusc_box_to_global( + nusc=nusc, boxes=box_list, sample_token=det['metadata']['token'] + ) + + for k, box in enumerate(box_list): + name = det['name'][k] + if np.sqrt(box.velocity[0] ** 2 + box.velocity[1] ** 2) > 0.2: + if name in ['car', 'construction_vehicle', 'bus', 'truck', 'trailer']: + attr = 'vehicle.moving' + elif name in ['bicycle', 'motorcycle']: + attr = 'cycle.with_rider' + else: + attr = None + else: + if name in ['pedestrian']: + attr = 'pedestrian.standing' + elif name in ['bus']: + attr = 'vehicle.stopped' + else: + attr = None + attr = attr if attr is not None else max( + cls_attr_dist[name].items(), key=operator.itemgetter(1))[0] + nusc_anno = { + 'sample_token': det['metadata']['token'], + 'translation': box.center.tolist(), + 'size': box.wlh.tolist(), + 'rotation': box.orientation.elements.tolist(), + 'velocity': box.velocity[:2].tolist(), + 'detection_name': name, + 'detection_score': box.score, + 'attribute_name': attr + } + annos.append(nusc_anno) + + nusc_annos['results'].update({det["metadata"]["token"]: annos}) + + return nusc_annos + + +def format_truckscenes_results(metrics, class_names, version='default'): + result = '----------------Nuscene %s results-----------------\n' % version + for name in class_names: + threshs = ', '.join(list(metrics['all']['label_aps'][name].keys())) + ap_list = list(metrics['all']['label_aps'][name].values()) + + err_name =', '.join([x.split('_')[0] for x in list(metrics['all']['label_tp_errors'][name].keys())]) + error_list = list(metrics['all']['label_tp_errors'][name].values()) + + result += f'***{name} error@{err_name} | AP@{threshs}\n' + result += ', '.join(['%.2f' % x for x in error_list]) + ' | ' + result += ', '.join(['%.2f' % (x * 100) for x in ap_list]) + result += f" | mean AP: {metrics['all']['mean_dist_aps'][name]}" + result += '\n' + + result += '--------------average performance-------------\n' + details = {} + for key, val in metrics['all']['tp_errors'].items(): + result += '%s:\t %.4f\n' % (key, val) + details[key] = val + + result += 'mAP:\t %.4f\n' % metrics['all']['mean_ap'] + result += 'NDS:\t %.4f\n' % metrics['all']['nd_score'] + + details.update({ + 'mAP': metrics['all']['mean_ap'], + 'NDS': metrics['all']['nd_score'], + }) + + return result, details diff --git a/pcdet/datasets/truckscenes_multiradar/truckscenes_dataset.py b/pcdet/datasets/truckscenes_multiradar/truckscenes_dataset.py new file mode 100644 index 000000000..c84e2b125 --- /dev/null +++ b/pcdet/datasets/truckscenes_multiradar/truckscenes_dataset.py @@ -0,0 +1,566 @@ +import copy +import pickle +from pathlib import Path + +import numpy as np +from tqdm import tqdm + +from ...ops.roiaware_pool3d import roiaware_pool3d_utils +from ...utils import common_utils +from ..dataset import DatasetTemplate +from pyquaternion import Quaternion +from PIL import Image +from truckscenes.utils.data_classes import RadarPointCloud, reduce + + +class TruckScenesMultiRadarDataset(DatasetTemplate): + def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None): + root_path = (root_path if root_path is not None else Path(dataset_cfg.DATA_PATH)) / dataset_cfg.VERSION + super().__init__( + dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger + ) + self.infos = [] + self.camera_config = self.dataset_cfg.get('CAMERA_CONFIG', None) + if self.camera_config is not None: + self.use_camera = self.camera_config.get('USE_CAMERA', True) + self.camera_image_config = self.camera_config.IMAGE + else: + self.use_camera = False + + self.include_truckscenes_data(self.mode) + if self.training and self.dataset_cfg.get('BALANCED_RESAMPLING', False): + self.infos = self.balanced_infos_resampling(self.infos) + + def include_truckscenes_data(self, mode): + self.logger.info('Loading TruckScenes dataset') + truckscenes_infos = [] + + for info_path in self.dataset_cfg.INFO_PATH[mode]: + info_path = self.root_path / info_path + if not info_path.exists(): + continue + with open(info_path, 'rb') as f: + infos = pickle.load(f) + truckscenes_infos.extend(infos) + + self.infos.extend(truckscenes_infos) + self.logger.info('Total samples for TruckScenes dataset: %d' % (len(truckscenes_infos))) + + def balanced_infos_resampling(self, infos): + """ + Class-balanced sampling of nuScenes dataset from https://arxiv.org/abs/1908.09492 + """ + if self.class_names is None: + return infos + + cls_infos = {name: [] for name in self.class_names} + for info in infos: + for name in set(info['gt_names']): + if name in self.class_names: + cls_infos[name].append(info) + + duplicated_samples = sum([len(v) for _, v in cls_infos.items()]) + cls_dist = {k: len(v) / duplicated_samples for k, v in cls_infos.items()} + + sampled_infos = [] + + frac = 1.0 / len(self.class_names) + ratios = [frac / v for v in cls_dist.values()] + + for cur_cls_infos, ratio in zip(list(cls_infos.values()), ratios): + sampled_infos += np.random.choice( + cur_cls_infos, int(len(cur_cls_infos) * ratio) + ).tolist() + self.logger.info('Total samples after balanced resampling: %s' % (len(sampled_infos))) + + cls_infos_new = {name: [] for name in self.class_names} + for info in sampled_infos: + for name in set(info['gt_names']): + if name in self.class_names: + cls_infos_new[name].append(info) + + cls_dist_new = {k: len(v) / len(sampled_infos) for k, v in cls_infos_new.items()} + + return sampled_infos + + def get_sweep(self, sweep_info): + def remove_ego_points(points, center_radius=1.0): + mask = ~((np.abs(points[:, 0]) < center_radius) & (np.abs(points[:, 1]) < center_radius)) + return points[mask] + + lidar_path = self.root_path / sweep_info['lidar_path'] + points_sweep = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])[:, :4] + points_sweep = remove_ego_points(points_sweep).T + if sweep_info['transform_matrix'] is not None: + num_points = points_sweep.shape[1] + points_sweep[:3, :] = sweep_info['transform_matrix'].dot( + np.vstack((points_sweep[:3, :], np.ones(num_points))))[:3, :] + + cur_times = sweep_info['time_lag'] * np.ones((1, points_sweep.shape[1])) + return points_sweep.T, cur_times.T + + def get_merged_sweeps(self, index, max_sweeps=1): + merged_points = [] + info = self.infos[index] + + # Get the neutral anchor matrices + ref_from_car = info['ref_from_car'] + car_from_global = info['car_from_global'] + #print(info['radars'].items()) + + for chan_name, chan_info in info['radars'].items(): + + + # Load the raw points for this radar + points = self.get_lidar_with_sweeps(index, chan_name, max_sweeps) + + lidar_from_radar = reduce(np.dot, [ + #ref_from_car, + car_from_global, + chan_info['global_from_radar_car'], + chan_info['car_from_radar'] + ]) + + xyz_hom = np.hstack((points[:, 0:3], np.ones((points.shape[0], 1)))) + pts_lidar = xyz_hom @ lidar_from_radar.T + points[:, 0:3] = pts_lidar[:, 0:3] + + rot_matrix = lidar_from_radar[0:3, 0:3] + + # Temporarily pad 2D velocity into 3D so the rotation matrix works + v_3d = np.hstack((points[:, 4:6], np.zeros((points.shape[0], 1)))) + + # Rotate the velocity vectors + vel_lidar = v_3d @ rot_matrix.T + + # Put the newly rotated vx and vy back into their correct columns + points[:, 4:6] = vel_lidar[:, 0:2] + merged_points.append(points) + + + final_point_cloud = np.concatenate(merged_points, axis=0) + + return final_point_cloud + + def get_lidar_with_sweeps(self, index, chan, max_sweeps=1): + info = self.infos[index]['radars'][chan] + lidar_path = self.root_path / info['lidar_path'] + + pc = RadarPointCloud.from_file(str(lidar_path)) + points = pc.points.T + + v_ego_local = np.zeros(3) + if len(info['sweeps']) > 0: + v_x, v_y, v_z = self.compensate_ego_motion(points, info['sweeps'][0]) + + points[:, 3] = v_x + points[:, 4] = v_y + points[:, 5] = v_z + + sweep_points_list = [points] + sweep_times_list = [np.zeros((points.shape[0], 1))] + + if max_sweeps > 1 and len(info['sweeps']) > 0: + #print(f"Sample {index} has {len(info['sweeps'])} sweeps, sampling up to {max_sweeps - 1} sweeps for augmentation.") + num_sweeps = len(info['sweeps']) + choices = np.random.choice(num_sweeps, max_sweeps - 1, replace=False) + + for k in choices: + sweep = info['sweeps'][k] + sweep_path = self.root_path / sweep['lidar_path'] + sweep_pc = RadarPointCloud.from_file(str(sweep_path)) + sweep_points = sweep_pc.points.T + + v_x, v_y, v_z = self.compensate_ego_motion(sweep_points, sweep) + + sweep_points[:, 3] = v_x + sweep_points[:, 4] = v_y + sweep_points[:, 5] = v_z + + if sweep['transform_matrix'] is not None: + sweep_points[:, :3] = sweep_points[:, :3] @ sweep['transform_matrix'][:3, :3].T + sweep_points[:, :3] += sweep['transform_matrix'][:3, 3] + sweep_points[:, 3:6] = sweep_points[:, 3:6] @ sweep['transform_matrix'][:3, :3].T + + sweep_points_list.append(sweep_points) + sweep_times_list.append(sweep['time_lag'] * np.ones((sweep_points.shape[0], 1))) + + all_points = np.concatenate(sweep_points_list, axis=0) + all_times = np.concatenate(sweep_times_list, axis=0) + + combined = np.concatenate((all_points, all_times), axis=1) + final_points = combined[:, [0, 1, 2, 6, 3, 4, 7]] + + return final_points.astype(np.float32) + + def compensate_ego_motion(self, points, info_sweep): + if info_sweep['transform_matrix'] is None or info_sweep['time_lag'] <= 0: + print(f"Warning: No valid transform matrix or non-positive time lag for ego motion compensation. Returning original velocities. {info_sweep}") + return points[:, 3], points[:, 4], points[:, 5] + + translation = info_sweep['transform_matrix'][:3, 3] + v_ego_local = -translation / info_sweep['time_lag'] + #print(f"Ego velocity (local): {v_ego_local}, time lag: {info_sweep['time_lag']}") + #print(f"Mean speed before compensation: {np.mean(points[:, 3])} {np.mean(points[:, 4])} {np.mean(points[:, 5])}") + + d = np.linalg.norm(points[:, :3], axis=1) + ux = points[:, 0] / (d[:] + 1e-8) + uy = points[:, 1] / (d[:] + 1e-8) + uz = points[:, 2] / (d[:] + 1e-8) + + v_ego_local_points = ux * v_ego_local[0] + uy * v_ego_local[1] + uz * v_ego_local[2] + v_points = ux * points[:, 3] + uy * points[:, 4] + uz * points[:, 5] + v_comp = v_points + v_ego_local_points + #print(f"Mean speed after compensation: {np.mean(ux * v_comp)} {np.mean(uy * v_comp)} {np.mean(uz * v_comp)}") + + return ux * v_comp, uy * v_comp, uz * v_comp + + def crop_image(self, input_dict): + W, H = input_dict["ori_shape"] + imgs = input_dict["camera_imgs"] + img_process_infos = [] + crop_images = [] + for img in imgs: + if self.training == True: + fH, fW = self.camera_image_config.FINAL_DIM + resize_lim = self.camera_image_config.RESIZE_LIM_TRAIN + resize = np.random.uniform(*resize_lim) + resize_dims = (int(W * resize), int(H * resize)) + newW, newH = resize_dims + crop_h = newH - fH + crop_w = int(np.random.uniform(0, max(0, newW - fW))) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) + else: + fH, fW = self.camera_image_config.FINAL_DIM + resize_lim = self.camera_image_config.RESIZE_LIM_TEST + resize = np.mean(resize_lim) + resize_dims = (int(W * resize), int(H * resize)) + newW, newH = resize_dims + crop_h = newH - fH + crop_w = int(max(0, newW - fW) / 2) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) + + # reisze and crop image + img = img.resize(resize_dims) + img = img.crop(crop) + crop_images.append(img) + img_process_infos.append([resize, crop, False, 0]) + + input_dict['img_process_infos'] = img_process_infos + input_dict['camera_imgs'] = crop_images + return input_dict + + def load_camera_info(self, input_dict, info): + input_dict["image_paths"] = [] + input_dict["lidar2camera"] = [] + input_dict["lidar2image"] = [] + input_dict["camera2ego"] = [] + input_dict["camera_intrinsics"] = [] + input_dict["camera2lidar"] = [] + + for _, camera_info in info["cams"].items(): + input_dict["image_paths"].append(camera_info["data_path"]) + + # lidar to camera transform + lidar2camera_r = np.linalg.inv(camera_info["sensor2lidar_rotation"]) + lidar2camera_t = ( + camera_info["sensor2lidar_translation"] @ lidar2camera_r.T + ) + lidar2camera_rt = np.eye(4).astype(np.float32) + lidar2camera_rt[:3, :3] = lidar2camera_r.T + lidar2camera_rt[3, :3] = -lidar2camera_t + input_dict["lidar2camera"].append(lidar2camera_rt.T) + + # camera intrinsics + camera_intrinsics = np.eye(4).astype(np.float32) + camera_intrinsics[:3, :3] = camera_info["camera_intrinsics"] + input_dict["camera_intrinsics"].append(camera_intrinsics) + + # lidar to image transform + lidar2image = camera_intrinsics @ lidar2camera_rt.T + input_dict["lidar2image"].append(lidar2image) + + # camera to ego transform + camera2ego = np.eye(4).astype(np.float32) + camera2ego[:3, :3] = Quaternion( + camera_info["sensor2ego_rotation"] + ).rotation_matrix + camera2ego[:3, 3] = camera_info["sensor2ego_translation"] + input_dict["camera2ego"].append(camera2ego) + + # camera to lidar transform + camera2lidar = np.eye(4).astype(np.float32) + camera2lidar[:3, :3] = camera_info["sensor2lidar_rotation"] + camera2lidar[:3, 3] = camera_info["sensor2lidar_translation"] + input_dict["camera2lidar"].append(camera2lidar) + # read image + filename = input_dict["image_paths"] + images = [] + for name in filename: + images.append(Image.open(str(self.root_path / name))) + + input_dict["camera_imgs"] = images + input_dict["ori_shape"] = images[0].size + + # resize and crop image + input_dict = self.crop_image(input_dict) + + return input_dict + + def __len__(self): + if self._merge_all_iters_to_one_epoch: + return len(self.infos) * self.total_epochs + + return len(self.infos) + + def __getitem__(self, index): + if self._merge_all_iters_to_one_epoch: + index = index % len(self.infos) + + info = copy.deepcopy(self.infos[index]) + points = self.get_merged_sweeps(index, max_sweeps=self.dataset_cfg.MAX_SWEEPS) + + + #print(f"DEBUG - PATH : {info['lidar_path']}, POINTS SHAPE: {points.shape}") + + input_dict = { + 'points': points, + 'frame_id': Path(info['lidar_path']).stem, + 'metadata': {'token': info['token']} + } + + if 'gt_boxes' in info: + if self.dataset_cfg.get('FILTER_MIN_POINTS_IN_GT', False): + import torch + from pcdet.ops.roiaware_pool3d import roiaware_pool3d_utils + + gt_boxes = info['gt_boxes'] + + gt_vel = gt_boxes[:, 6:9] + + if np.isnan(gt_vel).any() or np.isinf(gt_vel).any(): + #print(f"Warning: Found NaN or Inf values in gt_boxes for sample {index}. Replacing with zeros.") + gt_boxes[:, 6:9] = np.nan_to_num(gt_vel, nan=0.0, posinf=0.0, neginf=0.0) + + # Filter out empty gt_boxes + if len(gt_boxes) == 0: + mask = np.zeros(0, dtype=bool) + else: + boxes_tensor = torch.from_numpy(gt_boxes[:, :7]).float().reshape(-1, 7) + points_tensor = torch.from_numpy(points[:, :3]).float().reshape(-1, 3) + + try: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(boxes_tensor, points_tensor).numpy() + except AssertionError: + point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(points_tensor, boxes_tensor).numpy() + + if point_indices.ndim == 2: + point_counts = point_indices.sum(axis=1) + else: + point_counts = np.zeros(gt_boxes.shape[0], dtype=np.int32) + valid_indices = point_indices[point_indices != -1] + unique_boxes, counts = np.unique(valid_indices, return_counts=True) + point_counts[unique_boxes] = counts + + mask = point_counts >= self.dataset_cfg.FILTER_MIN_POINTS_IN_GT + else: + mask = None + + input_dict.update({ + 'gt_names': info['gt_names'] if mask is None else info['gt_names'][mask], + 'gt_boxes': info['gt_boxes'] if mask is None else info['gt_boxes'][mask] + }) + if self.use_camera: + input_dict = self.load_camera_info(input_dict, info) + + data_dict = self.prepare_data(data_dict=input_dict) + + if self.dataset_cfg.get('SET_NAN_VELOCITY_TO_ZEROS', False) and 'gt_boxes' in info: + gt_boxes = data_dict['gt_boxes'] + gt_boxes[np.isnan(gt_boxes)] = 0 + data_dict['gt_boxes'] = gt_boxes + + if not self.dataset_cfg.PRED_VELOCITY and 'gt_boxes' in data_dict: + data_dict['gt_boxes'] = data_dict['gt_boxes'][:, [0, 1, 2, 3, 4, 5, 6, -1]] + + return data_dict + + def evaluation(self, det_annos, class_names, **kwargs): + import json + from truckscenes.truckscenes import TruckScenes + from . import truckscenes_utils + nusc = TruckScenes(version=self.dataset_cfg.VERSION, dataroot=str(self.root_path), verbose=True) + nusc_annos = truckscenes_utils.transform_det_annos_to_nusc_annos(det_annos, nusc) + nusc_annos['meta'] = { + 'use_camera': False, + 'use_lidar': False, + 'use_radar': True, + 'use_map': False, + 'use_external': False, + } + + output_path = Path(kwargs['output_path']) + output_path.mkdir(exist_ok=True, parents=True) + res_path = str(output_path / 'results_nusc.json') + with open(res_path, 'w') as f: + json.dump(nusc_annos, f) + + self.logger.info(f'The predictions of TruckScenes have been saved to {res_path}') + + if self.dataset_cfg.VERSION == 'v1.1-test': + return 'No ground-truth annotations for evaluation', {} + + from truckscenes.eval.detection.config import config_factory + from truckscenes.eval.detection.evaluate import TruckScenesEval + + eval_set_map = { + 'v1.1-mini': 'mini_val', + 'v1.1-trainval': 'val', + 'v1.1-test': 'test' + } + try: + eval_version = 'detection_cvpr_2024' + eval_config = config_factory(eval_version) + except: + eval_version = 'cvpr_2024' + eval_config = config_factory(eval_version) + + nusc_eval = TruckScenesEval( + nusc, + config=eval_config, + result_path=res_path, + eval_set=eval_set_map[self.dataset_cfg.VERSION], + output_dir=str(output_path), + verbose=True, + ) + metrics_summary = nusc_eval.main(plot_examples=0, render_curves=False) + + with open(output_path / 'metrics_summary.json', 'r') as f: + metrics = json.load(f) + + result_str, result_dict = truckscenes_utils.format_truckscenes_results(metrics, self.class_names, version=eval_version) + return result_str, result_dict + + def create_groundtruth_database(self, used_classes=None, max_sweeps=10): + import torch + + database_save_path = self.root_path / f'gt_database_{max_sweeps}sweeps_withvelo' + db_info_save_path = self.root_path / f'truckscenes_dbinfos_{max_sweeps}sweeps_withvelo.pkl' + + database_save_path.mkdir(parents=True, exist_ok=True) + all_db_infos = {} + + for idx in tqdm(range(len(self.infos))): + sample_idx = idx + info = self.infos[idx] + points = self.get_merged_sweeps(idx, max_sweeps=max_sweeps) + gt_boxes = info['gt_boxes'] + gt_names = info['gt_names'] + + box_idxs_of_pts = roiaware_pool3d_utils.points_in_boxes_gpu( + torch.from_numpy(points[:, 0:3]).unsqueeze(dim=0).float().cuda(), + torch.from_numpy(gt_boxes[:, 0:7]).unsqueeze(dim=0).float().cuda() + ).long().squeeze(dim=0).cpu().numpy() + + for i in range(gt_boxes.shape[0]): + filename = '%s_%s_%d.bin' % (sample_idx, gt_names[i], i) + filepath = database_save_path / filename + gt_points = points[box_idxs_of_pts == i] + + gt_points[:, :3] -= gt_boxes[i, :3] + with open(filepath, 'w') as f: + gt_points.tofile(f) + + if (used_classes is None) or gt_names[i] in used_classes: + db_path = str(filepath.relative_to(self.root_path)) # gt_database/xxxxx.bin + db_info = {'name': gt_names[i], 'path': db_path, 'image_idx': sample_idx, 'gt_idx': i, + 'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0]} + if gt_names[i] in all_db_infos: + all_db_infos[gt_names[i]].append(db_info) + else: + all_db_infos[gt_names[i]] = [db_info] + for k, v in all_db_infos.items(): + print('Database %s: %d' % (k, len(v))) + + with open(db_info_save_path, 'wb') as f: + pickle.dump(all_db_infos, f) + + +def create_truckscenes_info(version, data_path, save_path, max_sweeps=10, with_cam=False): + from truckscenes.truckscenes import TruckScenes + from truckscenes.utils import splits + from . import truckscenes_utils + data_path = data_path / version + save_path = save_path / version + + assert version in ['v1.1-trainval', 'v1.1-test', 'v1.1-mini'] + if version == 'v1.1-trainval': + train_scenes = splits.train + val_scenes = splits.val + elif version == 'v1.1-test': + train_scenes = splits.test + val_scenes = [] + elif version == 'v1.1-mini': + train_scenes = splits.mini_train + val_scenes = splits.mini_val + else: + raise NotImplementedError + + nusc = TruckScenes(version=version, dataroot=data_path, verbose=True) + available_scenes = truckscenes_utils.get_available_scenes(nusc) + available_scene_names = [s['name'] for s in available_scenes] + train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes)) + val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes)) + train_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in train_scenes]) + val_scenes = set([available_scenes[available_scene_names.index(s)]['token'] for s in val_scenes]) + + print('%s: train scene(%d), val scene(%d)' % (version, len(train_scenes), len(val_scenes))) + + train_nusc_infos, val_nusc_infos = truckscenes_utils.fill_trainval_infos( + data_path=data_path, nusc=nusc, train_scenes=train_scenes, val_scenes=val_scenes, + test='test' in version, max_sweeps=max_sweeps, with_cam=with_cam + ) + + if version == 'v1.1-test': + print('test sample: %d' % len(train_nusc_infos)) + with open(save_path / f'truckscenes_infos_{max_sweeps}sweeps_test.pkl', 'wb') as f: + pickle.dump(train_nusc_infos, f) + else: + print('train sample: %d, val sample: %d' % (len(train_nusc_infos), len(val_nusc_infos))) + with open(save_path / f'truckscenes_infos_{max_sweeps}sweeps_train.pkl', 'wb') as f: + pickle.dump(train_nusc_infos, f) + with open(save_path / f'truckscenes_infos_{max_sweeps}sweeps_val.pkl', 'wb') as f: + pickle.dump(val_nusc_infos, f) + + +if __name__ == '__main__': + import yaml + import argparse + from pathlib import Path + from easydict import EasyDict + + parser = argparse.ArgumentParser(description='arg parser') + parser.add_argument('--cfg_file', type=str, default=None, help='specify the config of dataset') + parser.add_argument('--func', type=str, default='create_truckscenes_infos', help='') + parser.add_argument('--version', type=str, default='v1.1-trainval', help='') + parser.add_argument('--with_cam', action='store_true', default=False, help='use camera or not') + args = parser.parse_args() + + if args.func == 'create_truckscenes_infos': + dataset_cfg = EasyDict(yaml.safe_load(open(args.cfg_file))) + ROOT_DIR = (Path(__file__).resolve().parent / '../../../').resolve() + dataset_cfg.VERSION = args.version + create_truckscenes_info( + version=dataset_cfg.VERSION, + data_path=ROOT_DIR / 'data' / 'truckscenes', + save_path=ROOT_DIR / 'data' / 'truckscenes', + max_sweeps=dataset_cfg.MAX_SWEEPS, + with_cam=args.with_cam + ) + + truckscenes_dataset = TruckScenesMultiRadarDataset( + dataset_cfg=dataset_cfg, class_names=None, + root_path=ROOT_DIR / 'data' / 'truckscenes', + logger=common_utils.create_logger(), training=True + ) + truckscenes_dataset.create_groundtruth_database(max_sweeps=dataset_cfg.MAX_SWEEPS) diff --git a/pcdet/datasets/truckscenes_multiradar/truckscenes_utils.py b/pcdet/datasets/truckscenes_multiradar/truckscenes_utils.py new file mode 100644 index 000000000..a1eed470a --- /dev/null +++ b/pcdet/datasets/truckscenes_multiradar/truckscenes_utils.py @@ -0,0 +1,675 @@ +""" +The NuScenes data pre-processing and evaluation is modified from +https://github.com/traveller59/second.pytorch and https://github.com/poodarchu/Det3D +""" + +import operator +from functools import reduce +from pathlib import Path + +import numpy as np +import tqdm +from truckscenes.utils.data_classes import Box +from truckscenes.utils.geometry_utils import transform_matrix +from pyquaternion import Quaternion + +DefaultAttribute = { + "car": "vehicle.moving", + "truck": "vehicle.moving", + "bus": "vehicle.moving", + "trailer": "vehicle.parked", + "other_vehicle": "vehicle.moving", + "motorcycle": "vehicle.moving", + "bicycle": "cycle.with_rider", + "pedestrian": "pedestrian.moving", + "traffic_cone": "traffic_sign.temporary", + "barrier": "", + "animal": "", + "traffic_sign": "traffic_sign.pole_mounted" + } + +# map_name_from_general_to_detection = { +# 'human.pedestrian.adult': 'pedestrian', +# 'human.pedestrian.child': 'pedestrian', +# 'human.pedestrian.wheelchair': 'ignore', +# 'human.pedestrian.stroller': 'ignore', +# 'human.pedestrian.personal_mobility': 'ignore', +# 'human.pedestrian.police_officer': 'pedestrian', +# 'human.pedestrian.construction_worker': 'pedestrian', +# 'animal': 'ignore', +# 'vehicle.car': 'car', +# 'vehicle.motorcycle': 'motorcycle', +# 'vehicle.bicycle': 'bicycle', +# 'vehicle.bus.bendy': 'bus', +# 'vehicle.bus.rigid': 'bus', +# 'vehicle.truck': 'truck', +# 'vehicle.construction': 'construction_vehicle', +# 'vehicle.emergency.ambulance': 'ignore', +# 'vehicle.emergency.police': 'ignore', +# 'vehicle.trailer': 'trailer', +# 'movable_object.barrier': 'barrier', +# 'movable_object.trafficcone': 'traffic_cone', +# 'movable_object.pushable_pullable': 'ignore', +# 'movable_object.debris': 'ignore', +# 'static_object.bicycle_rack': 'ignore', +# } + +map_name_from_general_to_detection = { + 'animal': 'animal', + 'human.pedestrian.adult': 'pedestrian', + 'human.pedestrian.child': 'pedestrian', + 'human.pedestrian.construction': 'pedestrian', + 'human.pedestrian.construction_worker': 'pedestrian', + 'human.pedestrian.stroller': 'ignore', + 'human.pedestrian.wheelchair': 'ignore', + 'human.pedestrian.personal_mobility': 'ignore', + 'human.pedestrian.police_officer': 'pedestrian', + 'movable_object.barrier': 'barrier', + 'movable_object.trafficcone': 'traffic_cone', + 'movable_object.pushable_pullable': 'ignore', + 'movable_object.debris': 'ignore', + 'static_object.traffic_sign': 'traffic_sign', + 'static_object.bicycle_rack': 'ignore', + 'vehicle.bicycle': 'bicycle', + 'vehicle.bus.bendy': 'bus', + 'vehicle.bus.rigid': 'bus', + 'vehicle.car': 'car', + 'vehicle.construction': 'other_vehicle', + 'vehicle.ego_trailer': 'ignore', + 'vehicle.motorcycle': 'motorcycle', + 'vehicle.other': 'other_vehicle', + 'vehicle.trailer': 'trailer', + 'vehicle.train': 'ignore', + 'vehicle.truck': 'truck', + 'vehicle.emergency.ambulance': 'ignore', + 'vehicle.emergency.police': 'ignore', +} + +cls_attr_dist = { + 'barrier': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'bicycle': { + 'cycle.with_rider': 2791, + 'cycle.without_rider': 8946, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'bus': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 9092, + 'vehicle.parked': 3294, + 'vehicle.stopped': 3881, + }, + 'car': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 114304, + 'vehicle.parked': 330133, + 'vehicle.stopped': 46898, + }, + 'construction_vehicle': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 882, + 'vehicle.parked': 11549, + 'vehicle.stopped': 2102, + }, + 'ignore': { + 'cycle.with_rider': 307, + 'cycle.without_rider': 73, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 165, + 'vehicle.parked': 400, + 'vehicle.stopped': 102, + }, + 'motorcycle': { + 'cycle.with_rider': 4233, + 'cycle.without_rider': 8326, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'pedestrian': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 157444, + 'pedestrian.sitting_lying_down': 13939, + 'pedestrian.standing': 46530, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'traffic_cone': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 0, + 'vehicle.parked': 0, + 'vehicle.stopped': 0, + }, + 'trailer': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 3421, + 'vehicle.parked': 19224, + 'vehicle.stopped': 1895, + }, + 'truck': { + 'cycle.with_rider': 0, + 'cycle.without_rider': 0, + 'pedestrian.moving': 0, + 'pedestrian.sitting_lying_down': 0, + 'pedestrian.standing': 0, + 'vehicle.moving': 21339, + 'vehicle.parked': 55626, + 'vehicle.stopped': 11097, + }, +} + + +def get_available_scenes(nusc): + available_scenes = [] + print('total scene num:', len(nusc.scene)) + for scene in nusc.scene: + scene_token = scene['token'] + scene_rec = nusc.get('scene', scene_token) + sample_rec = nusc.get('sample', scene_rec['first_sample_token']) + sd_rec = nusc.get('sample_data', sample_rec['data']['RADAR_LEFT_FRONT']) + has_more_frames = True + scene_not_exist = False + while has_more_frames: + lidar_path, boxes, _ = nusc.get_sample_data(sd_rec['token']) + if not Path(lidar_path).exists(): + scene_not_exist = True + break + else: + break + # if not sd_rec['next'] == '': + # sd_rec = nusc.get('sample_data', sd_rec['next']) + # else: + # has_more_frames = False + if scene_not_exist: + continue + available_scenes.append(scene) + print('exist scene num:', len(available_scenes)) + return available_scenes + +def get_sample_data(nusc, sample_data_token, selected_anntokens=None): + """ + Returns the data path as well as all annotations related to that sample_data. + Note that the boxes are transformed into the current sensor's coordinate frame. + Args: + nusc: + sample_data_token: Sample_data token. + selected_anntokens: If provided only return the selected annotation. + + Returns: + + """ + # Retrieve sensor & pose records + sd_record = nusc.get('sample_data', sample_data_token) + cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token']) + sensor_record = nusc.get('sensor', cs_record['sensor_token']) + pose_record = nusc.get('ego_pose', sd_record['ego_pose_token']) + + data_path = nusc.get_sample_data_path(sample_data_token) + + if sensor_record['modality'] == 'camera': + cam_intrinsic = np.array(cs_record['camera_intrinsic']) + imsize = (sd_record['width'], sd_record['height']) + else: + cam_intrinsic = imsize = None + + # Retrieve all sample annotations and map to sensor coordinate system. + if selected_anntokens is not None: + boxes = list(map(nusc.get_box, selected_anntokens)) + else: + boxes = nusc.get_boxes(sample_data_token) + + # Make list of Box objects including coord system transforms. + box_list = [] + for box in boxes: + box.velocity = nusc.box_velocity(box.token) + # Move box to ego vehicle coord system + # box.translate(-np.array(pose_record['translation'])) + # box.rotate(Quaternion(pose_record['rotation']).inverse) + + # Move box to sensor coord system + box.translate(-np.array(cs_record['translation'])) + box.rotate(Quaternion(cs_record['rotation']).inverse) + + box_list.append(box) + + return data_path, box_list, cam_intrinsic + + +def quaternion_yaw(q: Quaternion) -> float: + """ + Calculate the yaw angle from a quaternion. + Note that this only works for a quaternion that represents a box in lidar or global coordinate frame. + It does not work for a box in the camera frame. + :param q: Quaternion of interest. + :return: Yaw angle in radians. + """ + + # Project into xy plane. + v = np.dot(q.rotation_matrix, np.array([1, 0, 0])) + + # Measure yaw using arctan. + yaw = np.arctan2(v[1], v[0]) + + return yaw + + +def obtain_sensor2top( + nusc, sensor_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, sensor_type="lidar" +): + """Obtain the info with RT matric from general sensor to Top LiDAR. + + Args: + nusc (class): Dataset class in the nuScenes dataset. + sensor_token (str): Sample data token corresponding to the + specific sensor type. + l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3). + l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego + in shape (3, 3). + e2g_t (np.ndarray): Translation from ego to global in shape (1, 3). + e2g_r_mat (np.ndarray): Rotation matrix from ego to global + in shape (3, 3). + sensor_type (str): Sensor to calibrate. Default: 'lidar'. + + Returns: + sweep (dict): Sweep information after transformation. + """ + sd_rec = nusc.get("sample_data", sensor_token) + cs_record = nusc.get("calibrated_sensor", sd_rec["calibrated_sensor_token"]) + pose_record = nusc.get("ego_pose", sd_rec["ego_pose_token"]) + data_path = str(nusc.get_sample_data_path(sd_rec["token"])) + # if os.getcwd() in data_path: # path from lyftdataset is absolute path + # data_path = data_path.split(f"{os.getcwd()}/")[-1] # relative path + sweep = { + "data_path": data_path, + "type": sensor_type, + "sample_data_token": sd_rec["token"], + "sensor2ego_translation": cs_record["translation"], + "sensor2ego_rotation": cs_record["rotation"], + "ego2global_translation": pose_record["translation"], + "ego2global_rotation": pose_record["rotation"], + "timestamp": sd_rec["timestamp"], + } + l2e_r_s = sweep["sensor2ego_rotation"] + l2e_t_s = sweep["sensor2ego_translation"] + e2g_r_s = sweep["ego2global_rotation"] + e2g_t_s = sweep["ego2global_translation"] + + # obtain the RT from sensor to Top LiDAR + # sweep->ego->global->ego'->lidar + l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix + e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix + R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ ( + np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T + ) + T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ ( + np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T + ) + T -= ( + e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) + + l2e_t @ np.linalg.inv(l2e_r_mat).T + ).squeeze(0) + sweep["sensor2lidar_rotation"] = R.T # points @ R.T + T + sweep["sensor2lidar_translation"] = T + return sweep + + +def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10, with_cam=False): + train_nusc_infos = [] + val_nusc_infos = [] + progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True) + + ref_chan = 'RADAR_LEFT_FRONT' # The radar channel from which we track back n sweeps to aggregate the point cloud. + chans = ['RADAR_LEFT_FRONT', 'RADAR_RIGHT_FRONT'] # The reference channel of the current sample_rec that the point clouds are mapped to. + + for index, sample in enumerate(nusc.sample): + progress_bar.update() + + ref_sd_token = sample['data'][ref_chan] + ref_sd_rec = nusc.get('sample_data', ref_sd_token) + ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token']) + ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token']) + ref_time = 1e-6 * ref_sd_rec['timestamp'] + + ref_lidar_path, ref_boxes, _ = get_sample_data(nusc, ref_sd_token) + + ref_cam_front_token = sample['data']['CAMERA_LEFT_FRONT'] + ref_cam_path, _, ref_cam_intrinsic = nusc.get_sample_data(ref_cam_front_token) + + # Homogeneous transform from ego car frame to reference frame + ref_from_car = transform_matrix( + ref_cs_rec['translation'], Quaternion(ref_cs_rec['rotation']), inverse=True + ) + + # Homogeneous transformation matrix from global to _current_ ego car frame + car_from_global = transform_matrix( + ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True, + ) + + info = { + 'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(), + 'cam_front_path': Path(ref_cam_path).relative_to(data_path).__str__(), + 'cam_intrinsic': ref_cam_intrinsic, + 'token': sample['token'], + 'sweeps': [], + 'ref_from_car': ref_from_car, + 'car_from_global': car_from_global, + 'timestamp': ref_time, + } + if with_cam: + info['cams'] = dict() + l2e_r = ref_cs_rec["rotation"] + l2e_t = ref_cs_rec["translation"], + e2g_r = ref_pose_rec["rotation"] + e2g_t = ref_pose_rec["translation"] + l2e_r_mat = Quaternion(l2e_r).rotation_matrix + e2g_r_mat = Quaternion(e2g_r).rotation_matrix + + # obtain 6 image's information per frame + camera_types = [ + "CAMERA_LEFT_FRONT", + "CAMERA_RIGHT_FRONT", + "CAMERA_LEFT_BACK", + "CAMERA_RIGHT_BACK", + ] + for cam in camera_types: + cam_token = sample["data"][cam] + cam_path, _, camera_intrinsics = nusc.get_sample_data(cam_token) + cam_info = obtain_sensor2top( + nusc, cam_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, cam + ) + cam_info['data_path'] = Path(cam_info['data_path']).relative_to(data_path).__str__() + cam_info.update(camera_intrinsics=camera_intrinsics) + info["cams"].update({cam: cam_info}) + + info['radars'] = dict() + for chan in chans: + if chan not in sample['data']: + print(f"Warning: {chan} not in sample_data for sample {sample['token']}, skip this channel.") + continue + + radar_token = sample['data'][chan] + curr_sd_rec = nusc.get('sample_data', radar_token) + current_cs_rec = nusc.get('calibrated_sensor', curr_sd_rec['calibrated_sensor_token']) + current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token']) + ref_time = 1e-6 * curr_sd_rec['timestamp'] + + # Transform from Sensor to Ego Car + car_from_radar = transform_matrix( + current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False + ) + + # Transform from Ego Car to Global Map + global_from_radar_car = transform_matrix( + current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False + ) + + radar_from_car = transform_matrix( + current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=True + ) + car_from_global_radar = transform_matrix( + current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=True + ) + + sweeps = [] + while len(sweeps) < max_sweeps - 1: + if curr_sd_rec['prev'] == '': + if len(sweeps) == 0: + sweep = { + 'lidar_path': Path(ref_lidar_path).relative_to(data_path).__str__(), + 'sample_data_token': curr_sd_rec['token'], + 'transform_matrix': None, + 'time_lag': curr_sd_rec['timestamp'] * 0, + } + sweeps.append(sweep) + else: + sweeps.append(sweeps[-1]) + else: + curr_sd_rec = nusc.get('sample_data', curr_sd_rec['prev']) + + # Get past pose + current_pose_rec = nusc.get('ego_pose', curr_sd_rec['ego_pose_token']) + global_from_car = transform_matrix( + current_pose_rec['translation'], Quaternion(current_pose_rec['rotation']), inverse=False, + ) + + # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. + current_cs_rec = nusc.get( + 'calibrated_sensor', curr_sd_rec['calibrated_sensor_token'] + ) + car_from_current = transform_matrix( + current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']), inverse=False, + ) + + # This maps: Past Radar -> Past Ego -> Global -> Present Ego -> Present Radar + tm = reduce(np.dot, [ + radar_from_car, # Present Ego -> Present Radar + car_from_global_radar, # Global -> Present Ego + global_from_car, # Past Ego -> Global + car_from_current # Past Radar -> Past Ego + ]) + + lidar_path = nusc.get_sample_data_path(curr_sd_rec['token']) + + time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp'] + + sweep = { + 'lidar_path': Path(lidar_path).relative_to(data_path).__str__(), + 'sample_data_token': curr_sd_rec['token'], + 'transform_matrix': tm, + 'global_from_car': global_from_car, + 'car_from_current': car_from_current, + 'time_lag': time_lag, + } + sweeps.append(sweep) + + info['radars'][chan] = { + 'lidar_path': Path(nusc.get_sample_data_path(radar_token)).relative_to(data_path).__str__(), + 'car_from_radar': car_from_radar, + 'global_from_radar_car': global_from_radar_car, + 'sweeps': sweeps + } + + assert len(info['radars'][chan]['sweeps']) == max_sweeps - 1, \ + f"sweep {curr_sd_rec['token']} only has {len(info['radars'][chan]['sweeps'])} sweeps, " \ + f"you should duplicate to sweep num {max_sweeps - 1}" + + if not test: + annotations = [nusc.get('sample_annotation', token) for token in sample['anns']] + + # the filtering gives 0.5~1 map improvement + num_lidar_pts = np.array([anno['num_lidar_pts'] for anno in annotations]) + num_radar_pts = np.array([anno['num_radar_pts'] for anno in annotations]) + mask = (num_radar_pts > 0) + + # New boxes in global frame + box = [nusc.get_box(token) for token in sample['anns']] + + # pull it down to the flat Ego/IMU frame + for b in box: + b.translate(-np.array(ref_pose_rec['translation'])) + b.rotate(Quaternion(ref_pose_rec['rotation']).inverse) + + locs = np.array([b.center for b in box]).reshape(-1, 3) + dims = np.array([b.wlh for b in box]).reshape(-1, 3)[:, [1, 0, 2]] # wlh == > dxdydz (lwh) + velocity = np.array([b.velocity for b in box]).reshape(-1, 3) + rots = np.array([quaternion_yaw(b.orientation) for b in box]).reshape(-1, 1) + names = np.array([b.name for b in box]) + tokens = np.array([b.token for b in box]) + gt_boxes = np.concatenate([locs, dims, rots, velocity[:, :2]], axis=1) + + assert len(annotations) == len(gt_boxes) == len(velocity) + + info['gt_boxes'] = gt_boxes[mask, :] + info['gt_boxes_velocity'] = velocity[mask, :] + info['gt_names'] = np.array([map_name_from_general_to_detection[name] for name in names])[mask] + info['gt_boxes_token'] = tokens[mask] + info['num_lidar_pts'] = num_lidar_pts[mask] + info['num_radar_pts'] = num_radar_pts[mask] + + if sample['scene_token'] in train_scenes: + train_nusc_infos.append(info) + else: + val_nusc_infos.append(info) + + progress_bar.close() + return train_nusc_infos, val_nusc_infos + + +def boxes_lidar_to_nusenes(det_info): + boxes3d = det_info['boxes_lidar'] + scores = det_info['score'] + labels = det_info['pred_labels'] + + box_list = [] + for k in range(boxes3d.shape[0]): + quat = Quaternion(axis=[0, 0, 1], radians=boxes3d[k, 6]) + velocity = (*boxes3d[k, 7:9], 0.0) if boxes3d.shape[1] == 9 else (0.0, 0.0, 0.0) + box = Box( + boxes3d[k, :3], + boxes3d[k, [4, 3, 5]], # wlh + quat, label=labels[k], score=scores[k], velocity=velocity, + ) + box_list.append(box) + return box_list + + +def lidar_nusc_box_to_global(nusc, boxes, sample_token): + s_record = nusc.get('sample', sample_token) + sample_data_token = s_record['data']['RADAR_LEFT_FRONT'] + + sd_record = nusc.get('sample_data', sample_data_token) + cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token']) + sensor_record = nusc.get('sensor', cs_record['sensor_token']) + pose_record = nusc.get('ego_pose', sd_record['ego_pose_token']) + + data_path = nusc.get_sample_data_path(sample_data_token) + box_list = [] + for box in boxes: + # Move box to ego vehicle coord system + # box.rotate(Quaternion(cs_record['rotation'])) + # box.translate(np.array(cs_record['translation'])) + # Move box to global coord system + box.rotate(Quaternion(pose_record['rotation'])) + box.translate(np.array(pose_record['translation'])) + box_list.append(box) + return box_list + + +def transform_det_annos_to_nusc_annos(det_annos, nusc): + nusc_annos = { + 'results': {}, + 'meta': None, + } + + for det in det_annos: + annos = [] + box_list = boxes_lidar_to_nusenes(det) + box_list = lidar_nusc_box_to_global( + nusc=nusc, boxes=box_list, sample_token=det['metadata']['token'] + ) + + for k, box in enumerate(box_list): + name = det['name'][k] + if np.sqrt(box.velocity[0] ** 2 + box.velocity[1] ** 2) > 0.2: + if name in ['car', 'construction_vehicle', 'bus', 'truck', 'trailer']: + attr = 'vehicle.moving' + elif name in ['bicycle', 'motorcycle']: + attr = 'cycle.with_rider' + else: + attr = None + else: + if name in ['pedestrian']: + attr = 'pedestrian.standing' + elif name in ['bus']: + attr = 'vehicle.stopped' + else: + attr = None + attr = attr if attr is not None else max( + cls_attr_dist[name].items(), key=operator.itemgetter(1))[0] + nusc_anno = { + 'sample_token': det['metadata']['token'], + 'translation': box.center.tolist(), + 'size': box.wlh.tolist(), + 'rotation': box.orientation.elements.tolist(), + 'velocity': box.velocity[:2].tolist(), + 'detection_name': name, + 'detection_score': box.score, + 'attribute_name': attr + } + annos.append(nusc_anno) + + nusc_annos['results'].update({det["metadata"]["token"]: annos}) + + return nusc_annos + + +def format_truckscenes_results(metrics, class_names, version='default'): + result = '----------------Nuscene %s results-----------------\n' % version + for name in class_names: + threshs = ', '.join(list(metrics['all']['label_aps'][name].keys())) + ap_list = list(metrics['all']['label_aps'][name].values()) + + err_name =', '.join([x.split('_')[0] for x in list(metrics['all']['label_tp_errors'][name].keys())]) + error_list = list(metrics['all']['label_tp_errors'][name].values()) + + result += f'***{name} error@{err_name} | AP@{threshs}\n' + result += ', '.join(['%.2f' % x for x in error_list]) + ' | ' + result += ', '.join(['%.2f' % (x * 100) for x in ap_list]) + result += f" | mean AP: {metrics['all']['mean_dist_aps'][name]}" + result += '\n' + + result += '--------------average performance-------------\n' + details = {} + for key, val in metrics['all']['tp_errors'].items(): + result += '%s:\t %.4f\n' % (key, val) + details[key] = val + + result += 'mAP:\t %.4f\n' % metrics['all']['mean_ap'] + result += 'NDS:\t %.4f\n' % metrics['all']['nd_score'] + + details.update({ + 'mAP': metrics['all']['mean_ap'], + 'NDS': metrics['all']['nd_score'], + }) + + return result, details diff --git a/tools/cfgs/custom_models/PP.yaml b/tools/cfgs/custom_models/PP.yaml new file mode 100644 index 000000000..c06d8adbd --- /dev/null +++ b/tools/cfgs/custom_models/PP.yaml @@ -0,0 +1,178 @@ +CLASS_NAMES: ['Car', 'Cyclist', 'Pedestrian'] + +DATA_CONFIG: + _BASE_CONFIG_: /media/franco/hdd/matteogombia/OpenPCDet/tools/cfgs/dataset_configs/custom_dataset.yaml + FILTER_MIN_POINTS_IN_GT: 1 + + #POINT_CLOUD_RANGE: [0, -25.6, -10.0, 307.2, 25.6, 10.0] + POINT_CLOUD_RANGE: [0, -25.6, -10.0, 153.6, 25.6, 10.0] + DATA_PROCESSOR: + - NAME: mask_points_and_boxes_outside_range + REMOVE_OUTSIDE_BOXES: True + + - NAME: shuffle_points + SHUFFLE_ENABLED: { + 'train': True, + 'test': True + } + + - NAME: transform_points_to_voxels + VOXEL_SIZE: [0.32, 0.32, 20.0] + MAX_POINTS_PER_VOXEL: 10 + MAX_NUMBER_OF_VOXELS: { + 'train': 5000, + 'test': 5000 + } + +MODEL: + NAME: PointPillar + + VFE: + NAME: PillarVFE + WITH_DISTANCE: True + USE_ABSLOTE_XYZ: True + USE_NORM: True + NUM_FILTERS: [64] + + MAP_TO_BEV: + NAME: PointPillarScatter + NUM_BEV_FEATURES: 64 + + BACKBONE_2D: + NAME: BaseBEVBackbone + LAYER_NUMS: [3, 5, 5] + LAYER_STRIDES: [2, 2, 2] + NUM_FILTERS: [64, 128, 256] + UPSAMPLE_STRIDES: [1, 2, 4] + NUM_UPSAMPLE_FILTERS: [128, 128, 128] + + DENSE_HEAD: + NAME: AnchorHeadMulti + CLASS_AGNOSTIC: False + + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + USE_MULTIHEAD: True + SEPARATE_MULTIHEAD: True + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[4.578, 1.836, 1.736]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.132], + 'align_center': False, + 'feature_map_stride': 2, + 'matched_threshold': 0.25, + 'unmatched_threshold': 0.15 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.834, 0.623, 1.468]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.063], + 'align_center': False, + 'feature_map_stride': 2, + 'matched_threshold': 0.20, + 'unmatched_threshold': 0.10 + }, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.732, 0.779, 1.814]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.178], + 'align_center': False, + 'feature_map_stride': 2, + 'matched_threshold': 0.20, + 'unmatched_threshold': 0.10 + }, + ] + + SHARED_CONV_NUM_FILTER: 64 + + RPN_HEAD_CFGS: [ + { + 'HEAD_CLS_NAME': ['Car'], + }, + { + 'HEAD_CLS_NAME': ['Cyclist'], + }, + { + 'HEAD_CLS_NAME': ['Pedestrian'], + }, + ] + SEPARATE_REG_CONFIG: + NUM_MIDDLE_CONV: 1 + NUM_MIDDLE_FILTER: 64 + REG_LIST: ['reg:2', 'height:1', 'size:3', 'angle:2'] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + BOX_CODER_CONFIG: { + 'code_size': 7, + 'encode_angle_by_sincos': True + } + + + # LOSS_CONFIG: + # REG_LOSS_TYPE: WeightedL1Loss + # LOSS_WEIGHTS: { + # 'pos_cls_weight': 1.0, + # 'neg_cls_weight': 2.0, + # 'cls_weight': 1.0, + # 'loc_weight': 0.25, + # 'dir_weight': 0.2, + # 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2] + # } + LOSS_CONFIG: + REG_LOSS_TYPE: WeightedL1Loss + LOSS_WEIGHTS: { + 'pos_cls_weight': 2.0, + 'neg_cls_weight': 1.0, + 'cls_weight': 1.0, + 'loc_weight': 1.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.3 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: True + NMS_TYPE: nms_gpu + NMS_THRESH: 0.001 + NMS_PRE_MAXSIZE: 1000 + NMS_POST_MAXSIZE: 83 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 8 + NUM_EPOCHS: 30 + + OPTIMIZER: adam_onecycle + LR: 0.001 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/dataset_configs/custom_dataset.yaml b/tools/cfgs/dataset_configs/custom_dataset.yaml index f8fedc9f3..c0bf5ab99 100644 --- a/tools/cfgs/dataset_configs/custom_dataset.yaml +++ b/tools/cfgs/dataset_configs/custom_dataset.yaml @@ -1,55 +1,42 @@ DATASET: 'CustomDataset' -DATA_PATH: '../data/custom' +DATA_PATH: '/media/franco/hdd/dataset/radar_data' -POINT_CLOUD_RANGE: [-75.2, -75.2, -2, 75.2, 75.2, 4] +AUXILIARY_RADAR: ['right', 'left'] +#AUXILIARY_RADAR: [] #Keep just the main radar (front) + +MODE_WITH_LABELS: ["train", "val", "test"] # For training and evaluation +#MODE_WITH_LABELS: ["train", "val"] # For trying unannotated samples + +COMP_POINTS_MOTIONS: False -MAP_CLASS_TO_KITTI: { - 'Vehicle': 'Car', - 'Pedestrian': 'Pedestrian', - 'Cyclist': 'Cyclist', -} + +MAX_SWEEPS: 10 DATA_SPLIT: { 'train': train, 'test': val } +DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + + INFO_PATH: { - 'train': [custom_infos_train.pkl], - 'test': [custom_infos_val.pkl], + 'train': [truckscenes_infos_8sweeps_train.pkl], + 'test': [truckscenes_infos_8sweeps_val.pkl], } + + POINT_FEATURE_ENCODING: { encoding_type: absolute_coordinates_encoding, - used_feature_list: ['x', 'y', 'z', 'intensity'], - src_feature_list: ['x', 'y', 'z', 'intensity'], + used_feature_list: ['x', 'y', 'z', 'rcs', 'vx_comp', 'vy_comp', 'timestamp'], + src_feature_list: ['x', 'y', 'z', 'rcs', 'vx_comp', 'vy_comp', 'timestamp'], } -DATA_AUGMENTOR: - DISABLE_AUG_LIST: ['placeholder'] - AUG_CONFIG_LIST: - - NAME: gt_sampling - USE_ROAD_PLANE: False - DB_INFO_PATH: - - custom_dbinfos_train.pkl - PREPARE: { - filter_by_min_points: ['Vehicle:5', 'Pedestrian:5', 'Cyclist:5'], - } - - SAMPLE_GROUPS: ['Vehicle:20', 'Pedestrian:15', 'Cyclist:15'] - NUM_POINT_FEATURES: 4 - DATABASE_WITH_FAKELIDAR: False - REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] - LIMIT_WHOLE_SCENE: True - - - NAME: random_world_flip - ALONG_AXIS_LIST: ['x', 'y'] - - - NAME: random_world_rotation - WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] - - - NAME: random_world_scaling - WORLD_SCALE_RANGE: [0.95, 1.05] DATA_PROCESSOR: - NAME: mask_points_and_boxes_outside_range @@ -58,13 +45,18 @@ DATA_PROCESSOR: - NAME: shuffle_points SHUFFLE_ENABLED: { 'train': True, - 'test': False + 'test': True } - NAME: transform_points_to_voxels - VOXEL_SIZE: [0.1, 0.1, 0.15] - MAX_POINTS_PER_VOXEL: 5 + #VOXEL_SIZE: [0.4, 0.4, 8.0] + VOXEL_SIZE: [0.32, 0.32, 20.0] + MAX_POINTS_PER_VOXEL: 10 MAX_NUMBER_OF_VOXELS: { - 'train': 150000, - 'test': 150000 - } \ No newline at end of file + 'train': 5000, + 'test': 5000 + } + +#POINT_CLOUD_RANGE: [-153.2, -153.2, -5.0, 153.2, 153.2, 3.0] +POINT_CLOUD_RANGE: [0, -25.6, -10.0, 307.2, 25.6, 10.0] +PRED_VELOCITY: False diff --git a/tools/cfgs/dataset_configs/nuscenes_dataset.yaml b/tools/cfgs/dataset_configs/nuscenes_dataset.yaml index 5aa89d802..93438a95d 100644 --- a/tools/cfgs/dataset_configs/nuscenes_dataset.yaml +++ b/tools/cfgs/dataset_configs/nuscenes_dataset.yaml @@ -2,44 +2,30 @@ DATASET: 'NuScenesDataset' DATA_PATH: '../data/nuscenes' VERSION: 'v1.0-trainval' -MAX_SWEEPS: 10 -PRED_VELOCITY: True -SET_NAN_VELOCITY_TO_ZEROS: True -FILTER_MIN_POINTS_IN_GT: 1 +MAX_SWEEPS: 6 DATA_SPLIT: { 'train': train, 'test': val } -INFO_PATH: { - 'train': [nuscenes_infos_10sweeps_train.pkl], - 'test': [nuscenes_infos_10sweeps_val.pkl], -} - -POINT_CLOUD_RANGE: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0] - -BALANCED_RESAMPLING: True - DATA_AUGMENTOR: DISABLE_AUG_LIST: ['placeholder'] AUG_CONFIG_LIST: - NAME: gt_sampling DB_INFO_PATH: - - nuscenes_dbinfos_10sweeps_withvelo.pkl + - nuscenes_dbinfos_6sweeps_withvelo.pkl PREPARE: { filter_by_min_points: [ - 'car:5','truck:5', 'construction_vehicle:5', 'bus:5', 'trailer:5', - 'barrier:5', 'motorcycle:5', 'bicycle:5', 'pedestrian:5', 'traffic_cone:5' + 'car:1', 'bicycle:1', 'pedestrian:1' ], } SAMPLE_GROUPS: [ - 'car:2','truck:3', 'construction_vehicle:7', 'bus:4', 'trailer:6', - 'barrier:2', 'motorcycle:6', 'bicycle:6', 'pedestrian:2', 'traffic_cone:2' + 'car:6', 'pedestrian:2', 'bicycle:4' ] - NUM_POINT_FEATURES: 5 + NUM_POINT_FEATURES: 7 DATABASE_WITH_FAKELIDAR: False REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] LIMIT_WHOLE_SCENE: True @@ -54,10 +40,17 @@ DATA_AUGMENTOR: WORLD_SCALE_RANGE: [0.95, 1.05] +INFO_PATH: { + 'train': [nuscenes_infos_6sweeps_train.pkl], + 'test': [nuscenes_infos_6sweeps_val.pkl], +} + + + POINT_FEATURE_ENCODING: { encoding_type: absolute_coordinates_encoding, - used_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'], - src_feature_list: ['x', 'y', 'z', 'intensity', 'timestamp'], + used_feature_list: ['x', 'y', 'z', 'rcs', 'vx_comp', 'vy_comp', 'timestamp'], + src_feature_list: ['x', 'y', 'z','rcs', 'vx_comp', 'vy_comp', 'timestamp'], } @@ -72,9 +65,12 @@ DATA_PROCESSOR: } - NAME: transform_points_to_voxels - VOXEL_SIZE: [0.1, 0.1, 0.2] - MAX_POINTS_PER_VOXEL: 10 + VOXEL_SIZE: [0.4, 0.4, 8.0] + MAX_POINTS_PER_VOXEL: 20 MAX_NUMBER_OF_VOXELS: { - 'train': 60000, - 'test': 60000 + 'train': 30000, + 'test': 30000 } + +POINT_CLOUD_RANGE: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0] +PRED_VELOCITY: True diff --git a/tools/cfgs/dataset_configs/truckscenes_multiradar_5000_dataset.yaml b/tools/cfgs/dataset_configs/truckscenes_multiradar_5000_dataset.yaml new file mode 100644 index 000000000..71a8e1e1a --- /dev/null +++ b/tools/cfgs/dataset_configs/truckscenes_multiradar_5000_dataset.yaml @@ -0,0 +1,54 @@ +DATASET: 'TruckScenesMultiRadarDataset' +DATA_PATH: '../data/truckscenes' + +VERSION: 'v1.1-trainval' +MAX_SWEEPS: 10 + +DATA_SPLIT: { + 'train': train, + 'test': val +} + +DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + + +INFO_PATH: { + 'train': [truckscenes_infos_10sweeps_car_train.pkl], + 'test': [truckscenes_infos_10sweeps_car_val.pkl], +} + + + +POINT_FEATURE_ENCODING: { + encoding_type: absolute_coordinates_encoding, + used_feature_list: ['x', 'y', 'z', 'rcs', 'vx_comp', 'vy_comp', 'timestamp'], + src_feature_list: ['x', 'y', 'z', 'rcs', 'vx_comp', 'vy_comp', 'timestamp'], +} + + +DATA_PROCESSOR: + - NAME: mask_points_and_boxes_outside_range + REMOVE_OUTSIDE_BOXES: True + + - NAME: shuffle_points + SHUFFLE_ENABLED: { + 'train': True, + 'test': True + } + + - NAME: transform_points_to_voxels + #VOXEL_SIZE: [0.4, 0.4, 8.0] + VOXEL_SIZE: [0.32, 0.32, 20.0] + MAX_POINTS_PER_VOXEL: 10 + MAX_NUMBER_OF_VOXELS: { + 'train': 5000, + 'test': 5000 + } + +#POINT_CLOUD_RANGE: [-153.2, -153.2, -5.0, 153.2, 153.2, 3.0] +POINT_CLOUD_RANGE: [0, -25.6, -10.0, 153.6, 25.6, 10.0] +PRED_VELOCITY: False diff --git a/tools/cfgs/truckscenes_multiradar_models/anchors_5000.yaml b/tools/cfgs/truckscenes_multiradar_models/anchors_5000.yaml new file mode 100644 index 000000000..a9e3f1e71 --- /dev/null +++ b/tools/cfgs/truckscenes_multiradar_models/anchors_5000.yaml @@ -0,0 +1,178 @@ +CLASS_NAMES: ['car', 'bicycle', 'pedestrian'] + +DATA_CONFIG: + _BASE_CONFIG_: /media/franco/hdd/matteogombia/OpenPCDet/tools/cfgs/dataset_configs/truckscenes_multiradar_5000_dataset.yaml + FILTER_MIN_POINTS_IN_GT: 1 + + POINT_CLOUD_RANGE: [0, -25.6, -10.0, 153.6, 25.6, 10.0] + #POINT_CLOUD_RANGE: [-153.2, -153.2, -5.0, 153.2, 153.2, 3.0] + DATA_PROCESSOR: + - NAME: mask_points_and_boxes_outside_range + REMOVE_OUTSIDE_BOXES: True + + - NAME: shuffle_points + SHUFFLE_ENABLED: { + 'train': True, + 'test': True + } + + - NAME: transform_points_to_voxels + VOXEL_SIZE: [0.32, 0.32, 20.0] + MAX_POINTS_PER_VOXEL: 10 + MAX_NUMBER_OF_VOXELS: { + 'train': 5000, + 'test': 5000 + } + +MODEL: + NAME: PointPillar + + VFE: + NAME: PillarVFE + WITH_DISTANCE: True + USE_ABSLOTE_XYZ: True + USE_NORM: True + NUM_FILTERS: [64] + + MAP_TO_BEV: + NAME: PointPillarScatter + NUM_BEV_FEATURES: 64 + + BACKBONE_2D: + NAME: BaseBEVBackbone + LAYER_NUMS: [3, 5, 5] + LAYER_STRIDES: [2, 2, 2] + NUM_FILTERS: [64, 128, 256] + UPSAMPLE_STRIDES: [1, 2, 4] + NUM_UPSAMPLE_FILTERS: [128, 128, 128] + + DENSE_HEAD: + NAME: AnchorHeadMulti + CLASS_AGNOSTIC: False + + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + USE_MULTIHEAD: True + SEPARATE_MULTIHEAD: True + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'car', + 'anchor_sizes': [[4.578, 1.836, 1.736]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.132], + 'align_center': False, + 'feature_map_stride': 2, + 'matched_threshold': 0.25, + 'unmatched_threshold': 0.15 + }, + { + 'class_name': 'bicycle', + 'anchor_sizes': [[1.834, 0.623, 1.468]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.063], + 'align_center': False, + 'feature_map_stride': 2, + 'matched_threshold': 0.20, + 'unmatched_threshold': 0.10 + }, + { + 'class_name': 'pedestrian', + 'anchor_sizes': [[0.732, 0.779, 1.814]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.178], + 'align_center': False, + 'feature_map_stride': 2, + 'matched_threshold': 0.20, + 'unmatched_threshold': 0.10 + }, + ] + + SHARED_CONV_NUM_FILTER: 64 + + RPN_HEAD_CFGS: [ + { + 'HEAD_CLS_NAME': ['car'], + }, + { + 'HEAD_CLS_NAME': ['bicycle'], + }, + { + 'HEAD_CLS_NAME': ['pedestrian'], + }, + ] + SEPARATE_REG_CONFIG: + NUM_MIDDLE_CONV: 1 + NUM_MIDDLE_FILTER: 64 + REG_LIST: ['reg:2', 'height:1', 'size:3', 'angle:2'] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + BOX_CODER_CONFIG: { + 'code_size': 7, + 'encode_angle_by_sincos': True + } + + + # LOSS_CONFIG: + # REG_LOSS_TYPE: WeightedL1Loss + # LOSS_WEIGHTS: { + # 'pos_cls_weight': 1.0, + # 'neg_cls_weight': 2.0, + # 'cls_weight': 1.0, + # 'loc_weight': 0.25, + # 'dir_weight': 0.2, + # 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2] + # } + LOSS_CONFIG: + REG_LOSS_TYPE: WeightedL1Loss + LOSS_WEIGHTS: { + 'pos_cls_weight': 2.0, + 'neg_cls_weight': 1.0, + 'cls_weight': 1.0, + 'loc_weight': 1.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: nuscenes + + NMS_CONFIG: + MULTI_CLASSES_NMS: True + NMS_TYPE: nms_gpu + NMS_THRESH: 0.2 + NMS_PRE_MAXSIZE: 1000 + NMS_POST_MAXSIZE: 83 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 8 + NUM_EPOCHS: 30 + + OPTIMIZER: adam_onecycle + LR: 0.001 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10