diff --git a/depth.py b/depth.py new file mode 100644 index 0000000..d1daaca --- /dev/null +++ b/depth.py @@ -0,0 +1,75 @@ +import cv2 +import torch +import time +import numpy as np +# import open3d as o3d + +# Load MiDas model for depth estimation +model_type = "DPT_Hybrid" +midas = torch.hub.load("intel-isl/MiDas", model_type) +device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") +midas.to(device) +midas.eval() + +# Load transformers to resize and normalize the image +midas_transforms = torch.hub.load("intel-isl/MiDas", "transforms") +transform = midas_transforms.dpt_transform if model_type == "DPT_Large" or model_type == "DPT_Hybrid" else midas_transforms.small_transform + + +# Create Open3D visualizer +# vis = o3d.visualization.Visualizer() +# vis.create_window("3D Point Cloud") + +# Create a geometry object to store the point cloud +# pcd = o3d.geometry.PointCloud() + +def img2depth(img_rgb): + # Apply input transform + input_batch = transform(img_rgb).to(device) + + # Prediction and resize to original resolution + with torch.no_grad(): + prediction = midas(input_batch) + prediction = torch.nn.functional.interpolate( + prediction.unsqueeze(1), + size=img_rgb.shape[:2], + mode="bicubic", + align_corners=False, + ).squeeze() + + return prediction.cpu().numpy() + + # Apply colormap to depth map + # depth_colored = cv2.applyColorMap((depth_map / depth_map.max() * 255).astype(np.uint8), cv2.COLORMAP_JET) + + # Generate 3D point cloud with colors + # point_cloud = [] + # colors = [] + # for y in range(depth_map.shape[0]): + # for x in range(depth_map.shape[1]): + # depth = depth_map[y, x] + # if depth > 0: + # # Convert pixel coordinates to 3D space + # z = depth + # x_3d = x + # y_3d = depth_map.shape[0] - y + # # point_cloud.append([x_3d, y_3d, z]) + # colors.append(img_rgb[y, x]) + + # point_cloud = np.array(point_cloud) + # colors = np.array(colors) + + # Update point cloud data + # pcd.points = o3d.utility.Vector3dVector(point_cloud) + # pcd.colors = o3d.utility.Vector3dVector(colors / 255.0) + + # Scale point cloud to increase its size + # pcd.scale(1000.0, center=pcd.get_center()) + + # Visualize point cloud in 3D space + # vis.clear_geometries() + # vis.add_geometry(pcd) + # vis.poll_events() + # vis.update_renderer() + + # Display camera feed and depth map diff --git a/matchdepth.py b/matchdepth.py new file mode 100644 index 0000000..07ae0ad --- /dev/null +++ b/matchdepth.py @@ -0,0 +1,97 @@ +import numpy as np +import matplotlib.pyplot as plt +from sklearn.linear_model import LinearRegression + +def align_depth_maps(tof_depth, img_depth, tof_mask): + """ + Aligns the scale and bounds of image-to-depth output to match the TOF depth map + + Parameters: + ----------- + tof_depth : numpy.ndarray + Depth map from TOF sensor (incomplete but spatially accurate) + img_depth : numpy.ndarray + Depth map from image-to-depth model (complete but incorrectly scaled) + tof_mask : numpy.ndarray + Binary mask where 1 indicates valid TOF measurements + + Returns: + -------- + numpy.ndarray + Aligned image depth map with scale and bounds matching TOF depth map + """ + # Extract valid TOF depth values and corresponding image depth values + valid_mask = tof_mask > 0 + tof_valid = tof_depth + img_valid = img_depth*tof_mask + + if len(tof_valid) < 10: + raise ValueError("Not enough valid TOF points for reliable alignment") + + # Reshape for sklearn + tof_valid = tof_valid.reshape(-1, 1) + img_valid = img_valid.reshape(-1, 1) + + # Fit linear regression to find scale and offset + # We're solving for: tof_depth = a * img_depth + b + model = LinearRegression() + model.fit(img_valid, tof_valid) + + scale = model.coef_[0][0] + offset = model.intercept_[0] + + print(f"Fitted scale: {scale:.4f}") + print(f"Fitted offset: {offset:.4f}") + + # Apply transformation to the entire image depth map + aligned_depth = scale * img_depth + offset + + return aligned_depth + +def evaluate_alignment(tof_depth, aligned_depth, tof_mask): + """ + Evaluate how well the aligned depth map matches the TOF depth map + """ + valid_mask = tof_mask > 0 + tof_valid = tof_depth[valid_mask] + aligned_valid = aligned_depth[valid_mask] + + abs_diff = np.abs(tof_valid - aligned_valid) + mean_abs_error = np.mean(abs_diff) + max_abs_error = np.max(abs_diff) + + print(f"Mean Absolute Error: {mean_abs_error:.4f}") + print(f"Max Absolute Error: {max_abs_error:.4f}") + + return mean_abs_error, max_abs_error + +def visualize_results(tof_depth, img_depth, aligned_depth, tof_mask): + """ + Visualize original and aligned depth maps + """ + fig, axes = plt.subplots(2, 2, figsize=(12, 10)) + + # Original TOF depth map + im0 = axes[0, 0].imshow(tof_depth) + axes[0, 0].set_title("TOF Depth Map") + plt.colorbar(im0, ax=axes[0, 0]) + + # Original image depth map + im1 = axes[0, 1].imshow(img_depth) + axes[0, 1].set_title("Image-to-Depth Map (Original)") + plt.colorbar(im1, ax=axes[0, 1]) + + # Aligned image depth map + im2 = axes[1, 0].imshow(aligned_depth) + axes[1, 0].set_title("Image-to-Depth Map (Aligned)") + plt.colorbar(im2, ax=axes[1, 0]) + + # Difference where TOF is valid + diff = np.zeros_like(tof_depth) + diff[tof_mask > 0] = np.abs(tof_depth[tof_mask > 0] - aligned_depth[tof_mask > 0]) + im3 = axes[1, 1].imshow(diff) + axes[1, 1].set_title("Absolute Difference (where TOF is valid)") + plt.colorbar(im3, ax=axes[1, 1]) + + plt.tight_layout() + plt.show() \ No newline at end of file diff --git a/tof.py b/tof.py index fb04479..5e1319d 100644 --- a/tof.py +++ b/tof.py @@ -5,188 +5,22 @@ import cv2 #import matplotlib.pyplot as plt import requests -import open3d as o3d +from depth import img2depth +from matchdepth import align_depth_maps + +# import open3d as o3d HOST = '192.168.233.1' PORT = 80 -# def create_point_cloud_map(width, height, fx, fy, cx, cy): -# """ -# Create mapping arrays for converting depth image to point cloud. - -# Args: -# width, height: Image dimensions -# fx, fy: Focal lengths -# cx, cy: Principal point coordinates - -# Returns: -# x_map, y_map: Arrays that when multiplied by depth give X,Y coordinates -# """ -# # Create pixel coordinate grid -# v, u = np.meshgrid(np.arange(height), np.arange(width), indexing='ij') - -# # Convert to normalized image coordinates -# x_map = (u - cx) / fx -# y_map = (v - cy) / fy - -# return x_map, y_map - -# def depth_to_points(depth_image, x_map, y_map): -# """ -# Convert depth image to point cloud using pre-computed maps. - -# Args: -# depth_image: 2D depth array -# x_map, y_map: Pre-computed coordinate maps - -# Returns: -# points: Nx3 array of XYZ coordinates -# """ -# # Calculate X and Y coordinates -# X = depth_image * x_map -# Y = depth_image * y_map - -# # Stack coordinates into point cloud -# valid_points = depth_image > 0 -# points = np.stack(( -# X[valid_points], -# Y[valid_points], -# depth_image[valid_points] -# ), axis=-1) - -# return points - - - - - -def create_point_cloud_map(width, height, fx, fy, cx, cy): - """ - Create mapping arrays for converting depth image to point cloud. - - Args: - width, height: Image dimensions - fx, fy: Focal lengths - cx, cy: Principal point coordinates - - Returns: - x_map, y_map: Arrays that when multiplied by depth give X,Y coordinates - """ - # Create pixel coordinate grid - v, u = np.meshgrid(np.arange(height), np.arange(width), indexing='ij') - - # Convert to normalized image coordinates - x_map = (u - cx) / fx - y_map = (v - cy) / fy - - return x_map, y_map - -def transform_points(points, translation, rotation=None): - """ - Apply rigid transformation to points. - - Args: - points: Nx3 array of XYZ coordinates - translation: [tx, ty, tz] translation vector - rotation: 3x3 rotation matrix (optional) - - Returns: - transformed_points: Nx3 array of transformed coordinates - """ - if rotation is not None: - points = points @ rotation.T - return points + translation - -def depth_to_colored_points(depth_image, color_image, x_map, y_map, - color_intrinsics, depth_to_color_translation, - depth_to_color_rotation=None): - """ - Convert depth image to colored point cloud using pre-computed maps. - - Args: - depth_image: 2D depth array - color_image: RGB image array (height, width, 3) - x_map, y_map: Pre-computed coordinate maps for depth camera - color_intrinsics: (fx, fy, cx, cy) for RGB camera - depth_to_color_translation: [tx, ty, tz] from depth to color camera - depth_to_color_rotation: 3x3 rotation matrix (optional) - - Returns: - points: Nx3 array of XYZ coordinates - colors: Nx3 array of RGB values - """ - # Calculate initial point cloud from depth - valid_points = depth_image > 0 - X = depth_image * x_map - Y = depth_image * y_map - - points = np.stack(( - X[valid_points], - Y[valid_points], - depth_image[valid_points] - ), axis=-1) - - # Transform points to color camera coordinate system - transformed_points = transform_points( - points, - depth_to_color_translation, - depth_to_color_rotation - ) - - # Project points into color image - fx, fy, cx, cy = color_intrinsics - u = (transformed_points[:, 0] * fx / transformed_points[:, 2] + cx).astype(int) - v = (transformed_points[:, 1] * fy / transformed_points[:, 2] + cy).astype(int) - - # Filter points that project outside image bounds - height, width = color_image.shape[:2] - valid_uvs = (u >= 0) & (u < width) & (v >= 0) & (v < height) - - # Sample colors from valid projections - colors = np.zeros((len(points), 3), dtype=np.uint8) - colors[valid_uvs] = color_image[v[valid_uvs], u[valid_uvs]] - - return points[valid_uvs], colors[valid_uvs] - -def depth_to_points(depth_image, x_map, y_map): - """ - Convert depth image to point cloud using pre-computed maps. - - Args: - depth_image: 2D depth array - x_map, y_map: Pre-computed coordinate maps - - Returns: - points: Nx3 array of XYZ coordinates - """ - # Calculate X and Y coordinates - X = depth_image * x_map - Y = depth_image * y_map - - # Stack coordinates into point cloud - valid_points = depth_image > 0 - points = np.stack(( - X[valid_points], - Y[valid_points], - depth_image[valid_points] - ), axis=-1) - - return points - - - - - - - def get_frame_from_http(host=HOST, port=PORT): r = requests.get('http://{}:{}/getdeep'.format(host, port)) if(r.status_code == requests.codes.ok): - print('Get deep image') + # print('Get deep image') deepimg = r.content - print('Length={}'.format(len(deepimg))) + # print('Length={}'.format(len(deepimg))) (frameid, stamp_msec) = struct.unpack(' y_start and x_end > x_start and + result_y_end > result_y_start and result_x_end > result_x_start): + result[result_y_start:result_y_end, result_x_start:result_x_end] = shifted_image[y_start:y_end, x_start:x_end] + + return result + + prev_status = None +prev_depth = None def show_frame(frame_data: bytes): global prev_status + global prev_depth config = frame_config_decode(frame_data[16:16+12]) frame_bytes = frame_payload_decode(frame_data[16+12:], config) @@ -287,76 +187,76 @@ def show_frame(frame_data: bytes): (480, 640, 3) if config[6] == 1 else (600, 800, 3)) if frame_bytes[3] else None if not (depth is None or status is None or rgb is None): + rgb = cv2.resize(rgb, dsize=(320, 240), interpolation=cv2.INTER_CUBIC) # Resize + rgb = scale_and_shift(rgb, 1.1, -10, -10) + status = 1-status + if prev_status is None: - mask = (status==0) + mask = (status) else: - mask = (status==0)*(prev_status==0) - - linear_mask = mask.reshape((-1)) - # delete = np.where(1-linear_mask)) - - # depth_frame = (depth*mask)/1000 - depth_frame = (depth)/1000 - blurred = cv2.GaussianBlur(depth_frame,(3,3),1.0) - # depth_frame = cv2.addWeighted(depth_frame, 2.5, blurred, -1, 0) - + mask = (status)*(prev_status) prev_status = status - points = depth_to_points(depth_frame, x_map, y_map) - points = points[linear_mask] - colors = cv2.resize(rgb, (320, 240)) - # cv2.imshow("color", mask) + depth = depth*mask + if prev_depth is not None: + new_depth = (depth + prev_depth)/2 + prev_depth = depth + depth = new_depth + else: + prev_depth = depth - - - # colors = (np.stack((depth_frame,) * 3, axis=-1)).reshape((-1, 3)).astype(np.float64) / 255.0 - colors = colors.reshape((-1, 3)).astype(np.float64) / 255.0 - # colors *= linear_mask - # colors = colors[:-(colors.shape[0]-points.shape[0])] - # colors = np.delete(colors, delete, axis = 0) - colors = colors[linear_mask] - # print(np.where(points)) - - # print(colors_e.shape) + img_depth = img2depth(rgb) + aligned_img_depth = align_depth_maps(depth, img_depth, mask)*(1-mask) - return depth_frame, points, colors - return None, None, None + return (aligned_img_depth + depth), rgb, mask + return None # create visualizer and window. -vis = o3d.visualization.Visualizer() -vis.create_window(height=480, width=640) +# vis = o3d.visualization.Visualizer() +# vis.create_window(height=480, width=640) -pcd = o3d.geometry.PointCloud() -pcd.points = o3d.utility.Vector3dVector(np.random.rand(10, 3)) +# pcd = o3d.geometry.PointCloud() +# pcd.points = o3d.utility.Vector3dVector(np.random.rand(10, 3)) -vis.add_geometry(pcd) - -x_map, y_map = create_point_cloud_map( - width=320, height=240, - fx=231.8290, fy=232.7785, # focal lengths - cx=166.9372, cy=123.5151 # principal point -) +# vis.add_geometry(pcd) -depth_to_color_translation = np.array([0, 0, 0]) # 5cm offset in x -depth_to_color_rotation = np.eye(3) # Identity matrix if cameras are parallel +# depth_to_color_translation = np.array([0, 0, 0]) # 5cm offset in x +# depth_to_color_rotation = np.eye(3) # Identity matrix if cameras are parallel -color_intrinsics = (520, 520, 325, 245) +# color_intrinsics = (520, 520, 325, 245) keep_running = True +photocount = 0 + while keep_running: if post_encode_config(frame_config_encode(1,0,255,0,2,7,1,0,0)): p = get_frame_from_http() - depth_image, points, colors = show_frame(p) - if depth_image is None or colors is None : continue - cv2.imshow("e", depth_image) - cv2.waitKey(1) + depth_image, rgb, mask = show_frame(p) + if depth_image is None: continue + depth_colored = cv2.applyColorMap((depth_image).astype(np.uint8), cv2.COLORMAP_JET) + # mask = (depth_image>1000) + + # b = np.repeat((depth_image>10)[:, :, np.newaxis], 3, axis=2) + # b = np.repeat((mask==1)[:, :, np.newaxis], 3, axis=2) + + + cv2.imshow("e", depth_colored) + cv2.imshow("rgb", rgb) + + key = cv2.waitKey(1) + + if key & 0xFF == 27: + break + if key & 0xFF == 32: + photocount += 1 + print(f"Took photo {photocount}!") # points, colors = depth_to_colored_points( # depth_image, # color_image, @@ -373,20 +273,21 @@ while keep_running: # colors[:, [0, 2]] = colors[:, [2, 0]] - print(points.shape) - print(colors.shape) + # print(points.shape) + # print(colors.shape) - pcd.points = o3d.utility.Vector3dVector(points) - pcd.colors = o3d.utility.Vector3dVector(colors) + # pcd.points = o3d.utility.Vector3dVector(points) + # pcd.colors = o3d.utility.Vector3dVector(colors) - vis.update_geometry(pcd) + # vis.update_geometry(pcd) - keep_running = vis.poll_events() - vis.update_renderer() + # keep_running = vis.poll_events() + # vis.update_renderer() # pcd.points.extend(np.random.rand(n_new, 3)) # cv2.waitKey(1) # with open("rgbd.raw", 'wb') as f: # f.write(p) # f.flush() +cv2.destroyAllWindows() \ No newline at end of file