Add depth

2026-06-08 16:18:02 -06:00 · 2025-04-24 22:10:04 -06:00
parent 35c9f2a267
commit f9b88ff2f0
3 changed files with 299 additions and 226 deletions
@@ -0,0 +1,75 @@
+import cv2
+import torch
+import time
+import numpy as np
+# import open3d as o3d
+
+# Load MiDas model for depth estimation
+model_type = "DPT_Hybrid"
+midas = torch.hub.load("intel-isl/MiDas", model_type)
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+midas.to(device)
+midas.eval()
+
+# Load transformers to resize and normalize the image
+midas_transforms = torch.hub.load("intel-isl/MiDas", "transforms")
+transform = midas_transforms.dpt_transform if model_type == "DPT_Large" or model_type == "DPT_Hybrid" else midas_transforms.small_transform
+
+
+# Create Open3D visualizer
+# vis = o3d.visualization.Visualizer()
+# vis.create_window("3D Point Cloud")
+
+# Create a geometry object to store the point cloud
+# pcd = o3d.geometry.PointCloud()
+
+def img2depth(img_rgb):
+    # Apply input transform
+    input_batch = transform(img_rgb).to(device)
+
+    # Prediction and resize to original resolution
+    with torch.no_grad():
+        prediction = midas(input_batch)
+        prediction = torch.nn.functional.interpolate(
+            prediction.unsqueeze(1),
+            size=img_rgb.shape[:2],
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze()
+
+    return prediction.cpu().numpy()
+
+    # Apply colormap to depth map
+    # depth_colored = cv2.applyColorMap((depth_map / depth_map.max() * 255).astype(np.uint8), cv2.COLORMAP_JET)
+
+    # Generate 3D point cloud with colors
+    # point_cloud = []
+    # colors = []
+    # for y in range(depth_map.shape[0]):
+    #     for x in range(depth_map.shape[1]):
+    #         depth = depth_map[y, x]
+    #         if depth > 0:
+    #             # Convert pixel coordinates to 3D space
+    #             z = depth
+    #             x_3d = x
+    #             y_3d = depth_map.shape[0] - y
+    #             # point_cloud.append([x_3d, y_3d, z])
+    #             colors.append(img_rgb[y, x])
+
+    # point_cloud = np.array(point_cloud)
+    # colors = np.array(colors)
+
+    # Update point cloud data
+    # pcd.points = o3d.utility.Vector3dVector(point_cloud)
+    # pcd.colors = o3d.utility.Vector3dVector(colors / 255.0)
+
+    # Scale point cloud to increase its size
+    # pcd.scale(1000.0, center=pcd.get_center())
+
+    # Visualize point cloud in 3D space
+    # vis.clear_geometries()
+    # vis.add_geometry(pcd)
+    # vis.poll_events()
+    # vis.update_renderer()
+
+    # Display camera feed and depth map
@@ -0,0 +1,97 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.linear_model import LinearRegression
+
+def align_depth_maps(tof_depth, img_depth, tof_mask):
+    """
+    Aligns the scale and bounds of image-to-depth output to match the TOF depth map
+    
+    Parameters:
+    -----------
+    tof_depth : numpy.ndarray
+        Depth map from TOF sensor (incomplete but spatially accurate)
+    img_depth : numpy.ndarray
+        Depth map from image-to-depth model (complete but incorrectly scaled)
+    tof_mask : numpy.ndarray
+        Binary mask where 1 indicates valid TOF measurements
+    
+    Returns:
+    --------
+    numpy.ndarray
+        Aligned image depth map with scale and bounds matching TOF depth map
+    """
+    # Extract valid TOF depth values and corresponding image depth values
+    valid_mask = tof_mask > 0
+    tof_valid = tof_depth
+    img_valid = img_depth*tof_mask
+    
+    if len(tof_valid) < 10:
+        raise ValueError("Not enough valid TOF points for reliable alignment")
+    
+    # Reshape for sklearn
+    tof_valid = tof_valid.reshape(-1, 1)
+    img_valid = img_valid.reshape(-1, 1)
+    
+    # Fit linear regression to find scale and offset
+    # We're solving for: tof_depth = a * img_depth + b
+    model = LinearRegression()
+    model.fit(img_valid, tof_valid)
+    
+    scale = model.coef_[0][0]
+    offset = model.intercept_[0]
+    
+    print(f"Fitted scale: {scale:.4f}")
+    print(f"Fitted offset: {offset:.4f}")
+    
+    # Apply transformation to the entire image depth map
+    aligned_depth = scale * img_depth + offset
+    
+    return aligned_depth
+
+def evaluate_alignment(tof_depth, aligned_depth, tof_mask):
+    """
+    Evaluate how well the aligned depth map matches the TOF depth map
+    """
+    valid_mask = tof_mask > 0
+    tof_valid = tof_depth[valid_mask]
+    aligned_valid = aligned_depth[valid_mask]
+    
+    abs_diff = np.abs(tof_valid - aligned_valid)
+    mean_abs_error = np.mean(abs_diff)
+    max_abs_error = np.max(abs_diff)
+    
+    print(f"Mean Absolute Error: {mean_abs_error:.4f}")
+    print(f"Max Absolute Error: {max_abs_error:.4f}")
+    
+    return mean_abs_error, max_abs_error
+
+def visualize_results(tof_depth, img_depth, aligned_depth, tof_mask):
+    """
+    Visualize original and aligned depth maps
+    """
+    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
+    
+    # Original TOF depth map
+    im0 = axes[0, 0].imshow(tof_depth)
+    axes[0, 0].set_title("TOF Depth Map")
+    plt.colorbar(im0, ax=axes[0, 0])
+    
+    # Original image depth map
+    im1 = axes[0, 1].imshow(img_depth)
+    axes[0, 1].set_title("Image-to-Depth Map (Original)")
+    plt.colorbar(im1, ax=axes[0, 1])
+    
+    # Aligned image depth map
+    im2 = axes[1, 0].imshow(aligned_depth)
+    axes[1, 0].set_title("Image-to-Depth Map (Aligned)")
+    plt.colorbar(im2, ax=axes[1, 0])
+    
+    # Difference where TOF is valid
+    diff = np.zeros_like(tof_depth)
+    diff[tof_mask > 0] = np.abs(tof_depth[tof_mask > 0] - aligned_depth[tof_mask > 0])
+    im3 = axes[1, 1].imshow(diff)
+    axes[1, 1].set_title("Absolute Difference (where TOF is valid)")
+    plt.colorbar(im3, ax=axes[1, 1])
+    
+    plt.tight_layout()
+    plt.show()
@@ -5,188 +5,22 @@ import cv2
 #import matplotlib.pyplot as plt
 import requests

-import open3d as o3d
+from depth import img2depth
+from matchdepth import align_depth_maps
+
+# import open3d as o3d

 HOST = '192.168.233.1'
 PORT = 80

-# def create_point_cloud_map(width, height, fx, fy, cx, cy):
-#     """
-#     Create mapping arrays for converting depth image to point cloud.
-    
-#     Args:
-#         width, height: Image dimensions
-#         fx, fy: Focal lengths
-#         cx, cy: Principal point coordinates
-    
-#     Returns:
-#         x_map, y_map: Arrays that when multiplied by depth give X,Y coordinates
-#     """
-#     # Create pixel coordinate grid
-#     v, u = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
-    
-#     # Convert to normalized image coordinates
-#     x_map = (u - cx) / fx
-#     y_map = (v - cy) / fy
-    
-#     return x_map, y_map
-
-# def depth_to_points(depth_image, x_map, y_map):
-#     """
-#     Convert depth image to point cloud using pre-computed maps.
-    
-#     Args:
-#         depth_image: 2D depth array
-#         x_map, y_map: Pre-computed coordinate maps
-    
-#     Returns:
-#         points: Nx3 array of XYZ coordinates
-#     """
-#     # Calculate X and Y coordinates
-#     X = depth_image * x_map
-#     Y = depth_image * y_map
-    
-#     # Stack coordinates into point cloud
-#     valid_points = depth_image > 0
-#     points = np.stack((
-#         X[valid_points],
-#         Y[valid_points],
-#         depth_image[valid_points]
-#     ), axis=-1)
-    
-#     return points
-
-
-
-
-
-def create_point_cloud_map(width, height, fx, fy, cx, cy):
-    """
-    Create mapping arrays for converting depth image to point cloud.
-    
-    Args:
-        width, height: Image dimensions
-        fx, fy: Focal lengths
-        cx, cy: Principal point coordinates
-    
-    Returns:
-        x_map, y_map: Arrays that when multiplied by depth give X,Y coordinates
-    """
-    # Create pixel coordinate grid
-    v, u = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
-    
-    # Convert to normalized image coordinates
-    x_map = (u - cx) / fx
-    y_map = (v - cy) / fy
-    
-    return x_map, y_map
-
-def transform_points(points, translation, rotation=None):
-    """
-    Apply rigid transformation to points.
-    
-    Args:
-        points: Nx3 array of XYZ coordinates
-        translation: [tx, ty, tz] translation vector
-        rotation: 3x3 rotation matrix (optional)
-    
-    Returns:
-        transformed_points: Nx3 array of transformed coordinates
-    """
-    if rotation is not None:
-        points = points @ rotation.T
-    return points + translation
-
-def depth_to_colored_points(depth_image, color_image, x_map, y_map, 
-                          color_intrinsics, depth_to_color_translation,
-                          depth_to_color_rotation=None):
-    """
-    Convert depth image to colored point cloud using pre-computed maps.
-    
-    Args:
-        depth_image: 2D depth array
-        color_image: RGB image array (height, width, 3)
-        x_map, y_map: Pre-computed coordinate maps for depth camera
-        color_intrinsics: (fx, fy, cx, cy) for RGB camera
-        depth_to_color_translation: [tx, ty, tz] from depth to color camera
-        depth_to_color_rotation: 3x3 rotation matrix (optional)
-    
-    Returns:
-        points: Nx3 array of XYZ coordinates
-        colors: Nx3 array of RGB values
-    """
-    # Calculate initial point cloud from depth
-    valid_points = depth_image > 0
-    X = depth_image * x_map
-    Y = depth_image * y_map
-    
-    points = np.stack((
-        X[valid_points],
-        Y[valid_points],
-        depth_image[valid_points]
-    ), axis=-1)
-    
-    # Transform points to color camera coordinate system
-    transformed_points = transform_points(
-        points, 
-        depth_to_color_translation,
-        depth_to_color_rotation
-    )
-    
-    # Project points into color image
-    fx, fy, cx, cy = color_intrinsics
-    u = (transformed_points[:, 0] * fx / transformed_points[:, 2] + cx).astype(int)
-    v = (transformed_points[:, 1] * fy / transformed_points[:, 2] + cy).astype(int)
-    
-    # Filter points that project outside image bounds
-    height, width = color_image.shape[:2]
-    valid_uvs = (u >= 0) & (u < width) & (v >= 0) & (v < height)
-    
-    # Sample colors from valid projections
-    colors = np.zeros((len(points), 3), dtype=np.uint8)
-    colors[valid_uvs] = color_image[v[valid_uvs], u[valid_uvs]]
-    
-    return points[valid_uvs], colors[valid_uvs]
-
-def depth_to_points(depth_image, x_map, y_map):
-    """
-    Convert depth image to point cloud using pre-computed maps.
-    
-    Args:
-        depth_image: 2D depth array
-        x_map, y_map: Pre-computed coordinate maps
-    
-    Returns:
-        points: Nx3 array of XYZ coordinates
-    """
-    # Calculate X and Y coordinates
-    X = depth_image * x_map
-    Y = depth_image * y_map
-    
-    # Stack coordinates into point cloud
-    valid_points = depth_image > 0
-    points = np.stack((
-        X[valid_points],
-        Y[valid_points],
-        depth_image[valid_points]
-    ), axis=-1)
-    
-    return points
-
-
-
-
-
-
-
 def get_frame_from_http(host=HOST, port=PORT):
    r = requests.get('http://{}:{}/getdeep'.format(host, port))
    if(r.status_code == requests.codes.ok):
-        print('Get deep image')
+        # print('Get deep image')
        deepimg = r.content
-        print('Length={}'.format(len(deepimg)))
+        # print('Length={}'.format(len(deepimg)))
        (frameid, stamp_msec) = struct.unpack('<QQ', deepimg[0:8+8])
-        print((frameid, stamp_msec/1000))
+        # print((frameid, stamp_msec/1000))
        return deepimg

 def post_encode_config(config, host=HOST, port=PORT):
@@ -267,10 +101,76 @@ def frame_payload_decode(frame_data: bytes, with_config: tuple):

    return (deepth_img, ir_img, status_img, rgb_img)

+
+
+
+
+
+def scale_and_shift(image, scale_factor, shift_x, shift_y):
+    """
+    Scale an RGB image and shift it by n pixels in x and y direction.
+    
+    Parameters:
+    -----------
+    image : numpy.ndarray
+        Input RGB image with shape (height, width, 3)
+    scale_factor : float
+        Scale factor (e.g., 0.5 for half size, 2.0 for double size)
+    shift_x : int
+        Number of pixels to shift in x direction (positive: right, negative: left)
+    shift_y : int
+        Number of pixels to shift in y direction (positive: down, negative: up)
+    
+    Returns:
+    --------
+    numpy.ndarray
+        Scaled and shifted image with the same shape as the input image
+    """
+    # Get original image dimensions
+    height, width = image.shape[:2]
+    
+    # Calculate new dimensions after scaling
+    new_height = int(height * scale_factor)
+    new_width = int(width * scale_factor)
+    
+    # Scale the image
+    scaled_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
+    
+    # Create a transformation matrix for the shift
+    M = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
+    
+    # Apply the shift to the scaled image
+    shifted_image = cv2.warpAffine(scaled_image, M, (new_width, new_height))
+    
+    # Create a blank canvas with original dimensions
+    result = np.zeros_like(image)
+    
+    # Calculate the region to copy from the shifted_scaled image
+    y_start = max(0, -shift_y)
+    y_end = min(new_height, height - shift_y)
+    x_start = max(0, -shift_x)
+    x_end = min(new_width, width - shift_x)
+    
+    # Calculate the region to paste into the result image
+    result_y_start = max(0, shift_y)
+    result_y_end = min(height, new_height + shift_y)
+    result_x_start = max(0, shift_x)
+    result_x_end = min(width, new_width + shift_x)
+    
+    # Copy the visible part of the shifted image to the result
+    if (y_end > y_start and x_end > x_start and 
+        result_y_end > result_y_start and result_x_end > result_x_start):
+        result[result_y_start:result_y_end, result_x_start:result_x_end] = shifted_image[y_start:y_end, x_start:x_end]
+    
+    return result
+
+
 prev_status = None
+prev_depth = None

 def show_frame(frame_data: bytes):
    global prev_status
+    global prev_depth
    config = frame_config_decode(frame_data[16:16+12])
    frame_bytes = frame_payload_decode(frame_data[16+12:], config)

@@ -287,76 +187,76 @@ def show_frame(frame_data: bytes):
        (480, 640, 3) if config[6] == 1 else (600, 800, 3)) if frame_bytes[3] else None

    if not (depth is None or status is None or rgb is None):
+        rgb = cv2.resize(rgb, dsize=(320, 240), interpolation=cv2.INTER_CUBIC) # Resize
+        rgb = scale_and_shift(rgb, 1.1, -10, -10)
+        status = 1-status
+
        if prev_status is None:
-            mask = (status==0)
+            mask = (status)
        else:
-            mask = (status==0)*(prev_status==0)
-        
-        linear_mask = mask.reshape((-1))
-        # delete = np.where(1-linear_mask))
-
-        # depth_frame = (depth*mask)/1000
-        depth_frame = (depth)/1000
-        blurred = cv2.GaussianBlur(depth_frame,(3,3),1.0)
-        # depth_frame = cv2.addWeighted(depth_frame, 2.5, blurred, -1, 0)
-
+            mask = (status)*(prev_status)
        prev_status = status
-        points = depth_to_points(depth_frame, x_map, y_map)
-        points = points[linear_mask]

-        colors = cv2.resize(rgb, (320, 240))
-        # cv2.imshow("color", mask)
+        depth = depth*mask
+        if prev_depth is not None:
+            new_depth = (depth + prev_depth)/2
+            prev_depth = depth
+            depth = new_depth
+        else:
+            prev_depth = depth

-         
-        
-        # colors = (np.stack((depth_frame,) * 3, axis=-1)).reshape((-1, 3)).astype(np.float64) / 255.0
-        colors = colors.reshape((-1, 3)).astype(np.float64) / 255.0
-        # colors *= linear_mask
-        # colors = colors[:-(colors.shape[0]-points.shape[0])]
-        # colors = np.delete(colors, delete, axis = 0)
-        colors = colors[linear_mask]

-        # print(np.where(points))
-
-        # print(colors_e.shape)
+        img_depth = img2depth(rgb)

+        aligned_img_depth = align_depth_maps(depth, img_depth, mask)*(1-mask)

        
-        return depth_frame, points, colors
-    return None, None, None
+        return (aligned_img_depth + depth), rgb, mask
+    return None


 # create visualizer and window.
-vis = o3d.visualization.Visualizer()
-vis.create_window(height=480, width=640)
+# vis = o3d.visualization.Visualizer()
+# vis.create_window(height=480, width=640)

-pcd = o3d.geometry.PointCloud()
-pcd.points = o3d.utility.Vector3dVector(np.random.rand(10, 3))
+# pcd = o3d.geometry.PointCloud()
+# pcd.points = o3d.utility.Vector3dVector(np.random.rand(10, 3))

-vis.add_geometry(pcd)
-
-x_map, y_map = create_point_cloud_map(
-    width=320, height=240,
-    fx=231.8290, fy=232.7785,  # focal lengths
-    cx=166.9372, cy=123.5151   # principal point
-)
+# vis.add_geometry(pcd)


-depth_to_color_translation = np.array([0, 0, 0])  # 5cm offset in x
-depth_to_color_rotation = np.eye(3)  # Identity matrix if cameras are parallel
+# depth_to_color_translation = np.array([0, 0, 0])  # 5cm offset in x
+# depth_to_color_rotation = np.eye(3)  # Identity matrix if cameras are parallel

-color_intrinsics = (520, 520, 325, 245)
+# color_intrinsics = (520, 520, 325, 245)

 keep_running = True

+photocount = 0
+
 while keep_running:
    if post_encode_config(frame_config_encode(1,0,255,0,2,7,1,0,0)):
        p = get_frame_from_http()
-        depth_image, points, colors = show_frame(p)
-        if depth_image is None or colors is None : continue
-        cv2.imshow("e", depth_image)
-        cv2.waitKey(1)
+        depth_image, rgb, mask = show_frame(p)
+        if depth_image is None: continue
+        depth_colored = cv2.applyColorMap((depth_image).astype(np.uint8), cv2.COLORMAP_JET)

+        # mask = (depth_image>1000)
+
+        # b = np.repeat((depth_image>10)[:, :, np.newaxis], 3, axis=2)
+        # b = np.repeat((mask==1)[:, :, np.newaxis], 3, axis=2)
+
+
+        cv2.imshow("e", depth_colored)
+        cv2.imshow("rgb", rgb)
+
+    key = cv2.waitKey(1)
+
+    if key & 0xFF == 27:
+        break
+    if key & 0xFF == 32:
+        photocount += 1
+        print(f"Took photo {photocount}!")
        # points, colors = depth_to_colored_points(
        #     depth_image,
        #     color_image,
@@ -373,20 +273,21 @@ while keep_running:

        # colors[:, [0, 2]] = colors[:, [2, 0]]

-        print(points.shape)
-        print(colors.shape)
+        # print(points.shape)
+        # print(colors.shape)

-        pcd.points = o3d.utility.Vector3dVector(points)
-        pcd.colors = o3d.utility.Vector3dVector(colors)
+        # pcd.points = o3d.utility.Vector3dVector(points)
+        # pcd.colors = o3d.utility.Vector3dVector(colors)


-    vis.update_geometry(pcd)
+    # vis.update_geometry(pcd)

-    keep_running = vis.poll_events()
-    vis.update_renderer()
+    # keep_running = vis.poll_events()
+    # vis.update_renderer()

        # pcd.points.extend(np.random.rand(n_new, 3))
    # cv2.waitKey(1)
    # with open("rgbd.raw", 'wb') as f:
    #     f.write(p)
    #     f.flush()
+cv2.destroyAllWindows()