To determine an object's position with a camera, you typically need camera calibration, object detection, and coordinate transformation. Below is a complete solution covering camera calibration, object detection, and 3D coordinate estimation.
1. System Architecture

- Camera calibration - obtain the camera intrinsic matrix and distortion coefficients
- Object detection - detect the object using OpenCV or a deep learning model
- Coordinate transformation - convert 2D image coordinates to 3D world coordinates
- ROS2 integration - wrap the above functionality in ROS2 nodes
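The coordinate-transformation step rests on the pinhole camera model: given the intrinsics $f_x, f_y, c_x, c_y$ and a known depth $Z$, a pixel $(u, v)$ back-projects to camera-frame coordinates

$$
X = \frac{u - c_x}{f_x}\,Z, \qquad Y = \frac{v - c_y}{f_y}\,Z
$$

This is the relation the `calculate_3d_position` method in section 2.3 implements (after first undistorting the pixel).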
2. Implementation Steps
2.1 Create the Package
```bash
ros2 pkg create object_localization --build-type ament_python --dependencies rclpy sensor_msgs cv_bridge python3-opencv geometry_msgs
```
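For `ros2 run` and `ros2 launch` to find the nodes created below, register them as console scripts in the package's `setup.py`. A minimal sketch (the executable and module names match the files created in the following steps):

```python
# setup.py (excerpt): register the nodes as executables
entry_points={
    'console_scripts': [
        'camera_calibration_node = object_localization.camera_calibration:main',
        'object_localization_node = object_localization.object_localization_node:main',
    ],
},
```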
2.2 Camera Calibration Node
First, implement a camera calibration node to obtain the camera intrinsics and distortion coefficients. Create `camera_calibration.py` in the `object_localization/object_localization` directory:
```python
#!/usr/bin/env python3
import os

import cv2
import numpy as np
import rclpy
import yaml
from cv_bridge import CvBridge
from rclpy.node import Node
from sensor_msgs.msg import Image


class CameraCalibrationNode(Node):
    def __init__(self):
        super().__init__('camera_calibration_node')

        # Parameters
        self.declare_parameter('chessboard_width', 9)
        self.declare_parameter('chessboard_height', 6)
        self.declare_parameter('square_size', 0.025)  # 25 mm
        self.declare_parameter('calibration_images_dir', 'calibration_images')
        self.declare_parameter('save_file', 'camera_calibration.yaml')

        self.chessboard_width = self.get_parameter('chessboard_width').value
        self.chessboard_height = self.get_parameter('chessboard_height').value
        self.square_size = self.get_parameter('square_size').value
        self.calibration_images_dir = self.get_parameter('calibration_images_dir').value
        self.save_file = self.get_parameter('save_file').value

        # Create the directory for saved calibration images
        os.makedirs(self.calibration_images_dir, exist_ok=True)

        # Initialization
        self.bridge = CvBridge()
        self.subscription = self.create_subscription(
            Image, 'image_raw', self.image_callback, 10)

        self.criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

        # 3D corner positions in the chessboard frame, scaled by the square size
        self.objp = np.zeros((self.chessboard_height * self.chessboard_width, 3), np.float32)
        self.objp[:, :2] = np.mgrid[0:self.chessboard_width,
                                    0:self.chessboard_height].T.reshape(-1, 2) * self.square_size

        self.objpoints = []  # 3D points
        self.imgpoints = []  # 2D points
        self.image_size = None  # (width, height), recorded from incoming frames

        self.get_logger().info("Camera calibration node ready. Point camera at chessboard...")

    def image_callback(self, msg):
        try:
            frame = self.bridge.imgmsg_to_cv2(msg, "bgr8")
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            self.image_size = gray.shape[::-1]

            # Find the chessboard corners
            ret, corners = cv2.findChessboardCorners(
                gray, (self.chessboard_width, self.chessboard_height), None)

            if ret:
                corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), self.criteria)

                # Draw and display the detected corners
                cv2.drawChessboardCorners(
                    frame, (self.chessboard_width, self.chessboard_height), corners2, ret)
                cv2.imshow('Calibration', frame)
                key = cv2.waitKey(1)

                if key == ord('s'):
                    # Save the current frame for calibration
                    img_path = os.path.join(
                        self.calibration_images_dir, f'calib_{len(self.objpoints)}.png')
                    cv2.imwrite(img_path, frame)
                    self.objpoints.append(self.objp)
                    self.imgpoints.append(corners2)
                    self.get_logger().info(f"Saved calibration image {img_path}")
                elif key == ord('c'):
                    # Run the calibration
                    self.calibrate_camera()
        except Exception as e:
            self.get_logger().error(f"Error processing image: {e}")

    def calibrate_camera(self):
        if len(self.objpoints) < 5:
            self.get_logger().warning("Need at least 5 images for calibration")
            return
        try:
            # Calibrate using the recorded image size (width, height)
            ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
                self.objpoints, self.imgpoints, self.image_size, None, None)

            # Save the calibration result
            calibration_data = {
                'camera_matrix': mtx.tolist(),
                'distortion_coefficients': dist.tolist()
            }
            with open(self.save_file, 'w') as f:
                yaml.dump(calibration_data, f, default_flow_style=None)

            self.get_logger().info(f"Calibration completed. Results saved to {self.save_file}")
            self.get_logger().info(f"Camera matrix:\n{mtx}")
            self.get_logger().info(f"Distortion coefficients:\n{dist}")

            # Compute the mean reprojection error
            mean_error = 0
            for i in range(len(self.objpoints)):
                imgpoints2, _ = cv2.projectPoints(
                    self.objpoints[i], rvecs[i], tvecs[i], mtx, dist)
                error = cv2.norm(self.imgpoints[i], imgpoints2, cv2.NORM_L2) / len(imgpoints2)
                mean_error += error
            self.get_logger().info(
                f"Total reprojection error: {mean_error / len(self.objpoints)}")
        except Exception as e:
            self.get_logger().error(f"Calibration failed: {e}")


def main(args=None):
    rclpy.init(args=args)
    node = CameraCalibrationNode()
    rclpy.spin(node)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
```
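After pressing 'c', the node writes a YAML file along the following lines (the numbers here are illustrative placeholders, not real calibration values):

```yaml
# camera_calibration.yaml - illustrative values only
camera_matrix:
- [612.4, 0.0, 320.1]
- [0.0, 611.9, 241.7]
- [0.0, 0.0, 1.0]
distortion_coefficients:
- [0.12, -0.25, 0.001, 0.0005, 0.08]
```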
2.3 Object Localization Node
Create `object_localization_node.py` to detect the object and compute its 3D coordinates:
```python
#!/usr/bin/env python3
import cv2
import numpy as np
import rclpy
import yaml
from cv_bridge import CvBridge
from geometry_msgs.msg import PointStamped
from rclpy.node import Node
from sensor_msgs.msg import CameraInfo, Image


class ObjectLocalizationNode(Node):
    def __init__(self):
        super().__init__('object_localization_node')

        # Parameters
        self.declare_parameter('calibration_file', 'camera_calibration.yaml')
        self.declare_parameter('object_height', 0.0)  # distance from camera to object plane (m)
        self.declare_parameter('target_color_low', [0, 100, 100])
        self.declare_parameter('target_color_high', [10, 255, 255])

        self.calibration_file = self.get_parameter('calibration_file').value
        self.object_height = self.get_parameter('object_height').value
        self.target_color_low = np.array(self.get_parameter('target_color_low').value, np.uint8)
        self.target_color_high = np.array(self.get_parameter('target_color_high').value, np.uint8)

        # Load the camera calibration data
        self.camera_matrix = None
        self.dist_coeffs = None
        self.load_calibration_data()

        # Initialization
        self.bridge = CvBridge()
        self.image_sub = self.create_subscription(
            Image, 'image_raw', self.image_callback, 10)
        self.camera_info_sub = self.create_subscription(
            CameraInfo, 'camera_info', self.camera_info_callback, 10)
        self.position_pub = self.create_publisher(
            PointStamped, 'object_position', 10)

        self.get_logger().info("Object localization node ready")

    def load_calibration_data(self):
        try:
            with open(self.calibration_file, 'r') as f:
                calibration_data = yaml.safe_load(f)
            self.camera_matrix = np.array(calibration_data['camera_matrix'])
            self.dist_coeffs = np.array(calibration_data['distortion_coefficients'])
            self.get_logger().info("Loaded camera calibration data")
        except Exception as e:
            self.get_logger().error(f"Failed to load calibration data: {e}")

    def camera_info_callback(self, msg):
        # Fall back to the intrinsics published on camera_info if no file was loaded
        if self.camera_matrix is None:
            self.camera_matrix = np.array(msg.k).reshape(3, 3)
            self.dist_coeffs = np.array(msg.d)
            self.get_logger().info("Received camera info")

    def image_callback(self, msg):
        if self.camera_matrix is None:
            self.get_logger().warn("Camera calibration data not available yet")
            return
        try:
            frame = self.bridge.imgmsg_to_cv2(msg, "bgr8")

            # 1. Preprocess: threshold the target color in HSV space
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            mask = cv2.inRange(hsv, self.target_color_low, self.target_color_high)

            # 2. Morphological cleanup
            kernel = np.ones((5, 5), np.uint8)
            mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

            # 3. Find contours
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if len(contours) > 0:
                # Take the largest contour
                largest_contour = max(contours, key=cv2.contourArea)

                # The contour moments give the centroid
                M = cv2.moments(largest_contour)
                if M['m00'] > 0:
                    cx = int(M['m10'] / M['m00'])
                    cy = int(M['m01'] / M['m00'])

                    # 4. Compute the 3D position
                    object_point = self.calculate_3d_position(cx, cy)

                    if object_point is not None:
                        # Publish the position
                        position_msg = PointStamped()
                        position_msg.header.stamp = self.get_clock().now().to_msg()
                        position_msg.header.frame_id = "camera"
                        position_msg.point.x = object_point[0]
                        position_msg.point.y = object_point[1]
                        position_msg.point.z = object_point[2]
                        self.position_pub.publish(position_msg)

                        # Visualization
                        cv2.circle(frame, (cx, cy), 5, (0, 0, 255), -1)
                        cv2.putText(frame,
                                    f"({object_point[0]:.2f}, {object_point[1]:.2f}, {object_point[2]:.2f})",
                                    (cx + 10, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            # Show the result
            cv2.imshow('Object Detection', frame)
            cv2.waitKey(1)
        except Exception as e:
            self.get_logger().error(f"Error processing image: {e}")

    def calculate_3d_position(self, u, v):
        """
        Compute 3D camera-frame coordinates from 2D image coordinates,
        assuming the object lies on a known plane at depth Z = object_height.
        """
        try:
            # Undistort and normalize: undistortPoints already returns
            # (u - cx)/fx and (v - cy)/fy with lens distortion removed
            uv_point = np.array([[[float(u), float(v)]]], dtype=np.float32)
            normalized_point = cv2.undistortPoints(
                uv_point, self.camera_matrix, self.dist_coeffs)[0][0]

            # Back-project using the known depth
            Z = self.object_height
            X = normalized_point[0] * Z
            Y = normalized_point[1] * Z

            return (float(X), float(Y), float(Z))
        except Exception as e:
            self.get_logger().error(f"Error calculating 3D position: {e}")
            return None


def main(args=None):
    rclpy.init(args=args)
    node = ObjectLocalizationNode()
    rclpy.spin(node)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
```
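To consume the published positions from another node, a minimal subscriber sketch (the listener node name is an arbitrary choice):

```python
#!/usr/bin/env python3
import rclpy
from rclpy.node import Node
from geometry_msgs.msg import PointStamped


class ObjectPositionListener(Node):
    def __init__(self):
        super().__init__('object_position_listener')
        self.sub = self.create_subscription(
            PointStamped, 'object_position', self.callback, 10)

    def callback(self, msg):
        # Log the received camera-frame coordinates
        self.get_logger().info(
            f"Object at x={msg.point.x:.2f}, y={msg.point.y:.2f}, "
            f"z={msg.point.z:.2f} in frame '{msg.header.frame_id}'")


def main(args=None):
    rclpy.init(args=args)
    node = ObjectPositionListener()
    rclpy.spin(node)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
```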
2.4 Launch File
Create `launch/object_localization.launch.py`:
```python
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    return LaunchDescription([
        Node(
            package='object_localization',
            executable='object_localization_node',
            name='object_localization_node',
            parameters=[
                {'calibration_file': 'camera_calibration.yaml'},
                {'object_height': 0.5},  # assume the object plane is 0.5 m from the camera
                {'target_color_low': [0, 100, 100]},
                {'target_color_high': [10, 255, 255]}
            ]
        )
    ])
```
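For `ros2 launch` to find this file, the `launch` directory must be installed with the package. One common way is through `data_files` in `setup.py` (a sketch assuming the standard ament_python layout):

```python
# setup.py (excerpt): install launch files into the package share directory
import os
from glob import glob

package_name = 'object_localization'

data_files = [
    ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
    ('share/' + package_name, ['package.xml']),
    (os.path.join('share', package_name, 'launch'), glob('launch/*.launch.py')),
]
```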
3. Usage
3.1 Camera Calibration
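Both nodes subscribe to `image_raw`, so a camera driver must be publishing that topic. If you don't already have one, the `v4l2_camera` package is one option for a standard USB webcam (this driver choice is an assumption, not part of the package above):

```bash
# publishes /image_raw and /camera_info from a V4L2 webcam
ros2 run v4l2_camera v4l2_camera_node
```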
1. Print a chessboard pattern (e.g., 9x6 inner corners).
2. Run the calibration node:
   ```bash
   ros2 run object_localization camera_calibration_node
   ```
3. Show the chessboard to the camera at various angles, pressing 's' to save each frame.
4. After collecting enough images (15-20 recommended), press 'c' to run the calibration.
5. The calibration results are saved to `camera_calibration.yaml`.
3.2 Object Localization

1. Run the object localization node:
   ```bash
   ros2 launch object_localization object_localization.launch.py
   ```
2. Make sure the target object's color falls within the `target_color_low` to `target_color_high` HSV range.
3. The node publishes detected object positions on the `/object_position` topic.
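To quickly verify the output from the command line:

```bash
ros2 topic echo /object_position
```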
3.3 Visualization
You can view the object position in RViz:

```bash
ros2 run rviz2 rviz2
```

Add a `PointStamped` display and set its topic to `/object_position`. Set the Fixed Frame to `camera`, since that is the `frame_id` the node stamps on its messages.
4. Advanced Improvements

- Deep learning models - replace the simple color detection with YOLO or a similar model for more accurate object detection
- Multi-camera fusion - triangulate with several cameras to improve localization accuracy
- Depth camera support - integrate a depth camera such as a RealSense or Kinect to measure depth directly
- Coordinate transforms - use TF2 to transform object coordinates into the world frame (see the sketch after this list)
- AR marker detection - use AprilTag or ArUco markers for more precise localization
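As an illustration of the TF2 idea only: the sketch below re-publishes the detected point in a hypothetical `world` frame. It assumes such a frame exists in your TF tree and that `tf2_ros` and `tf2_geometry_msgs` are declared as dependencies; Python support for `tf2_geometry_msgs` varies by ROS2 distribution:

```python
#!/usr/bin/env python3
import rclpy
from rclpy.duration import Duration
from rclpy.node import Node
from geometry_msgs.msg import PointStamped
from tf2_ros import Buffer, TransformListener
import tf2_geometry_msgs  # noqa: F401  (registers PointStamped with tf2)


class WorldFrameRepublisher(Node):
    def __init__(self):
        super().__init__('world_frame_republisher')
        self.tf_buffer = Buffer()
        self.tf_listener = TransformListener(self.tf_buffer, self)
        self.sub = self.create_subscription(
            PointStamped, 'object_position', self.callback, 10)
        self.pub = self.create_publisher(PointStamped, 'object_position_world', 10)

    def callback(self, msg):
        try:
            # Transform the camera-frame point into the 'world' frame
            world_point = self.tf_buffer.transform(
                msg, 'world', timeout=Duration(seconds=0.1))
            self.pub.publish(world_point)
        except Exception as e:
            self.get_logger().warn(f"TF transform failed: {e}")


def main(args=None):
    rclpy.init(args=args)
    node = WorldFrameRepublisher()
    rclpy.spin(node)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
```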
This implementation provides monocular object localization based on a calibrated camera and a known object-plane depth. For more complex scenes, you may need to combine depth information or other sensor data.