Part 4: Autonomous Pick-Up
Overview
Now that we can recognize the toy, we'll write a script to pick it up automatically.
Create a new file called fetch.py in the ~/fetch folder. Paste or download the following:
- Note: if you download the file, rename it to fetch.py.
import argparse
import sys
import time
import numpy as np
import cv2
import math
import bosdyn.client
import bosdyn.client.util
from bosdyn.client.robot_state import RobotStateClient
from bosdyn.client.robot_command import RobotCommandClient, RobotCommandBuilder, block_until_arm_arrives
from bosdyn.api import geometry_pb2
from bosdyn.client.lease import LeaseClient, LeaseKeepAlive
from bosdyn.api import image_pb2
from bosdyn.client.network_compute_bridge_client import NetworkComputeBridgeClient
from bosdyn.api import network_compute_bridge_pb2
from google.protobuf import wrappers_pb2
from bosdyn.client.manipulation_api_client import ManipulationApiClient
from bosdyn.api import manipulation_api_pb2
from bosdyn.api import basic_command_pb2
from bosdyn.client import frame_helpers
from bosdyn.client import math_helpers
kImageSources = [
'frontleft_fisheye_image', 'frontright_fisheye_image',
'left_fisheye_image', 'right_fisheye_image', 'back_fisheye_image'
]
Import the required libraries and define a constant listing the image sources we'll use.
def get_obj_and_img(network_compute_client, server, model, confidence,
image_sources, label):
for source in image_sources:
# Build a network compute request for this image source.
image_source_and_service = network_compute_bridge_pb2.ImageSourceAndService(
image_source=source)
# Input data:
# model name
# minimum confidence (between 0 and 1)
# if we should automatically rotate the image
input_data = network_compute_bridge_pb2.NetworkComputeInputData(
image_source_and_service=image_source_and_service,
model_name=model,
min_confidence=confidence,
rotate_image=network_compute_bridge_pb2.NetworkComputeInputData.
ROTATE_IMAGE_ALIGN_HORIZONTAL)
# Server data: the service name
server_data = network_compute_bridge_pb2.NetworkComputeServerConfiguration(
service_name=server)
# Pack and send the request.
process_img_req = network_compute_bridge_pb2.NetworkComputeRequest(
input_data=input_data, server_config=server_data)
resp = network_compute_client.network_compute_bridge_command(
process_img_req)
Create a function that iterates over each image source and issues a NetworkComputeRequest for each one.
best_obj = None
highest_conf = 0.0
best_vision_tform_obj = None
img = get_bounding_box_image(resp)
image_full = resp.image_response
# Show the image
cv2.imshow("Fetch", img)
cv2.waitKey(15)
Call the function that draws bounding boxes on the image (see below) and show the image to the user.
if len(resp.object_in_image) > 0:
for obj in resp.object_in_image:
# Get the label
obj_label = obj.name.split('_label_')[-1]
if obj_label != label:
continue
conf_msg = wrappers_pb2.FloatValue()
obj.additional_properties.Unpack(conf_msg)
conf = conf_msg.value
Unpack the confidence value for each object in the image.
try:
vision_tform_obj = frame_helpers.get_a_tform_b(
obj.transforms_snapshot,
frame_helpers.VISION_FRAME_NAME,
obj.image_properties.frame_name_image_coordinates)
except bosdyn.client.frame_helpers.ValidateFrameTreeError:
# No depth data available.
vision_tform_obj = None
if conf > highest_conf and vision_tform_obj is not None:
highest_conf = conf
best_obj = obj
best_vision_tform_obj = vision_tform_obj
if best_obj is not None:
return best_obj, image_full, best_vision_tform_obj
return None, None, None
- The robot automatically computes the object's position by correlating its depth cameras with the pixels in the bounding box.
- Note: the depth cameras have a smaller field of view than the grayscale cameras, so some objects will not have a position computed.
- Compare that value against the best confidence we have seen in this image so far.
- Return the highest-confidence object that has position data, or nothing if no such object was found.
def get_bounding_box_image(response):
dtype = np.uint8
img = np.fromstring(response.image_response.shot.image.data, dtype=dtype)
if response.image_response.shot.image.format == image_pb2.Image.FORMAT_RAW:
img = img.reshape(response.image_response.shot.image.rows,
response.image_response.shot.image.cols)
else:
img = cv2.imdecode(img, -1)
# Convert to BGR so we can draw colors
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# Draw bounding boxes in the image for all the detections.
for obj in response.object_in_image:
conf_msg = wrappers_pb2.FloatValue()
obj.additional_properties.Unpack(conf_msg)
confidence = conf_msg.value
polygon = []
min_x = float('inf')
min_y = float('inf')
for v in obj.image_properties.coordinates.vertexes:
polygon.append([v.x, v.y])
min_x = min(min_x, v.x)
min_y = min(min_y, v.y)
polygon = np.array(polygon, np.int32)
polygon = polygon.reshape((-1, 1, 2))
cv2.polylines(img, [polygon], True, (0, 255, 0), 2)
caption = "{} {:.3f}".format(obj.name, confidence)
cv2.putText(img, caption, (int(min_x), int(min_y)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
return img
The get_bounding_box_image function returns an image with bounding boxes drawn on top.
- Unpack the image from the proto.
- Convert it to color so we can draw green boxes on top of the grayscale image.
- For each detected object:
- Draw a line between each corner of the polygon.
- Draw a caption with the object's name and confidence value.
def find_center_px(polygon):
min_x = math.inf
min_y = math.inf
max_x = -math.inf
max_y = -math.inf
for vert in polygon.vertexes:
if vert.x < min_x:
min_x = vert.x
if vert.y < min_y:
min_y = vert.y
if vert.x > max_x:
max_x = vert.x
if vert.y > max_y:
max_y = vert.y
x = math.fabs(max_x - min_x) / 2.0 + min_x
y = math.fabs(max_y - min_y) / 2.0 + min_y
return (x, y)
This function finds the approximate center of the polygon by taking the center of its axis-aligned bounding box. It isn't exact, but it's good enough for our needs.
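As a quick sanity check, here is a tiny standalone example of what that calculation gives. The polygon below is a hypothetical stand-in for obj.image_properties.coordinates, used only to illustrate the math; it is not part of fetch.py.

from types import SimpleNamespace

# Hypothetical vertices roughly outlining a detection between (100, 50) and (140, 90).
fake_polygon = SimpleNamespace(vertexes=[
    SimpleNamespace(x=100, y=50),
    SimpleNamespace(x=140, y=52),
    SimpleNamespace(x=138, y=90),
    SimpleNamespace(x=102, y=88),
])

print(find_center_px(fake_polygon))  # -> (120.0, 70.0), the center of the axis-aligned box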
def block_for_trajectory_cmd(command_client, cmd_id, timeout_sec=None, verbose=False):
"""Helper that blocks until a trajectory command reaches STATUS_AT_GOAL or a timeout is
exceeded.
Args:
command_client: robot command client, used to request feedback
cmd_id: command ID returned by the robot when the trajectory command was sent
timeout_sec: optional number of seconds after which we'll return no matter what the
robot's state is.
verbose: if we should print state at 10 Hz.
Return values:
True if reaches STATUS_AT_GOAL, False otherwise.
"""
start_time = time.time()
if timeout_sec is not None:
end_time = start_time + timeout_sec
now = time.time()
while timeout_sec is None or now < end_time:
feedback_resp = command_client.robot_command_feedback(cmd_id)
current_state = feedback_resp.feedback.mobility_feedback.se2_trajectory_feedback.status
if verbose:
current_state_str = basic_command_pb2.SE2TrajectoryCommand.Feedback.Status.Name(current_state)
current_time = time.time()
print('Walking: ({time:.1f} sec): {state}'.format(
time=current_time - start_time, state=current_state_str),
end=' \r')
if current_state == basic_command_pb2.SE2TrajectoryCommand.Feedback.STATUS_AT_GOAL:
return True
time.sleep(0.1)
now = time.time()
if verbose:
print('block_for_trajectory_cmd: timeout exceeded.')
return False
Poll the robot for feedback and return once the robot reports STATUS_AT_GOAL.
def main(argv):
parser = argparse.ArgumentParser()
bosdyn.client.util.add_common_arguments(parser)
parser.add_argument(
'-s',
'--ml-service',
help='Service name of external machine learning server.',
required=True)
parser.add_argument('-m',
'--model',
help='Model name running on the external server.',
required=True)
parser.add_argument(
'-p',
'--person-model',
help='Person detection model name running on the external server.')
parser.add_argument('-c',
'--confidence-dogtoy',
help='Minimum confidence to return an object for the dogtoy (0.0 to 1.0)',
default=0.5,
type=float)
parser.add_argument('-e',
'--confidence-person',
help='Minimum confidence for person detection (0.0 to 1.0)',
default=0.6,
type=float)
options = parser.parse_args(argv)
cv2.namedWindow("Fetch")
cv2.waitKey(500)
Set up the arguments and tell OpenCV to create an output window.
sdk = bosdyn.client.create_standard_sdk('SpotFetchClient')
sdk.register_service_client(NetworkComputeBridgeClient)
robot = sdk.create_robot(options.hostname)
robot.authenticate(options.username, options.password)
# Time sync is necessary so that time-based filter requests can be converted
robot.time_sync.wait_for_sync()
network_compute_client = robot.ensure_client(
NetworkComputeBridgeClient.default_service_name)
robot_state_client = robot.ensure_client(
RobotStateClient.default_service_name)
command_client = robot.ensure_client(
RobotCommandClient.default_service_name)
lease_client = robot.ensure_client(LeaseClient.default_service_name)
manipulation_api_client = robot.ensure_client(
ManipulationApiClient.default_service_name)
This is the standard setup for Spot API clients. Note that manipulation_api_client is included so that we can use the arm.
# This script assumes the robot is already standing via the tablet. We'll take over from the
# tablet.
lease = lease_client.take()
Now we take over control from the tablet.
- This is a useful way to run quick tests, especially when it is hard to line the robot up correctly by hand.
- Note: we used this technique to get all of the robots into position for jump roping before starting the behavior.
lk = bosdyn.client.lease.LeaseKeepAlive(lease_client)
# Store the position of the hand at the last toy drop point.
vision_tform_hand_at_drop = None
while True:
holding_toy = False
while not holding_toy:
# Capture an image and run ML on it.
dogtoy, image, vision_tform_dogtoy = get_obj_and_img(
network_compute_client, options.ml_service, options.model,
options.confidence_dogtoy, kImageSources, 'dogtoy')
if dogtoy is None:
# Didn't find anything, keep searching.
continue
- Set up the lease keep-alive and start our main loop.
- Search for a dog-toy until we find one.
# If we have already dropped the toy off, make sure it has moved a sufficient amount before
# picking it up again
if vision_tform_hand_at_drop is not None and pose_dist(
vision_tform_hand_at_drop, vision_tform_dogtoy) < 0.5:
print('Found dogtoy, but it hasn\'t moved. Waiting...')
time.sleep(1)
continue
After the dog-toy has been dropped off, we loop back and wait for the person to throw it again; this check skips the pick-up until the toy has moved at least 0.5 m from the drop point.
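The pose_dist helper used above isn't shown in this excerpt. A minimal sketch, assuming both arguments are math_helpers.SE3Pose objects as returned by get_a_tform_b, could look like this:

def pose_dist(pose1, pose2):
    # Euclidean distance between the translation parts of two SE(3) poses.
    diff_vec = [pose1.x - pose2.x, pose1.y - pose2.y, pose1.z - pose2.z]
    return np.linalg.norm(diff_vec)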
print('Found dogtoy...') # Got a dogtoy. Request pick up.
# Stow the arm in case it is deployed
stow_cmd = RobotCommandBuilder.arm_stow_command()
command_client.robot_command(stow_cmd)
# NOTE: we'll enable this code in Part 5, when we understand it.
# -------------------------
# # Walk to the object.
# walk_rt_vision, heading_rt_vision = compute_stand_location_and_yaw(
# vision_tform_dogtoy, robot_state_client, distance_margin=1.0)
# move_cmd = RobotCommandBuilder.trajectory_command(
# goal_x=walk_rt_vision[0],
# goal_y=walk_rt_vision[1],
# goal_heading=heading_rt_vision,
# frame_name=frame_helpers.VISION_FRAME_NAME,
# params=get_walking_params(0.5, 0.5))
# end_time = 5.0
# cmd_id = command_client.robot_command(command=move_cmd,
# end_time_secs=time.time() +
# end_time)
# # Wait until the robot reports that it is at the goal.
# block_for_trajectory_cmd(command_client, cmd_id, timeout_sec=5, verbose=True)
# -------------------------
For now we won't worry about walking longer distances, but we will in the next part. Don't worry about this code yet; we'll enable it in Part 5.
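If you're curious about the helpers the commented-out block calls, here is a rough sketch of what get_walking_params might look like, guessed from the standard RobotCommandBuilder.mobility_params API; Part 5 introduces the real version along with compute_stand_location_and_yaw.

def get_walking_params(max_linear_vel, max_rotation_vel):
    # Mobility params with a velocity limit so the approach is slow and controlled.
    max_vel_linear = geometry_pb2.Vec2(x=max_linear_vel, y=max_linear_vel)
    max_vel_se2 = geometry_pb2.SE2Velocity(linear=max_vel_linear,
                                           angular=max_rotation_vel)
    vel_limit = geometry_pb2.SE2VelocityLimit(max_vel=max_vel_se2)
    params = RobotCommandBuilder.mobility_params()
    params.vel_limit.CopyFrom(vel_limit)
    return params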
# The ML result is a bounding box. Find the center.
(center_px_x,
center_px_y) = find_center_px(dogtoy.image_properties.coordinates)
# Request Pick Up on that pixel.
pick_vec = geometry_pb2.Vec2(x=center_px_x, y=center_px_y)
grasp = manipulation_api_pb2.PickObjectInImage(
pixel_xy=pick_vec,
transforms_snapshot_for_camera=image.shot.transforms_snapshot,
frame_name_image_sensor=image.shot.frame_name_image_sensor,
camera_model=image.source.pinhole)
# We can specify where in the gripper we want to grasp. About halfway is generally good for
# small objects like this. For a bigger object like a shoe, 0 is better (use the entire
# gripper)
grasp.grasp_params.grasp_palm_to_fingertip = 0.6
The main pick-up request:
- Find the center of the bounding box using our function above.
- Build a PickObjectInImage request with:
- the x and y pixel location
- the transforms_snapshot from the image (data about where the camera was when the picture was taken)
- the name of the image sensor frame
- the camera's calibration data
- Set the grasp_palm_to_fingertip parameter. This selects where in the hand the dog-toy will be held (think palm of the gripper vs. fingertip).
Tell the grasping system that we only accept top-down grasps.
- Specifying this constraint increases the robustness of the grasp. We would rather the robot report that it couldn't pick something up than have it successfully grasp the wrong thing.
Let's break down how this works (a sketch of the corresponding code follows this list):
- The vector_alignment_with_tolerance system takes a vector on the gripper and a vector in a frame. It promises that those two vectors will stay aligned within a (rotational) tolerance.
- We want a top-down grasp, so we constrain the gripper's X-axis to point straight down:
- the gripper's X-axis: axis_on_gripper_ewrt_gripper = geometry_pb2.Vec3(x=1, y=0, z=0)
- straight down: axis_to_align_with_ewrt_vision = geometry_pb2.Vec3(x=0, y=0, z=-1)
- Set the tolerance to 0.25 radians, or about 15 degrees.
- Tell the robot that the second vector is expressed in the vision frame.
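The snippet these bullets describe isn't included in this excerpt. Based on the grasp-parameter fields used in the SDK's arm grasp examples (assumed here, not copied from this tutorial), it would look roughly like this sketch:

# The axis on the gripper is the x-axis.
axis_on_gripper_ewrt_gripper = geometry_pb2.Vec3(x=1, y=0, z=0)

# The axis in the vision frame is the negative z-axis (straight down).
axis_to_align_with_ewrt_vision = geometry_pb2.Vec3(x=0, y=0, z=-1)

# Add the vector-alignment constraint to the grasp parameters.
constraint = grasp.grasp_params.allowable_orientation.add()
constraint.vector_alignment_with_tolerance.axis_on_gripper_ewrt_frame.CopyFrom(
    axis_on_gripper_ewrt_gripper)
constraint.vector_alignment_with_tolerance.axis_to_align_with_ewrt_frame.CopyFrom(
    axis_to_align_with_ewrt_vision)

# Accept anything within about 15 degrees (0.25 radians) of top-down.
constraint.vector_alignment_with_tolerance.threshold_radians = 0.25

# The second vector is expressed in the vision frame.
grasp.grasp_params.grasp_params_frame_name = frame_helpers.VISION_FRAME_NAME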
Time to send the grasp command!
# Build the proto
grasp_request = manipulation_api_pb2.ManipulationApiRequest(
pick_object_in_image=grasp)
# Send the request
print('Sending grasp request...')
cmd_response = manipulation_api_client.manipulation_api_command(
manipulation_api_request=grasp_request)
- Send the request to the robot.
# Wait for the grasp to finish
grasp_done = False
failed = False
time_start = time.time()
while not grasp_done:
feedback_request = manipulation_api_pb2.ManipulationApiFeedbackRequest(
manipulation_cmd_id=cmd_response.manipulation_cmd_id)
# Send a request for feedback
response = manipulation_api_client.manipulation_api_feedback_command(
manipulation_api_feedback_request=feedback_request)
current_state = response.current_state
current_time = time.time() - time_start
print('Current state ({time:.1f} sec): {state}'.format(
time=current_time,
state=manipulation_api_pb2.ManipulationFeedbackState.Name(
current_state)),
end=' \r')
sys.stdout.flush()
failed_states = [manipulation_api_pb2.MANIP_STATE_GRASP_FAILED,
manipulation_api_pb2.MANIP_STATE_GRASP_PLANNING_NO_SOLUTION,
manipulation_api_pb2.MANIP_STATE_GRASP_FAILED_TO_RAYCAST_INTO_MAP,
manipulation_api_pb2.MANIP_STATE_GRASP_PLANNING_WAITING_DATA_AT_EDGE]
failed = current_state in failed_states
grasp_done = current_state == manipulation_api_pb2.MANIP_STATE_GRASP_SUCCEEDED or failed
time.sleep(0.1)
holding_toy = not failed
The grasp takes time to complete.
- Poll the robot until the grasp is done.
- Print out status data.
- Note whether the grasp failed or succeeded.
# Move the arm to a carry position.
print('')
print('Grasp finished, search for a person...')
carry_cmd = RobotCommandBuilder.arm_carry_command()
command_client.robot_command(carry_cmd)
# Wait for the carry command to finish
time.sleep(0.75)
Once the grasp is finished, move the arm to the carry position. This moves the arm out of the way of the front cameras.
# For now, we'll just exit...
print('')
print('Done for now, returning control to tablet in 5 seconds...')
time.sleep(5.0)
break
For now, we'll just exit. After a successful test, we'll add person detection here.
lease_client.return_lease(lease)
if __name__ == '__main__':
if not main(sys.argv[1:]):
sys.exit(1)
Finish up by returning the lease, followed by a little Python boilerplate.
Running the Autonomous Dog-Toy Grasp
To do this, we'll keep network_compute_server.py running in a separate terminal. Here's the diagram:
Note that two scripts will be running. The advantage is that your ML model server doesn't care what you do with its results.
Start (or keep running) your network_compute_server.py:
python network_compute_server.py -m dogtoy/exported-models/dogtoy-model/saved_model -l dogtoy/annotations/label_map.pbtxt --username user --password YOUR_ROBOTS_PASSWORD 192.168.80.3
Run fetch.py:
- Use the tablet to command Spot to stand.
- Put the dog-toy on the floor.
- Walk Spot near the dog-toy.
python fetch.py --username user --password YOUR_ROBOTS_PASSWORD -s fetch-server -m dogtoy-model 192.168.80.3
Upon success, your robot should automatically find and pick up the dog-toy!
Troubleshooting
Can't Find the Dog-Toy
- You can always use the ML Model Viewer to make sure your model is still working.
- fetch.py uses the same code path as the ML Model Viewer, so if that works, this script should work too.
Trouble Grasping
- Try changing grasp.grasp_params.grasp_palm_to_fingertip (0.6 in the code above) to 0.0 or 1.0.
- Use the tablet's Pick Up Object action to see whether Spot can grasp the dog-toy using that method.
- Try running the arm_grasp API example and compare the results.
- If your dog-toy is a long rope, the center of the bounding box may be an empty spot on the floor. Try tying the rope so that the center is always graspable.
Once you're happy with autonomous pick-up, head over to Part 5 to start playing fetch!

