from legged_gym import LEGGED_GYM_ROOT_DIR, envs
from time import time
from warnings import WarningMessage
import numpy as np
import os

from isaacgym.torch_utils import *
from isaacgym import gymtorch, gymapi, gymutil

import torch

# from legged_gym import LEGGED_GYM_ROOT_DIR
from legged_gym.envs.base.base_task import BaseTask
from legged_gym.utils.helpers import class_to_dict  # converts a config class into a dict
from .cartpole2_config import Cartpole2Config
def post_physics_step(self):
    """check terminations, compute observations and rewards
    calls self._post_physics_step_callback() for common computations
    calls self._draw_debug_vis() if needed
    """
    # in some cases a simulation step might be required to refresh some obs (for example body positions)
    self.gym.refresh_dof_state_tensor(self.sim)  # refresh the joint state tensor
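After the joint state has been refreshed, the observation buffer is rebuilt from it. The following is only a minimal sketch: the name compute_observations follows the legged_gym convention, and stacking joint positions and velocities is an assumption for this cart-pole, not the task's confirmed code.

def compute_observations(self):
    # Sketch: stack joint positions and velocities into the observation buffer.
    self.obs_buf = torch.cat((self.dof_pos, self.dof_vel), dim=-1)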
def reset_idx(self, env_ids):
    """Reset some environments.
    Calls self._reset_dofs(env_ids), self._reset_root_states(env_ids), and self._resample_commands(env_ids)
    [Optional] calls self._update_terrain_curriculum(env_ids), self.update_command_curriculum(env_ids) and
    Logs episode info
    Resets some buffers

    Args:
        env_ids (list[int]): List of environment ids which must be reset
    """
    if len(env_ids) == 0:  # nothing to reset, return immediately
        return

    # reset robot states
    self._reset_dofs(env_ids)  # reset joint positions and velocities
def compute_reward(self):
    """Compute rewards
    Calls each reward function which had a non-zero scale (processed in self._prepare_reward_function())
    adds each term to the episode sums and to the total reward
    """
    self.rew_buf[:] = 0.0
    # Compute rewards/penalties: the reward functions defined at the end of the file
    # (collected in self._prepare_reward_function()) are simply called one after another,
    # each scaled by its configured weight.
    for i in range(len(self.reward_functions)):
        name = self.reward_names[i]
        rew = self.reward_functions[i]() * self.reward_scales[name]
        self.rew_buf += rew
        self.episode_sums[name] += rew

    # The termination reward/penalty is handled separately: it is only added
    # in the step where the agent is reset.
    if "termination" in self.reward_scales:
        rew = self._reward_termination() * self.reward_scales["termination"]
        self.rew_buf += rew
        self.episode_sums["termination"] += rew
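Each _reward_<NAME> method returns one value per environment. As a hypothetical sketch of what such a function could look like for this cart-pole (the name _reward_pole_angle and the assumption that DOF index 1 is the pole joint are illustrative, not taken from the task):

def _reward_pole_angle(self):
    # Hypothetical penalty: squared pole angle, meant to be paired with a negative scale in the config.
    return torch.square(self.dof_pos[:, 1])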
def set_camera(self, position, lookat):
    """Set camera position and direction"""
    cam_pos = gymapi.Vec3(position[0], position[1], position[2])
    cam_target = gymapi.Vec3(lookat[0], lookat[1], lookat[2])
    self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)  # set the initial viewer pose
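A usage sketch (the coordinates are arbitrary): from inside the task, the viewer can be placed a few meters away and pointed at the world origin.

# Hypothetical call: place the viewer at (3, 3, 2) looking at the origin.
self.set_camera([3.0, 3.0, 2.0], [0.0, 0.0, 0.0])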
def _compute_torques(self, actions):
    """Compute torques from actions.
    Actions can be interpreted as position or velocity targets given to a PD controller,
    or directly as scaled torques.
    [NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated.

    Args:
        actions (torch.Tensor): Actions

    Returns:
        [torch.Tensor]: Torques sent to the simulation
    """
    # Compute torques from the policy output. Here the action is simply scaled and applied directly;
    # when training real robots a controller such as PD can be added here instead.
    actions_scaled = actions * self.cfg.control.action_scale
    torques = torch.zeros_like(self.torques)
    torques[:, 0:1] = actions_scaled
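As a hedged sketch of the PD alternative mentioned in the comment (the gains self.p_gains / self.d_gains, the buffer self.default_dof_pos, and the position-target interpretation of the action are assumptions, not part of this task's config):

    # Hypothetical PD variant: treat the scaled action as a joint position target.
    torques = self.p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos) - self.d_gains * self.dof_vel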
def _reset_dofs(self, env_ids):
    """Resets DOF position and velocities of selected environments
    Positions are randomly selected within 0.5:1.5 x default positions.
    Velocities are set to zero.
    """
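A sketch of what the body typically does in legged_gym-style tasks (the buffer self.default_dof_pos and the exact call sequence are assumptions here, shown only to illustrate the docstring):

    self.dof_pos[env_ids] = self.default_dof_pos * torch_rand_float(
        0.5, 1.5, (len(env_ids), self.num_dof), device=self.device
    )
    self.dof_vel[env_ids] = 0.0

    env_ids_int32 = env_ids.to(dtype=torch.int32)
    self.gym.set_dof_state_tensor_indexed(
        self.sim,
        gymtorch.unwrap_tensor(self.dof_state),
        gymtorch.unwrap_tensor(env_ids_int32),
        len(env_ids_int32),
    )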
# create some wrapper tensors for different slices
self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)  # wrap the joint-state buffer as a PyTorch tensor
self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0]
self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1]
# split the joint-state tensor into position and velocity views
# the following tensors are NOT used in this experiment, but are kept for compatibility with other tasks
self.commands = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device, requires_grad=False)
self.base_lin_vel = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device, requires_grad=False)
self.base_ang_vel = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device, requires_grad=False)
self.contact_forces = torch.zeros(
    self.num_envs, 2, 3, dtype=torch.float, device=self.device, requires_grad=False
)
self.feet_indices = torch.tensor([0, 1], dtype=torch.int32, device=self.device, requires_grad=False)
# initialize some data used later on
self.common_step_counter = 0  # counts the total number of simulation steps
self.extras = {}  # used to log extra information
self.envs is a list holding the handles of all environment instances. For example, the names of all joints of environment 0 can be printed with:

for i in range(self.num_dof):
    name = self.gym.get_joint_name(self.envs[0], i)
    print(f"joint {i}: {name}")
def _prepare_reward_function(self):
    """Prepares a list of reward functions, which will be called to compute the total reward.
    Looks for self._reward_<REWARD_NAME>, where <REWARD_NAME> are names of all non zero reward scales in the cfg.
    """
    # Collect the reward functions into a list via getattr(); the point is simply that
    # the reward functions defined at the end of the file will be called one after another
    # in compute_reward().
    # remove zero scales + multiply non-zero ones by dt
    for key in list(self.reward_scales.keys()):
        scale = self.reward_scales[key]
        if scale == 0:
            self.reward_scales.pop(key)
        else:
            self.reward_scales[key] *= self.dt
    # prepare list of functions
    self.reward_functions = []
    self.reward_names = []
    for name, scale in self.reward_scales.items():
        if name == "termination":
            continue
        self.reward_names.append(name)
        name = "_reward_" + name
        self.reward_functions.append(getattr(self, name))
    # reward episode sums
    self.episode_sums = {
        name: torch.zeros(
            self.num_envs,
            dtype=torch.float,
            device=self.device,
            requires_grad=False,
        )
        for name in self.reward_scales.keys()
    }
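To make the naming convention concrete, here is a hypothetical config fragment (the scale names are illustrative, not the task's actual ones): a non-zero entry rewards.scales.pole_angle makes _prepare_reward_function() look up a method called _reward_pole_angle, while the termination scale is handled separately in compute_reward().

class rewards:
    class scales:
        pole_angle = -1.0    # -> self._reward_pole_angle() is collected and called each step
        termination = -2.0   # handled separately when an environment is reset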
def _create_envs(self):
    """Creates environments:
    1. loads the robot URDF/MJCF asset,
    2. For each environment
       2.1 creates the environment,
       2.2 calls DOF and Rigid shape properties callbacks,
       2.3 create actor with these properties and add them to the env
    3. Store indices of different bodies of the robot
    """
    asset_path = self.cfg.asset.file.format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR)
    asset_root = os.path.dirname(asset_path)
    asset_file = os.path.basename(asset_path)  # resolve the URDF file name and its directory
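The cfg.asset.file entry is a format string containing the {LEGGED_GYM_ROOT_DIR} placeholder. A hypothetical value (the path below is illustrative, not the repository's actual file) would be resolved as follows:

# Hypothetical config value; the real URDF path depends on the repository layout.
class asset:
    file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/cartpole/urdf/cartpole.urdf"

# asset_root -> ".../resources/robots/cartpole/urdf", asset_file -> "cartpole.urdf"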