        self.vis_obs_size = vis_obs_size
        self.vec_obs_size = vec_obs_size
        # One binary branch per action dimension when discrete, otherwise
        # action_size continuous dimensions.
        if use_discrete:
            self.behavior_spec = BehaviorSpec(
                self._make_obs_spec(), 0, tuple(2 for _ in range(action_size))
            )
        else:
            self.behavior_spec = BehaviorSpec(
                self._make_obs_spec(), action_size, tuple()
            )
        self.action_size = action_size
        self.names = brain_names
        self.positions: Dict[str, List[float]] = {}
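        # Example (sketch, assuming the positional BehaviorSpec signature used
        # above, i.e. (obs spec, continuous size, discrete branches)):
        #   use_discrete=True,  action_size=2 -> BehaviorSpec(obs, 0, (2, 2))
        #   use_discrete=False, action_size=2 -> BehaviorSpec(obs, 2, ())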
|
|
|
|
|
|
    ):
        super().__init__(
            brain_names,
            True,  # This is needed for env to generate masks correctly
            action_size=discrete_action_size,  # This is needed for env to generate masks correctly
        )
        self.behavior_spec = BehaviorSpec(
            self._make_obs_spec(),
            continuous_action_size,
            tuple(2 for _ in range(discrete_action_size)),
        )
        self.continuous_action_size = continuous_action_size
        self.discrete_action_size = discrete_action_size
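        # Example (sketch): with continuous_action_size=2 and
        # discrete_action_size=1 the spec above exposes two continuous
        # dimensions plus one two-way discrete branch, so a full action for a
        # single agent is two floats followed by one branch choice in {0, 1}.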
|
|
|
|
|
|
            all_done = cont_done and disc_done
            if all_done:
                reward = 0
                for _pos in (
                    self.continuous_env.positions[name]
                    + self.discrete_env.positions[name]
                ):
                    # Assumed per-position reward, mirroring the base
                    # environment: scaled by the goal and normalized by the
                    # total number of tracked positions.
                    reward += (SUCCESS_REWARD * _pos * self.goal[name]) / len(
                        self.continuous_env.positions[name]
                        + self.discrete_env.positions[name]
                    )
            self.step_result[name] = self._make_batched_step(name, all_done, reward)
|
|
|
|
|
|
|
    def reset(self) -> None:  # type: ignore
        super().reset()
|
|
|
|
|
|
        self.discrete_env.goal = self.goal
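        # Sketch of intent (assumption): the goal is pushed down so both
        # sub-environments reward movement toward the same target; the
        # continuous sub-environment presumably receives its goal elsewhere,
        # e.g. in super().reset().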
|
|
|
|
|
|
|
    def set_actions(self, behavior_name: BehaviorName, action) -> None:
        # Split the concatenated hybrid action into its continuous and
        # discrete parts and forward each to the matching sub-environment.
        continuous_action = action[:, : self.continuous_action_size]
        discrete_action = action[:, self.continuous_action_size :]
        self.continuous_env.set_actions(behavior_name, continuous_action)
        self.discrete_env.set_actions(behavior_name, discrete_action)
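        # Example (sketch): with continuous_action_size=2 and
        # discrete_action_size=1, a batch of shape (n_agents, 3) such as
        # [[0.1, -0.3, 1.0]] splits into the continuous slice [[0.1, -0.3]]
        # and the discrete slice [[1.0]].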