Resolving some bugs

/develop/gym-wrapper
vincentpierre committed 4 years ago
Current commit: 6ca2daa2
2 changed files with 10 additions and 7 deletions
  1. ml-agents-envs/mlagents_envs/gym_wrapper.py (16 changes)
  2. ml-agents-envs/setup.py (1 change)

ml-agents-envs/mlagents_envs/gym_wrapper.py (16 changes)


  self._current_steps = (
      DecisionSteps(
          obs=[np.expand_dims(obs / self.obs_ratio, axis=0)],
-         reward=np.array([rew]),
-         agent_id=np.array([self._AGENT_ID]),
+         reward=np.array([rew], dtype=np.float32),
+         agent_id=np.array([self._AGENT_ID], dtype=np.int32),
          action_mask=None,
      ),
      TerminalSteps.empty(self._behavior_specs),

      DecisionSteps.empty(self._behavior_specs),
      TerminalSteps(
          obs=[np.expand_dims(obs / self.obs_ratio, axis=0)],
-         reward=np.array([rew]),
-         max_step=np.array([info.get("TimeLimit.truncated", False)]),
-         agent_id=np.array([self._AGENT_ID]),
+         reward=np.array([rew], dtype=np.float32),
+         max_step=np.array(
+             [info.get("TimeLimit.truncated", False)], dtype=np.bool
+         ),
+         agent_id=np.array([self._AGENT_ID], dtype=np.int32),
      ),
  )

  self._current_steps = (
      DecisionSteps(
          obs=[np.expand_dims(obs / self.obs_ratio, axis=0)],
-         reward=np.array([0]),
-         agent_id=np.array([self._AGENT_ID]),
+         reward=np.array([0], dtype=np.float32),
+         agent_id=np.array([self._AGENT_ID], dtype=np.int32),
          action_mask=None,
      ),
      TerminalSteps.empty(self._behavior_specs),
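The explicit dtype arguments are the substance of this fix: with a bare np.array(...), NumPy infers float64 for the reward and int64 for the agent id, while the commit switches the wrapper to the float32 rewards, int32 agent ids, and boolean max_step flags that DecisionSteps and TerminalSteps carry elsewhere in mlagents_envs. A minimal sketch of the default-inference difference (illustrative only, not part of the commit; AGENT_ID is a stand-in for self._AGENT_ID):

    import numpy as np

    rew = 1.0
    AGENT_ID = 0  # stand-in for self._AGENT_ID in the wrapper
    print(np.array([rew]).dtype)                        # float64 (default inference)
    print(np.array([AGENT_ID]).dtype)                   # int64 on most platforms
    print(np.array([rew], dtype=np.float32).dtype)      # float32, what the wrapper now passes
    print(np.array([AGENT_ID], dtype=np.int32).dtype)   # int32, matching the agent_id field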

ml-agents-envs/setup.py (1 change)


  zip_safe=False,
  install_requires=[
      "gym",
      "cmake",
      "atari-py",
+     "gym[atari]",
      "cloudpickle",

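For reference, a sketch of how the visible part of the dependency list reads after this change (only the lines shown in the diff; the remaining requirements are truncated in the page and left out here):

    # ml-agents-envs/setup.py (excerpt, as visible in this diff)
    install_requires=[
        "gym",
        "cmake",        # build dependency for atari-py on some platforms
        "atari-py",
        "gym[atari]",   # added by this commit
        "cloudpickle",
        # ... further requirements not shown in the diff
    ],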