浏览代码

Revert obs to goal in simple 1d test (#3540)

/bug-failed-api-check
GitHub 5 年前
当前提交
a6bf50db
共有 1 个文件被更改,包括 6 次插入4 次删除
  1. 10
      ml-agents/mlagents/trainers/tests/simple_test_envs.py

10
ml-agents/mlagents/trainers/tests/simple_test_envs.py


self.position: Dict[str, float] = {}
self.step_count: Dict[str, float] = {}
self.random = random.Random(str(self.group_spec))
self.goal = self.random.choice([-1, 1])
self.goal: Dict[str, int] = {}
self.action = {}
self.rewards: Dict[str, float] = {}
self.final_rewards: Dict[str, List[float]] = {}

self.goal[name] = self.random.choice([-1, 1])
self.rewards[name] = 0
self.final_rewards[name] = []
self._reset_agent(name)

self.step_count[name] += 1
done = self.position[name] >= 1.0 or self.position[name] <= -1.0
if done:
reward = SUCCESS_REWARD * self.position[name] * self.goal
reward = SUCCESS_REWARD * self.position[name] * self.goal[name]
m_vector_obs = [np.zeros((1, OBS_SIZE), dtype=np.float32)]
m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * self.goal[name]]
m_reward = np.array([reward], dtype=np.float32)
m_done = np.array([done], dtype=np.bool)
m_agent_id = np.array([0], dtype=np.int32)

return action_mask
def _reset_agent(self, name):
self.goal[name] = self.random.choice([-1, 1])
self.position[name] = 0.0
self.step_count[name] = 0
self.final_rewards[name].append(self.rewards[name])

for name in self.names:
self._reset_agent(name)
m_vector_obs = [np.zeros((1, OBS_SIZE), dtype=np.float32)]
m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * self.goal[name]]
m_reward = np.array([0], dtype=np.float32)
m_done = np.array([False], dtype=np.bool)
m_agent_id = np.array([0], dtype=np.int32)

正在加载...
取消
保存