
Add way to check if trajectory is done or max_reached

/develop-newnormalization
Ervin Teng, 5 years ago
Current commit: 6242b67d
4 files changed, 18 insertions(+), 4 deletions(-)
  1. ml-agents/mlagents/trainers/bc/trainer.py (2 changes)
  2. ml-agents/mlagents/trainers/ppo/trainer.py (4 changes)
  3. ml-agents/mlagents/trainers/sac/trainer.py (2 changes)
  4. ml-agents/mlagents/trainers/trajectory.py (14 changes)

ml-agents/mlagents/trainers/bc/trainer.py (2 changes)

         else:
             self.episode_steps[agent_id] += len(trajectory.steps)
-        if trajectory.steps[-1].done:
+        if trajectory.done_reached:
             self.stats["Environment/Episode Length"].append(
                 self.episode_steps.get(agent_id, 0)
             )

ml-agents/mlagents/trainers/ppo/trainer.py (4 changes)

         value_next = self.policy.get_value_estimates(
             trajectory.next_obs,
-            trajectory.steps[-1].done and not trajectory.steps[-1].max_step,
+            trajectory.done_reached and not trajectory.max_step_reached,
             agent_id,
         )

         )
         # If this was a terminal trajectory, append stats and reset reward collection
-        if trajectory.steps[-1].done:
+        if trajectory.done_reached:
             self._update_end_episode_stats(agent_id)

     def is_ready_update(self):
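The flag passed to get_value_estimates here distinguishes a true terminal state (done, not max_step) from a time-limit cutoff. A minimal sketch of why that matters, using a hypothetical discounted_returns helper (illustrative only, not the ml-agents implementation): when the episode is cut off at the step limit, the final state is not really terminal, so the return should still bootstrap from the value estimate of next_obs.

    # Illustrative sketch: how the done-and-not-max-step flag
    # affects return bootstrapping.
    def discounted_returns(rewards, value_next, done, gamma=0.99):
        # A true terminal state contributes no future value; a
        # max_step cutoff does, so seed the backward pass with
        # value_next in that case.
        ret = 0.0 if done else value_next
        returns = []
        for r in reversed(rewards):
            ret = r + gamma * ret
            returns.append(ret)
        return returns[::-1]

    # Episode truncated by the step limit: done_reached and
    # max_step_reached are both True, so the flag is False and we
    # bootstrap from value_next.
    print(discounted_returns([1.0, 1.0], value_next=5.0, done=False))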

ml-agents/mlagents/trainers/sac/trainer.py (2 changes)

             self.update_buffer, training_length=self.policy.sequence_length
         )
-        if trajectory.steps[-1].done:
+        if trajectory.done_reached:
             self._update_end_episode_stats(agent_id)

     def is_ready_update(self) -> bool:

ml-agents/mlagents/trainers/trajectory.py (14 changes)

         # Add the value outputs if needed
         agent_buffer_trajectory["environment_rewards"].append(exp.reward)
         return agent_buffer_trajectory

+    @property
+    def done_reached(self) -> bool:
+        """
+        Returns true if trajectory is terminated with a Done.
+        """
+        return self.steps[-1].done
+
+    @property
+    def max_step_reached(self) -> bool:
+        """
+        Returns true if trajectory was terminated because max steps was reached.
+        """
+        return self.steps[-1].max_step
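For reference, a self-contained sketch of what the new properties expose. The done and max_step fields mirror the diff; the simplified AgentExperience record and its other details are illustrative, not the full ml-agents types.

    from typing import List, NamedTuple

    class AgentExperience(NamedTuple):
        # Simplified stand-in for an ml-agents experience record.
        reward: float
        done: bool       # environment signaled episode end
        max_step: bool   # episode was cut off by the step limit

    class Trajectory(NamedTuple):
        steps: List[AgentExperience]

        @property
        def done_reached(self) -> bool:
            # True if the last experience carries a done signal.
            return self.steps[-1].done

        @property
        def max_step_reached(self) -> bool:
            # True if the episode was truncated at the step limit.
            return self.steps[-1].max_step

    # An episode cut off by the step limit reports both flags True,
    # which is exactly the case the PPO trainer checks before deciding
    # whether to bootstrap the value of the final observation.
    traj = Trajectory(steps=[AgentExperience(reward=1.0, done=True, max_step=True)])
    assert traj.done_reached
    assert traj.max_step_reached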