浏览代码

Fix lost trajectories when they are produced faster than they are consumed (#3233)

* Fix bug when trajectories are produced faster than they are consumed

* Cap max length
/asymm-envs
GitHub 5 年前
当前提交
56a67403
共有 2 个文件被更改,包括 12 次插入和 7 次删除
  1. 5
      ml-agents/mlagents/trainers/agent_processor.py
  2. 14
      ml-agents/mlagents/trainers/trainer.py

5
ml-agents/mlagents/trainers/agent_processor.py


pass
def __init__(self, behavior_id: str):
def __init__(self, behavior_id: str, maxlen: int = 1000):
self.queue: Deque[T] = deque()
self.maxlen: int = maxlen
self.queue: Deque[T] = deque(maxlen=self.maxlen)
self.behavior_id = behavior_id
def empty(self) -> bool:

14
ml-agents/mlagents/trainers/trainer.py


"""
with hierarchical_timer("process_trajectory"):
for traj_queue in self.trajectory_queues:
try:
t = traj_queue.get_nowait()
self._process_trajectory(t)
except AgentManagerQueue.Empty:
pass
# We grab at most the maximum length of the queue.
# This ensures that even if the queue is being filled faster than it is
# being emptied, the trajectories in the queue are on-policy.
for _ in range(traj_queue.maxlen):
try:
t = traj_queue.get_nowait()
self._process_trajectory(t)
except AgentManagerQueue.Empty:
break
if self.should_still_train:
if self._is_ready_update():
with hierarchical_timer("_update_policy"):

正在加载...
取消
保存