 # We grab at most the maximum length of the queue.
 # This ensures that even if the queue is being filled faster than it is
 # being emptied, the trajectories in the queue are on-policy.
 for _ in range(trajectory_queue.maxlen):
-    t = trajectory_queue.get_nowait()
+    t = trajectory_queue.get(block=not empty_queue, timeout=0.05)
     # adds to wrapped trainers queue
     internal_trajectory_queue.put(t)
     self._process_trajectory(t)
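
Stripped of the trainer plumbing, the bounded drain that the comment above describes can be sketched on its own. This is only an illustration: queue.Queue stands in for AgentManagerQueue, and drain_bounded is a name made up for this example.

import queue

def drain_bounded(trajectory_queue: queue.Queue, maxlen: int, process) -> int:
    """Consume at most maxlen items per call; stop as soon as the queue runs dry."""
    consumed = 0
    for _ in range(maxlen):
        try:
            t = trajectory_queue.get_nowait()
        except queue.Empty:
            break
        process(t)
        consumed += 1
    return consumed

q: queue.Queue = queue.Queue()
q.put("trajectory-0")
drain_bounded(q, maxlen=1000, process=print)  # prints "trajectory-0", returns 1
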
 # Dump trajectories from non-learning policy
 try:
     for _ in range(trajectory_queue.maxlen):
-        t = trajectory_queue.get_nowait()
+        t = trajectory_queue.get(block=not empty_queue, timeout=0.05)
+        if not empty_queue:
+            break
 except AgentManagerQueue.Empty:
     pass
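
Read on its own, the empty_queue flag toggles between two behaviours: wait briefly for a single trajectory during normal stepping, or drain everything without blocking when the queue should be flushed. A rough standalone sketch of that control flow, again with queue.Queue standing in for AgentManagerQueue and pull_trajectories as an illustrative name:

import queue

def pull_trajectories(q: queue.Queue, maxlen: int, empty_queue: bool = False) -> list:
    """empty_queue=False: wait up to 0.05s for one item, then return.
    empty_queue=True: drain up to maxlen items without blocking."""
    items = []
    try:
        for _ in range(maxlen):
            # Block only while we are not flushing the queue.
            items.append(q.get(block=not empty_queue, timeout=0.05))
            if not empty_queue:
                break
    except queue.Empty:
        pass
    return items
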
 for brain_name in self._internal_policy_queues:
     internal_policy_queue = self._internal_policy_queues[brain_name]
     try:
-        policy = cast(TFPolicy, internal_policy_queue.get_nowait())
+        policy = cast(TFPolicy, internal_policy_queue.get(block=False))
         self.current_policy_snapshot[brain_name] = policy.get_weights()
     except AgentManagerQueue.Empty:
         pass
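
The policy loop is a plain non-blocking producer/consumer hand-off: when a new policy has been pushed onto the internal policy queue, its weights replace the current snapshot; otherwise the Empty exception is swallowed and the previous snapshot is kept. A minimal sketch of that hand-off with stand-in names (StubPolicy and the "walker" key are made up for the example, and queue.Queue again replaces AgentManagerQueue):

import queue

class StubPolicy:
    """Illustrative stand-in for a policy object that exposes get_weights()."""
    def __init__(self, weights):
        self._weights = weights

    def get_weights(self):
        return self._weights

internal_policy_queue: queue.Queue = queue.Queue()
current_policy_snapshot: dict = {}

# Producer side: an updated policy is published to the internal queue.
internal_policy_queue.put(StubPolicy(weights=[0.1, 0.2]))

# Consumer side: poll without blocking so the loop never stalls when nothing new arrived.
try:
    policy = internal_policy_queue.get(block=False)
    current_policy_snapshot["walker"] = policy.get_weights()
except queue.Empty:
    pass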