|
|
|
|
|
|
from typing import Dict, List |
|
|
|
from collections import defaultdict |
|
|
|
import abc |
|
|
|
import time |
|
|
|
|
|
|
|
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer |
|
|
|
from mlagents.trainers.buffer import AgentBuffer |
|
|
|
|
|
|
# We grab at most the maximum length of the queue. |
|
|
|
# This ensures that even if the queue is being filled faster than it is |
|
|
|
# being emptied, the trajectories in the queue are on-policy. |
|
|
|
_queried = False |
|
|
|
_queried = True |
|
|
|
if self.threaded and not _queried: |
|
|
|
# Avoid busy-waiting |
|
|
|
time.sleep(0.05) |
|
|
|
if self.should_still_train: |
|
|
|
if self._is_ready_update(): |
|
|
|
with hierarchical_timer("_update_policy"): |
|
|
|