|
|
|
|
|
|
from mlagents.trainers.components.bc.module import BCModule |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
|
import horovod.tensorflow as hvd |
|
|
|
except ImportError: |
|
|
|
hvd = None |
|
|
|
|
|
|
|
|
|
|
|
class TFOptimizer(Optimizer): # pylint: disable=W0223 |
|
|
|
def __init__(self, policy: TFPolicy, trainer_params: Dict[str, Any]): |
|
|
|
self.sess = policy.sess |
|
|
|
|
|
|
def create_optimizer_op( |
|
|
|
self, learning_rate: tf.Tensor, name: str = "Adam" |
|
|
|
) -> tf.train.Optimizer: |
|
|
|
return tf.train.AdamOptimizer(learning_rate=learning_rate, name=name) |
|
|
|
|
|
|
|
if hvd is not None: |
|
|
|
adam_optimizer = tf.train.AdamOptimizer( |
|
|
|
learning_rate=learning_rate * hvd.size(), name=name |
|
|
|
) |
|
|
|
horovod_optimizer = hvd.DistributedOptimizer(adam_optimizer) |
|
|
|
else: |
|
|
|
adam_optimizer = tf.train.AdamOptimizer( |
|
|
|
learning_rate=learning_rate, name=name |
|
|
|
) |
|
|
|
return horovod_optimizer if hvd is not None else adam_optimizer |
|
|
|
|
|
|
|
def _execute_model( |
|
|
|
self, feed_dict: Dict[tf.Tensor, np.ndarray], out_dict: Dict[str, tf.Tensor] |
|
|
|