
Add typing to optimizer, rename self.tf_optimizer

/develop/nopreviousactions
Ervin Teng, 5 years ago
Current commit
441e6a0c
4 changed files with 14 additions and 10 deletions
1. ml-agents/mlagents/trainers/common/nn_policy.py (2 changes)
2. ml-agents/mlagents/trainers/common/tf_optimizer.py (10 changes)
3. ml-agents/mlagents/trainers/ppo/optimizer.py (10 changes)
4. ml-agents/mlagents/trainers/tf_policy.py (2 changes)

ml-agents/mlagents/trainers/common/nn_policy.py (2 changes)


"Losses/Policy Loss": "policy_loss",
}
self.optimizer: Optional[tf.train.AdamOptimizer] = None
self.tf_optimizer: Optional[tf.train.Optimizer] = None
self.grads = None
self.update_batch: Optional[tf.Operation] = None
num_layers = trainer_params["num_layers"]
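For context, the annotated attributes follow the usual pattern of declaring build-time fields as Optional placeholders in __init__ and filling them in once the TF graph is constructed. A minimal sketch of that pattern (the class and method names here are illustrative, not the actual NNPolicy code):

from typing import Optional
import tensorflow as tf

class PolicySketch:
    def __init__(self) -> None:
        # Annotated as Optional so mypy accepts the None placeholder;
        # the real objects are created later, when the graph is built.
        self.tf_optimizer: Optional[tf.train.Optimizer] = None
        self.update_batch: Optional[tf.Operation] = None

    def _build_update(self, loss: tf.Tensor, learning_rate: float) -> None:
        # Replace the placeholders once the loss tensor exists.
        self.tf_optimizer = tf.train.AdamOptimizer(learning_rate)
        self.update_batch = self.tf_optimizer.minimize(loss)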

ml-agents/mlagents/trainers/common/tf_optimizer.py (10 changes)


  return value_estimates
- def create_reward_signals(self, reward_signal_configs):
+ def create_reward_signals(self, reward_signal_configs: Dict[str, Any]) -> None:
  """
  Create reward signals
  :param reward_signal_configs: Reward signal config.

  )
  self.update_dict.update(self.reward_signals[reward_signal].update_dict)
- def create_tf_optimizer(self, learning_rate, name="Adam"):
+ def create_tf_optimizer(
+     self, learning_rate: float, name: str = "Adam"
+ ) -> tf.train.Optimizer:
- def _execute_model(self, feed_dict, out_dict):
+ def _execute_model(
+     self, feed_dict: Dict[tf.Tensor, np.ndarray], out_dict: Dict[str, tf.Tensor]
+ ) -> Dict[str, np.ndarray]:
  """
  Executes model.
  :param feed_dict: Input dictionary mapping nodes to input data.
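Read in isolation, the newly typed methods can be sketched as below. The signatures match the diff; the class name, the session attribute, and the method bodies are assumptions for illustration, not the actual TFOptimizer implementation:

from typing import Dict
import numpy as np
import tensorflow as tf

class OptimizerSketch:
    def __init__(self, sess: tf.Session) -> None:
        self.sess = sess  # assumed: the optimizer reuses the policy's session

    def create_tf_optimizer(
        self, learning_rate: float, name: str = "Adam"
    ) -> tf.train.Optimizer:
        # Plain TF1 Adam optimizer for the given learning rate.
        return tf.train.AdamOptimizer(learning_rate=learning_rate, name=name)

    def _execute_model(
        self, feed_dict: Dict[tf.Tensor, np.ndarray], out_dict: Dict[str, tf.Tensor]
    ) -> Dict[str, np.ndarray]:
        # Run every requested fetch in one session call and key results by name.
        network_out = self.sess.run(list(out_dict.values()), feed_dict=feed_dict)
        return dict(zip(out_dict.keys(), network_out))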

ml-agents/mlagents/trainers/ppo/optimizer.py (10 changes)


  trainer_params.get("vis_encode_type", "simple")
  )
- self.stream_names = self.reward_signals.keys()
+ self.stream_names = list(self.reward_signals.keys())
- self.optimizer: Optional[tf.train.AdamOptimizer] = None
+ self.tf_optimizer: Optional[tf.train.AdamOptimizer] = None
  self.grads = None
  self.update_batch: Optional[tf.Operation] = None

  )
  def create_ppo_optimizer(self):
- self.optimizer = self.create_tf_optimizer(self.learning_rate)
- self.grads = self.optimizer.compute_gradients(self.loss)
- self.update_batch = self.optimizer.minimize(self.loss)
+ self.tf_optimizer = self.create_tf_optimizer(self.learning_rate)
+ self.grads = self.tf_optimizer.compute_gradients(self.loss)
+ self.update_batch = self.tf_optimizer.minimize(self.loss)
  @timed
  def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
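The compute_gradients/minimize pair set up in create_ppo_optimizer (keeping the raw gradients around alongside the update op, presumably so they remain available for inspection) can be exercised standalone. A small self-contained TF1 example; the variable, loss, and learning rate are illustrative only:

import tensorflow as tf

# compute_gradients() exposes (gradient, variable) pairs for inspection,
# while minimize() builds the update op the training loop actually runs.
x = tf.Variable(3.0, name="x")
loss = tf.square(x - 1.0)

tf_optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
grads = tf_optimizer.compute_gradients(loss)   # [(dloss/dx, x)]
update_batch = tf_optimizer.minimize(loss)     # applies one Adam step

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([g for g, _ in grads]))     # 2 * (3.0 - 1.0) = 4.0
    sess.run(update_batch)                     # take one optimization step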

ml-agents/mlagents/trainers/tf_policy.py (2 changes)


  )
  tf.set_random_seed(seed)
  self.saver = None
- self.optimizer = None
+ self.tf_optimizer = None
  if self.use_recurrent:
  self.m_size = trainer_parameters["memory_size"]
  self.sequence_length = trainer_parameters["sequence_length"]
