浏览代码

Used a NamedTuple for the tensors returned by create_normalizer

/develop/nopreviousactions
Ervin Teng 5 年前
当前提交
7004604d
共有 3 个文件被更改,包括 20 次插入13 次删除
  1. 17
      ml-agents/mlagents/trainers/models.py
  2. 8
      ml-agents/mlagents/trainers/sac/network.py
  3. 8
      ml-agents/mlagents/trainers/tf_policy.py

17
ml-agents/mlagents/trainers/models.py


import logging
from enum import Enum
from typing import Callable, Dict, List, Tuple
from typing import Callable, Dict, List, Tuple, NamedTuple
import numpy as np
from mlagents.tf_utils import tf

# Enumeration of learning-rate schedule kinds. Each member's value is the
# lowercase string form of its name.
class LearningRateSchedule(Enum):
CONSTANT = "constant"
LINEAR = "linear"
# Named bundle of the four values returned by LearningModel.create_normalizer.
# Fields are declared in the same order as the plain 4-tuple that used to be
# returned, so positional indexing ([0]..[3]) gives the same values as before.
class NormalizerTensors(NamedTuple):
# Result of LearningModel.create_normalizer_update for these tensors.
update_op: tf.Operation
# The variable created under the name "normalization_steps".
steps: tf.Tensor
# NOTE(review): presumably the running mean of the vector observations;
# its creation is not visible in this view -- confirm.
running_mean: tf.Tensor
# NOTE(review): presumably the running variance of the vector observations;
# its creation is not visible in this view -- confirm.
running_variance: tf.Tensor
class LearningModel:

return normalized_state
@staticmethod
def create_normalizer(
vector_obs: tf.Tensor
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]:
def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors:
vec_obs_size = vector_obs.shape[1]
steps = tf.get_variable(
"normalization_steps",

update_normalization = LearningModel.create_normalizer_update(
vector_obs, steps, running_mean, running_variance
)
return update_normalization, steps, running_mean, running_variance
return NormalizerTensors(
update_normalization, steps, running_mean, running_variance
)
@staticmethod
def create_normalizer_update(

8
ml-agents/mlagents/trainers/sac/network.py


self.vector_in = LearningModel.create_vector_input(policy.vec_obs_size)
if self.policy.normalize:
normalization_tensors = LearningModel.create_normalizer(self.vector_in)
self.update_normalization_op = normalization_tensors[0]
self.normalization_steps = normalization_tensors[1]
self.running_mean = normalization_tensors[2]
self.running_variance = normalization_tensors[3]
self.update_normalization_op = normalization_tensors.update_op
self.normalization_steps = normalization_tensors.steps
self.running_mean = normalization_tensors.running_mean
self.running_variance = normalization_tensors.running_variance
self.processed_vector_in = LearningModel.normalize_vector_obs(
self.vector_in,
self.running_mean,

8
ml-agents/mlagents/trainers/tf_policy.py


self.vector_in = LearningModel.create_vector_input(self.vec_obs_size)
if self.normalize:
normalization_tensors = LearningModel.create_normalizer(self.vector_in)
self.update_normalization_op = normalization_tensors[0]
self.normalization_steps = normalization_tensors[1]
self.running_mean = normalization_tensors[2]
self.running_variance = normalization_tensors[3]
self.update_normalization_op = normalization_tensors.update_op
self.normalization_steps = normalization_tensors.steps
self.running_mean = normalization_tensors.running_mean
self.running_variance = normalization_tensors.running_variance
self.processed_vector_in = LearningModel.normalize_vector_obs(
self.vector_in,
self.running_mean,

正在加载...
取消
保存