
Removed floating constants

/develop/nopreviousactions
Ervin Teng, 5 years ago
Commit ffdc41bb
3 changed files, with 19 additions and 16 deletions:
  1. ml-agents/mlagents/trainers/common/nn_policy.py (9 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer.py (5 changes)
  3. ml-agents/mlagents/trainers/sac/optimizer.py (21 changes)

ml-agents/mlagents/trainers/common/nn_policy.py (9 changes)


 logger = logging.getLogger("mlagents.trainers")
-LOG_STD_MAX = 2
-LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero

 self.tanh_squash = tanh_squash
 self.resample = resample
 self.trainable_variables: List[tf.Variable] = []
+# Non-exposed parameters; these aren't exposed because they don't have a
+# good explanation and usually shouldn't be touched.
+self.log_std_min = -20
+self.log_std_max = 2
 if create_tf_graph:
     self.create_tf_graph()

     kernel_initializer=LearningModel.scaled_init(0.01),
 )
-log_sigma = tf.clip_by_value(log_sigma, LOG_STD_MIN, LOG_STD_MAX)
+log_sigma = tf.clip_by_value(log_sigma, self.log_std_min, self.log_std_max)
 sigma = tf.exp(log_sigma)
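Note on the hunk above: the policy's log standard deviation is clipped to the new instance attributes before being exponentiated, which keeps sigma in a numerically safe range. A minimal standalone NumPy sketch of that effect, using hypothetical raw outputs (illustration only, not ml-agents code):

import numpy as np

# Bounds mirroring the new self.log_std_min / self.log_std_max attributes.
log_std_min, log_std_max = -20, 2

# Hypothetical raw network outputs for log-sigma.
raw_log_sigma = np.array([-35.0, -3.0, 0.5, 7.0], dtype=np.float32)

# Clip, then exponentiate: sigma stays within [exp(-20), exp(2)] ~= [2.1e-9, 7.39].
log_sigma = np.clip(raw_log_sigma, log_std_min, log_std_max)
sigma = np.exp(log_sigma)
print(sigma)  # approx. [2.1e-09, 4.98e-02, 1.65e+00, 7.39e+00]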

ml-agents/mlagents/trainers/ppo/optimizer.py (5 changes)


 logger = logging.getLogger("mlagents.trainers")
-BURN_IN_RATIO = 0.0
 class PPOOptimizer(TFOptimizer):
     def __init__(self, policy: TFPolicy, trainer_params: Dict[str, Any]):

         vis_encode_type = EncoderType(
             trainer_params.get("vis_encode_type", "simple")
         )
+        self.burn_in_ratio = float(trainer_params.get("burn_in_ratio", 0.0))
         self.stream_names = list(self.reward_signals.keys())

     self, mini_batch: AgentBuffer, num_sequences: int
 ) -> Dict[tf.Tensor, Any]:
     # Do an optional burn-in for memories
-    num_burn_in = int(BURN_IN_RATIO * self.policy.sequence_length)
+    num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
     burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
     burn_in_mask[range(0, num_burn_in)] = 0
     burn_in_mask = np.tile(burn_in_mask, num_sequences)
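For context on the masking above, here is a standalone NumPy sketch with assumed values (sequence_length=8, num_sequences=2, burn_in_ratio=0.25); it only mirrors the masking logic and is not the optimizer code itself:

import numpy as np

sequence_length = 8   # assumed LSTM sequence length
num_sequences = 2     # assumed number of sequences in the mini-batch
burn_in_ratio = 0.25  # would come from trainer_params.get("burn_in_ratio", 0.0)

num_burn_in = int(burn_in_ratio * sequence_length)   # 2 burn-in steps per sequence
burn_in_mask = np.ones(sequence_length, dtype=np.float32)
burn_in_mask[range(0, num_burn_in)] = 0              # zero out the first steps
burn_in_mask = np.tile(burn_in_mask, num_sequences)  # repeat for every sequence
print(burn_in_mask)  # [0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1.]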

ml-agents/mlagents/trainers/sac/optimizer.py (21 changes)


 from mlagents.trainers.buffer import AgentBuffer
 from mlagents_envs.timers import timed
-LOG_STD_MAX = 2
-LOG_STD_MIN = -20
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
-BURN_IN_RATIO = 0.0
 class SACOptimizer(TFOptimizer):

     trainer_params.get("vis_encode_type", "simple")
 )
 self.tau = trainer_params.get("tau", 0.005)
+self.burn_in_ratio = float(trainer_params.get("burn_in_ratio", 0.0))
+# Non-exposed SAC parameters
+self.discrete_target_entropy_scale = (
+    0.2
+)  # Roughly equal to e-greedy 0.05
+self.continuous_target_entropy_scale = 1.0
-stream_names = self.reward_signals.keys()
+stream_names = list(self.reward_signals.keys())
 # Use to reduce "survivor bonus" when using Curiosity or GAIL.
 self.gammas = [
     _val["gamma"] for _val in trainer_params["reward_signals"].values()

 if discrete:
     self.target_entropy = [
-        DISCRETE_TARGET_ENTROPY_SCALE * np.log(i).astype(np.float32)
+        self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
         for i in self.act_size
     ]
     discrete_action_probs = tf.exp(self.policy.all_log_probs)

     -1
-    * CONTINUOUS_TARGET_ENTROPY_SCALE
+    * self.continuous_target_entropy_scale
     * np.prod(self.act_size[0]).astype(np.float32)
 )
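As a worked sketch of the two target-entropy expressions touched above (the scales are now instance attributes instead of module constants), with hypothetical action sizes:

import numpy as np

discrete_target_entropy_scale = 0.2    # roughly equal to e-greedy 0.05
continuous_target_entropy_scale = 1.0

discrete_act_size = [3, 5]   # hypothetical: two discrete branches with 3 and 5 actions
continuous_act_size = [4]    # hypothetical: 4 continuous action dimensions

# Discrete: one target entropy per branch, a scaled log of the branch size.
discrete_target_entropy = [
    discrete_target_entropy_scale * np.log(i).astype(np.float32)
    for i in discrete_act_size
]
print(discrete_target_entropy)  # approx. [0.22, 0.32]

# Continuous: the negated, scaled action dimensionality.
continuous_target_entropy = (
    -1
    * continuous_target_entropy_scale
    * np.prod(continuous_act_size[0]).astype(np.float32)
)
print(continuous_target_entropy)  # -4.0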

 :param num_sequences: Number of LSTM sequences in batch.
 """
 # Do an optional burn-in for memories
-num_burn_in = int(BURN_IN_RATIO * self.policy.sequence_length)
+num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
 burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
 burn_in_mask[range(0, num_burn_in)] = 0
 burn_in_mask = np.tile(burn_in_mask, num_sequences)
