
Add burn-in for memory PPO

/develop/nopreviousactions
Ervin Teng · 5 years ago
Commit 7d616651
1 changed file with 9 insertions and 4 deletions
ml-agents/mlagents/trainers/ppo/optimizer.py  (13 changed lines)

 logger = logging.getLogger("mlagents.trainers")
+BURN_IN_RATIO = 0.1


 class PPOOptimizer(TFOptimizer):
     def __init__(self, policy, trainer_params):
         ...

     def construct_feed_dict(
         self, mini_batch: AgentBuffer, num_sequences: int
     ) -> Dict[tf.Tensor, Any]:
+        # Do a burn-in for memories
+        num_burn_in = int(BURN_IN_RATIO * self.policy.sequence_length)
+        burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
+        burn_in_mask[range(0, num_burn_in)] = 0
+        burn_in_mask = np.tile(burn_in_mask, num_sequences)
         feed_dict = {
-            self.policy.sequence_length_ph: len(mini_batch["advantages"])
-            / num_sequences,  # TODO: Fix LSTM
-            self.policy.mask_input: mini_batch["masks"],
+            self.policy.sequence_length_ph: self.policy.sequence_length,
+            self.policy.mask_input: mini_batch["masks"] * burn_in_mask,
             self.advantage: mini_batch["advantages"],
             self.all_old_log_probs: mini_batch["action_probs"],
         }
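
The added block builds a per-sequence burn-in mask: the first BURN_IN_RATIO (here 10%) of every recurrent sequence is zeroed, the pattern is tiled across all num_sequences sequences in the mini-batch, and the result is multiplied into the existing masks tensor. The burned-in steps are still fed through the network so the recurrent state can warm up, but they no longer contribute to the masked loss. Below is a minimal, self-contained NumPy sketch of the same construction; the helper name make_burn_in_mask and the demo values are illustrative only and are not part of the ml-agents code.

    import numpy as np

    BURN_IN_RATIO = 0.1  # fraction of each recurrent sequence excluded from the loss


    def make_burn_in_mask(sequence_length: int, num_sequences: int) -> np.ndarray:
        """Flat mask of shape (sequence_length * num_sequences,) that zeroes the
        first BURN_IN_RATIO of every sequence and keeps the remaining steps."""
        num_burn_in = int(BURN_IN_RATIO * sequence_length)
        mask = np.ones(sequence_length, dtype=np.float32)
        mask[:num_burn_in] = 0.0             # drop the first steps of the sequence
        return np.tile(mask, num_sequences)  # repeat the pattern for every sequence


    if __name__ == "__main__":
        seq_len, n_seq = 10, 3               # hypothetical demo values
        burn_in_mask = make_burn_in_mask(seq_len, n_seq)
        # Stand-in for mini_batch["masks"]: 1 for valid steps, 0 for padding.
        experience_masks = np.ones(seq_len * n_seq, dtype=np.float32)
        combined = experience_masks * burn_in_mask
        print(combined.reshape(n_seq, seq_len))
        # Each row starts with int(0.1 * 10) == 1 zero: that step still updates the
        # recurrent state in the forward pass but is ignored by the masked loss.

Apart from the mask, the diff also feeds the configured self.policy.sequence_length for sequence_length_ph instead of deriving it as len(mini_batch["advantages"]) / num_sequences (the removed "# TODO: Fix LSTM" lines). The overall idea is similar in spirit to the burn-in used for recurrent experience replay (e.g. R2D2), where the start of each stored sequence is used only to re-warm the hidden state rather than to compute gradients.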
