浏览代码

try reduce bias more

/develop/coma2/singlenetwork
Ervin Teng 3 年前
当前提交
6094613d
共有 2 个文件被更改,包括 7 次插入 和 8 次删除
  1. 4
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 11
      ml-agents/mlagents/trainers/torch/networks.py

4
ml-agents/mlagents/trainers/ppo/trainer.py


"""
value_estimates = np.append(value_estimates, value_next)
q_estimate = rewards + gamma * value_estimates[1:]
delta_t = (q_estimate - statistics.mean(q_estimate)) - (
baseline - statistics.mean(baseline)
)
delta_t = (q_estimate) - (baseline)
advantage = discount_rewards(r=delta_t, gamma=gamma * lambd)
return advantage

11
ml-agents/mlagents/trainers/torch/networks.py


def baseline(
self,
self_obs: List[List[torch.Tensor]],
self_obs: List[torch.Tensor],
obs: List[List[torch.Tensor]],
actions: List[AgentAction],
memories: Optional[torch.Tensor] = None,

f_inp = None
concat_f_inp = []
concat_g_inp = []
for inputs, action in zip(obs, actions):
encodes = []
for idx, processor in enumerate(self.processors):

encodes.append(processed_obs)
concat_g_inp.append(torch.cat(encodes, dim=-1))
cat_encodes = [
torch.cat(encodes, dim=-1),
action.to_flat(self.action_spec.discrete_branches),

f_inp = torch.stack(concat_f_inp, dim=1)
self_attn_masks.append(self._get_masks_from_nans(obs))
concat_encoded_obs = []
encodes = []
for idx, processor in enumerate(self.processors):
obs_input = self_obs[idx]

concat_encoded_obs.append(torch.cat(encodes, dim=-1))
g_inp = torch.stack(concat_encoded_obs, dim=1)
concat_g_inp.append(torch.cat(encodes, dim=-1))
g_inp = torch.stack(concat_g_inp, dim=1)
self_attn_masks.append(self._get_masks_from_nans([self_obs]))
self_attn_masks.append(self._get_masks_from_nans([self_obs] + obs))
encoding, memories = self.forward(
f_inp,
g_inp,

正在加载...
取消
保存