
fix torch test ppo

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit
22f42f5b
2 files changed, 8 insertions and 21 deletions
  1. ml-agents/mlagents/trainers/tests/mock_brain.py (18 changes)
  2. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (11 changes)

ml-agents/mlagents/trainers/tests/mock_brain.py (18 changes)


     steps_list = []
     action_size = action_spec.discrete_size + action_spec.continuous_size
-    prob_ones = np.ones(
-        int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
-        dtype=np.float32,
-    )
     for _i in range(length - 1):
         obs = []
         for _shape in observation_shapes:

-        if action_spec.is_continuous():
-            action = ActionTuple(continuous=np.zeros(action_size, dtype=np.float32))
-            action_probs = LogProbsTuple(continuous=prob_ones)
-        else:
-            action = ActionTuple(discrete=np.zeros(action_size, dtype=np.float32))
-            action_probs = LogProbsTuple(discrete=prob_ones)
+        action = ActionTuple(
+            continuous=np.zeros(action_spec.continuous_size, dtype=np.float32),
+            discrete=np.zeros(action_spec.discrete_size, dtype=np.int32),
+        )
+        action_probs = LogProbsTuple(
+            continuous=np.ones(action_spec.continuous_size, dtype=np.float32),
+            discrete=np.ones(action_spec.discrete_size, dtype=np.float32),
+        )
         action_pre = np.zeros(action_size, dtype=np.float32)
         action_mask = (
             [
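For reference, a minimal numpy-only sketch of the log-prob shape change this hunk makes to the fake trajectory data; it is not the ml-agents API, and the continuous_size/discrete_branches values below are hypothetical:

import numpy as np

# Hypothetical hybrid action spec: 2 continuous dimensions and 2 discrete branches
# with 3 and 2 choices each. These sizes are only for illustration.
continuous_size = 2
discrete_branches = (3, 2)
discrete_size = len(discrete_branches)

# Old fake data: one log prob per discrete choice plus the continuous dims,
# i.e. 3 + 2 + 2 = 7 entries (the TensorFlow-style layout).
old_prob_ones = np.ones(
    int(np.sum(discrete_branches) + continuous_size), dtype=np.float32
)

# New fake data: one log prob per continuous dimension and one per discrete
# branch, matching what the torch trainers store.
new_continuous_log_probs = np.ones(continuous_size, dtype=np.float32)
new_discrete_log_probs = np.ones(discrete_size, dtype=np.float32)

print(old_prob_ones.shape)             # (7,)
print(new_continuous_log_probs.shape)  # (2,)
print(new_discrete_log_probs.shape)    # (2,)

With both the continuous and discrete parts filled in for every fake step, the same mock works for continuous, discrete, and hybrid action specs.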

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (11 changes)


     update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]
     update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
-    # NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
-    # in PyTorch it is saved as the total probability per branch. So we need to modify the
-    # log prob in the fake buffer here.
-    if discrete:
-        update_buffer["discrete_log_probs"] = np.ones_like(
-            update_buffer["discrete_action"]
-        )
-    else:
-        update_buffer["continuous_log_probs"] = np.ones_like(
-            update_buffer["continuous_action"]
-        )
     return_stats = optimizer.update(
         update_buffer,
         num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,