Fix typos

Branch: MLA-1734-demo-provider
Arthur Juliani, 4 years ago
Current commit: 0a876b9c

7 files changed, 8 insertions(+), 8 deletions(-)
  1. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)
  2. ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)
  3. ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)
  4. ml-agents/mlagents/trainers/tests/mock_brain.py (2 changes)
  5. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)
  6. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
  7. ml-agents/mlagents/trainers/torch/model_serialization.py (4 changes)

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)

     def get_trajectory_value_estimates(
         self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
     ) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
-        n_obs = len(self.policy.behavior_spec.observation_spec)
+        n_obs = len(self.policy.behavior_spec.observation_specs)
         current_obs = ObsUtil.from_buffer(batch, n_obs)
         # Convert to tensors
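
The fix is the plural attribute name: in this version of the API, BehaviorSpec carries a list called observation_specs, one entry per sensor, so len() gives the number of observations each agent produces. A minimal sketch of the counting-and-slicing pattern above, with a plain dict standing in for AgentBuffer/ObsUtil (the obs_N keys are illustrative, not the real buffer layout):

import numpy as np
from typing import Dict, List

# One shape per sensor, playing the role of behavior_spec.observation_specs.
observation_shapes: List[tuple] = [(8,), (84, 84, 3)]  # vector obs + a camera
n_obs = len(observation_shapes)  # mirrors len(...observation_specs)

# A toy buffer: one array of stacked steps per sensor.
batch: Dict[str, np.ndarray] = {
    f"obs_{i}": np.zeros((5,) + shape) for i, shape in enumerate(observation_shapes)
}
current_obs = [batch[f"obs_{i}"] for i in range(n_obs)]
assert len(current_obs) == n_obs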

ml-agents/mlagents/trainers/policy/torch_policy.py (2 changes)

         else:
             ac_class = SharedActorCritic
         self.actor_critic = ac_class(
-            observation_spec=self.behavior_spec.observation_spec,
+            observation_spec=self.behavior_spec.observation_specs,
             network_settings=trainer_settings.network_settings,
             action_spec=behavior_spec.action_spec,
             stream_names=reward_signal_names,
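
For context, the observation_spec argument populated here determines how many input encoders the network builds. A hedged sketch of that pattern; TinyActorCritic and its constructor arguments are hypothetical stand-ins for SharedActorCritic, not the real class:

import math
import torch
from torch import nn
from typing import List, Tuple

class TinyActorCritic(nn.Module):
    """Hypothetical stand-in for SharedActorCritic: one encoder per
    observation shape, feeding a shared body."""

    def __init__(self, observation_shapes: List[Tuple[int, ...]], hidden: int = 32):
        super().__init__()
        # One encoder per entry, mirroring one encoder per observation spec.
        self.encoders = nn.ModuleList(
            nn.Linear(math.prod(shape), hidden) for shape in observation_shapes
        )
        self.body = nn.Linear(hidden * len(observation_shapes), hidden)

    def forward(self, obs: List[torch.Tensor]) -> torch.Tensor:
        encoded = [enc(o.flatten(1)) for enc, o in zip(self.encoders, obs)]
        return torch.relu(self.body(torch.cat(encoded, dim=1)))

# The shape list plays the role of behavior_spec.observation_specs.
net = TinyActorCritic([(8,), (4,)])
out = net([torch.zeros(2, 8), torch.zeros(2, 4)])  # batch of 2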

ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)

         for name in self.reward_signals:
             rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
-        n_obs = len(self.policy.behavior_spec.observation_spec)
+        n_obs = len(self.policy.behavior_spec.observation_specs)
         current_obs = ObsUtil.from_buffer(batch, n_obs)
         # Convert to tensors
         current_obs = [ModelUtils.list_to_tensor(obs) for obs in current_obs]
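
ModelUtils.list_to_tensor, used twice in this hunk, boils down to stacking a list of per-step NumPy arrays into one float tensor. A minimal sketch of an equivalent helper (an assumed reimplementation for illustration, not ML-Agents' own code):

import numpy as np
import torch
from typing import List

def list_to_tensor(arrays: List[np.ndarray], dtype: torch.dtype = torch.float32) -> torch.Tensor:
    # Going through np.asarray first avoids torch's slow element-by-element
    # conversion of a plain Python list.
    return torch.as_tensor(np.asarray(arrays), dtype=dtype)

# One tensor per sensor, as in the loop over current_obs above.
current_obs = [[np.zeros(8), np.zeros(8)], [np.zeros((3, 4)), np.zeros((3, 4))]]
tensors = [list_to_tensor(obs) for obs in current_obs]
assert tensors[0].shape == (2, 8) and tensors[1].shape == (2, 3, 4)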

ml-agents/mlagents/trainers/tests/mock_brain.py (2 changes)

 ) -> AgentBuffer:
     trajectory = make_fake_trajectory(
         length,
-        behavior_spec.observation_spec,
+        behavior_spec.observation_specs,
         action_spec=behavior_spec.action_spec,
         memory_size=memory_size,
     )

ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 changes)

     time_horizon = 15
     trajectory = make_fake_trajectory(
         length=time_horizon,
-        observation_spec=optimizer.policy.behavior_spec.observation_spec,
+        observation_spec=optimizer.policy.behavior_spec.observation_specs,
         action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
         max_step_complete=True,
     )
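
This test hunk and the mock_brain.py one above both hand the spec list to make_fake_trajectory, which fabricates one random observation per spec at each step. A hedged sketch of that idea; fake_obs below is a hypothetical miniature, not the real helper:

import numpy as np
from typing import List, Tuple

def fake_obs(observation_shapes: List[Tuple[int, ...]], length: int) -> List[List[np.ndarray]]:
    """Hypothetical: per step, draw one random array per observation shape."""
    return [
        [np.random.rand(*shape).astype(np.float32) for shape in observation_shapes]
        for _ in range(length)
    ]

steps = fake_obs([(8,), (84, 84, 3)], length=15)  # time_horizon = 15
assert len(steps) == 15 and len(steps[0]) == 2  # one entry per spec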

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

     Helper function for update_batch.
     """
     np_obs = ObsUtil.from_buffer(
-        mini_batch_demo, len(self.policy.behavior_spec.observation_spec)
+        mini_batch_demo, len(self.policy.behavior_spec.observation_specs)
     )
     # Convert to tensors
     tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]

ml-agents/mlagents/trainers/torch/model_serialization.py (4 changes)

         )
         dummy_vec_obs = [torch.zeros(batch_dim + [vec_obs_size])]
         # create input shape of NCHW
-        # (It's NHWC in self.policy.behavior_spec.observation_spec.shape)
+        # (It's NHWC in self.policy.behavior_spec.observation_specs.shape)
-            for obs_spec in self.policy.behavior_spec.observation_spec
+            for obs_spec in self.policy.behavior_spec.observation_specs
             if len(obs_spec.shape) == 3
         ]
         dummy_masks = torch.ones(
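
The serialization path has to hand the exporter NCHW inputs even though the spec stores visual shapes as HWC, hence the dimension shuffle this hunk's comment describes. A minimal sketch of that transposition, assuming any 3-element shape is an image (the shape list is illustrative):

import torch
from typing import List, Tuple

observation_shapes: List[Tuple[int, ...]] = [(8,), (84, 84, 3)]  # vector + HWC camera
batch_dim = [1]

# Specs store (H, W, C); export wants (N, C, H, W), so reorder the dims
# when building the dummy visual inputs.
dummy_vis_obs = [
    torch.zeros(batch_dim + [shape[2], shape[0], shape[1]])
    for shape in observation_shapes
    if len(shape) == 3
]
assert dummy_vis_obs[0].shape == (1, 3, 84, 84)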
