
Training runs, but doesn’t actually work

/develop/add-fire
Arthur Juliani, 5 years ago
Current commit
3c82bf59
11 files changed, with 37 insertions and 16 deletions
  1. Project/ProjectSettings/EditorBuildSettings.asset (5 changes)
  2. Project/ProjectSettings/ProjectVersion.txt (2 changes)
  3. Project/ProjectSettings/UnityConnectSettings.asset (2 changes)
  4. ml-agents/mlagents/trainers/models_torch.py (6 changes)
  5. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (17 changes)
  6. ml-agents/mlagents/trainers/policy/torch_policy.py (1 change)
  7. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (15 changes)
  8. ml-agents/mlagents/trainers/ppo/trainer.py (1 change)
  9. ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)
  10. ml-agents/mlagents/trainers/tests/test_reward_signals.py (2 changes)
  11. /ml-agents/mlagents/trainers/ppo/optimizer_tf.py (0 changes, renamed)

Project/ProjectSettings/EditorBuildSettings.asset (5 changes)

EditorBuildSettings:
  m_ObjectHideFlags: 0
  serializedVersion: 2
- m_Scenes: []
+ m_Scenes:
+ - enabled: 1
+   path: Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity
+   guid: b9ac0cbf961bf4dacbfa0aa9c0d60aaa
  m_configObjects: {}

Project/ProjectSettings/ProjectVersion.txt (2 changes)

- m_EditorVersion: 2018.4.17f1
+ m_EditorVersion: 2018.4.20f1

Project/ProjectSettings/UnityConnectSettings.asset (2 changes)

UnityConnectSettings:
  m_ObjectHideFlags: 0
  serializedVersion: 1
- m_Enabled: 1
+ m_Enabled: 0
  m_TestMode: 0
  m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
  m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

ml-agents/mlagents/trainers/models_torch.py (6 changes)

        for visual_size in visual_sizes:
            self.visual_encoders.append(visual_encoder(visual_size))
        self.vector_encoders = nn.ModuleList(self.vector_encoders)
        self.visual_encoders = nn.ModuleList(self.visual_encoders)
        if use_lstm:
            self.lstm = nn.LSTM(h_size, h_size, 1)

        use_lstm,
    ):
        super(Critic, self).__init__()
        self.stream_names = stream_names
        self.network_body = NetworkBody(
            vector_sizes,
            visual_sizes,

            vis_encode_type,
            use_lstm,
        )
        self.stream_names = stream_names
        self.value_heads = ValueHeads(stream_names, h_size)
    def forward(self, vec_inputs, vis_inputs):

        for name in stream_names:
            value = nn.Linear(input_size, 1)
            self.value_heads[name] = value
        self.value_heads = nn.ModuleDict(self.value_heads)
    def forward(self, hidden):
        value_outputs = {}

        for _ in range(num_layers - 1):
            self.layers.append(nn.Linear(hidden_size, hidden_size))
            self.layers.append(nn.ReLU())
        self.layers = nn.ModuleList(self.layers)
    def forward(self, inputs):
        x = inputs
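For reference, the registration pattern these hunks rely on: plain Python lists and dicts of layers are invisible to the optimizer until they are wrapped in nn.ModuleList / nn.ModuleDict. A minimal, self-contained sketch of the idea, with class and argument names that are illustrative rather than the repo's:

import torch
from torch import nn

class CriticSketch(nn.Module):
    # Illustrative only: encoders registered via ModuleList, one scalar value
    # head per reward-signal stream registered via ModuleDict.
    def __init__(self, vector_sizes, stream_names, h_size=128):
        super().__init__()
        self.vector_encoders = nn.ModuleList(
            [nn.Linear(size, h_size) for size in vector_sizes]
        )
        self.value_heads = nn.ModuleDict(
            {name: nn.Linear(h_size, 1) for name in stream_names}
        )

    def forward(self, vec_inputs):
        hidden = torch.relu(self.vector_encoders[0](vec_inputs[0]))
        return {name: head(hidden) for name, head in self.value_heads.items()}

critic = CriticSketch([8], ["extrinsic", "curiosity"])
values = critic([torch.zeros(4, 8)])  # two (4, 1) value tensors, one per stream

Skipping the ModuleList/ModuleDict wrapping would leave those parameters out of .parameters(), which is one classic way a network trains without errors yet never improves.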

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (17 changes)

    def get_trajectory_value_estimates(
        self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
    ) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
-       vector_obs = torch.Tensor(np.array(batch["vector_obs"]))
-       visual_obs = batch["visual_obs"]
+       vector_obs = [torch.Tensor(np.array(batch["vector_obs"]))]
+       if self.policy.use_vis_obs:
+           visual_obs = batch["visual_obs"]
+       else:
+           visual_obs = []
        next_obs = [torch.Tensor(next_obs[0])]
        next_value_estimate, next_value = self.policy.critic(next_obs, next_obs)
-           value_estimates[name] = estimate.detach()
+           value_estimates[name] = estimate.squeeze(-1).detach().numpy()
+           next_value_estimate[name] = (
+               next_value_estimate[name].squeeze(-1).detach().numpy()
+           )
-       return value_estimates, value_estimates
+       return value_estimates, next_value_estimate
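The squeeze/detach/numpy conversions above are what hand the critic's per-stream value tensors back to the numpy-based trajectory code. A small illustrative helper showing the same shape handling; the function name and dict layout are assumptions, not the repo's API:

import torch

def to_numpy_estimates(value_tensors):
    # (batch, 1) tensors -> flat (batch,) numpy arrays, detached from the autograd graph.
    return {
        name: t.squeeze(-1).detach().numpy()
        for name, t in value_tensors.items()
    }

estimates = to_numpy_estimates({"extrinsic": torch.rand(8, 1)})
assert estimates["extrinsic"].shape == (8,)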

ml-agents/mlagents/trainers/policy/torch_policy.py (1 change)

        :param vector_obs: The vector observations to add to the running estimate of the distribution.
        """
        vector_obs = np.array(vector_obs)
        print(vector_obs.shape)
        vector_obs = [vector_obs]
        if self.use_vec_obs and self.normalize:
            self.critic.network_body.update_normalization(vector_obs)
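update_normalization feeds each batch of vector observations into a running estimate of their mean and variance. The repo's normalizer is not shown in this hunk; as a point of reference only, a standard parallel (Welford-style) update looks roughly like this, with all names illustrative:

import torch
from torch import nn

class RunningNormalizerSketch(nn.Module):
    """Illustrative running mean/variance tracker; not the ml-agents implementation."""

    def __init__(self, size):
        super().__init__()
        self.register_buffer("mean", torch.zeros(size))
        self.register_buffer("var", torch.ones(size))
        self.register_buffer("count", torch.tensor(1e-4))

    def update(self, batch):
        # Parallel update of mean and variance from one batch of observations.
        batch_mean = batch.mean(dim=0)
        batch_var = batch.var(dim=0, unbiased=False)
        batch_count = batch.shape[0]
        delta = batch_mean - self.mean
        total = self.count + batch_count
        new_var = (
            self.var * self.count
            + batch_var * batch_count
            + delta.pow(2) * self.count * batch_count / total
        ) / total
        self.mean = self.mean + delta * batch_count / total
        self.var = new_var
        self.count = total

    def normalize(self, x):
        return (x - self.mean) / torch.sqrt(self.var + 1e-8)

norm = RunningNormalizerSketch(8)
norm.update(torch.randn(32, 8))
normalized = norm.normalize(torch.randn(4, 8))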

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (15 changes)

        value_losses = []
        for name, head in values.items():
+           old_val_tensor = torch.DoubleTensor(old_values[name])
+           returns_tensor = torch.DoubleTensor(returns[name])
-           v_opt_a = (torch.DoubleTensor(returns[name]) - torch.sum(head, dim=1)) ** 2
-           v_opt_b = (torch.DoubleTensor(returns[name]) - clipped_value_estimate) ** 2
+           v_opt_a = (returns_tensor - torch.sum(head, dim=1)) ** 2
+           v_opt_b = (returns_tensor - clipped_value_estimate) ** 2
            value_loss = torch.mean(torch.max(v_opt_a, v_opt_b))
            value_losses.append(value_loss)
        value_loss = torch.mean(torch.stack(value_losses))
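The hunk above only shows the squared-error terms; clipped_value_estimate is referenced but its definition is not part of the hunk. A self-contained sketch of the PPO clipped value loss for a single reward stream, assuming the usual formulation (names are illustrative):

import torch

def clipped_value_loss(head, old_values, returns, epsilon=0.2):
    # head: (batch, 1) value prediction for one reward-signal stream.
    value_pred = torch.sum(head, dim=1)
    # Keep the new estimate within +/- epsilon of the estimate used when the returns were computed.
    clipped_value_estimate = old_values + torch.clamp(
        value_pred - old_values, -epsilon, epsilon
    )
    v_opt_a = (returns - value_pred) ** 2
    v_opt_b = (returns - clipped_value_estimate) ** 2
    # Pessimistic (elementwise max) of clipped and unclipped errors, averaged over the batch.
    return torch.mean(torch.max(v_opt_a, v_opt_b))

loss = clipped_value_loss(torch.rand(8, 1), torch.rand(8), torch.rand(8))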

        returns = {}
        old_values = {}
        for name in self.reward_signals:
-           returns[name] = batch["{}_returns".format(name)]
-           old_values[name] = batch["{}_value_estimates".format(name)]
+           old_values[name] = np.array(batch["{}_value_estimates".format(name)])
+           returns[name] = np.array(batch["{}_returns".format(name)])
-       vis_obs = np.array(batch["visual_obs"])
        vec_obs = [torch.Tensor(vec_obs)]
+       if self.policy.use_vis_obs:
+           vis_obs = np.array(batch["visual_obs"])
+       else:
+           vis_obs = []
        actions, log_probs, entropy, values = self.policy.execute_model(
            vec_obs, vis_obs
        )
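For context, the loop above keys the update batch by reward-signal name. A small illustrative example of that dict construction; the batch contents here are made up:

import numpy as np

# Hypothetical batch layout: one "<signal>_returns" and "<signal>_value_estimates"
# entry per reward signal, as used in the hunk above.
batch = {
    "extrinsic_returns": [1.0, 0.5, 0.0],
    "extrinsic_value_estimates": [0.9, 0.4, 0.1],
}
reward_signals = ["extrinsic"]

old_values = {
    name: np.array(batch["{}_value_estimates".format(name)]) for name in reward_signals
}
returns = {name: np.array(batch["{}_returns".format(name)]) for name in reward_signals}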

ml-agents/mlagents/trainers/ppo/trainer.py (1 change)

            trajectory.next_obs,
            trajectory.done_reached and not trajectory.max_step_reached,
        )
        for name, v in value_estimates.items():
            agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
        self._stats_reporter.add_stat(

ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)

import yaml
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
- from mlagents.trainers.ppo.optimizer import PPOOptimizer
+ from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.agent_processor import AgentManagerQueue

ml-agents/mlagents/trainers/tests/test_reward_signals.py (2 changes)

import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.sac.optimizer import SACOptimizer
- from mlagents.trainers.ppo.optimizer import PPOOptimizer
+ from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
def ppo_dummy_config():

/ml-agents/mlagents/trainers/ppo/optimizer.py → /ml-agents/mlagents/trainers/ppo/optimizer_tf.py (renamed, 0 changes)
