
Resolving a few bugs

/develop/add-fire
Arthur Juliani, 4 years ago
Commit 2e51260a
4 changed files with 30 additions and 12 deletions
  1. ml-agents/mlagents/trainers/models_torch.py (26 changes)
  2. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (1 change)
  3. ml-agents/mlagents/trainers/policy/torch_policy.py (8 changes)
  4. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (7 changes)

ml-agents/mlagents/trainers/models_torch.py (26 changes)


             torch.zeros(1, batch_size, self.m_size),
         )
-    def update_normalization(self, inputs):
-        self.normalizer.update(inputs)
+    def update_normalization(self, vec_inputs):
+        for idx, vec_input in enumerate(vec_inputs):
+            self.vector_normalizers[idx].update(vec_input)
     def forward(self, vec_inputs, vis_inputs):
         vec_embeds = []

-            vec_input = self.normalizers[idx](vec_inputs[idx])
+            vec_input = self.vector_normalizers[idx](vec_input)
             hidden = encoder(vec_input)
             vec_embeds.append(hidden)

             vis_embeds.append(hidden)
-        vec_embeds = torch.cat(vec_embeds)
-        vis_embeds = torch.cat(vis_embeds)
-        embedding = torch.cat([vec_embeds, vis_embeds])
+        if len(vec_embeds) > 0:
+            vec_embeds = torch.cat(vec_embeds)
+        if len(vis_embeds) > 0:
+            vis_embeds = torch.cat(vis_embeds)
+        if len(vec_embeds) > 0 and len(vis_embeds) > 0:
+            embedding = torch.cat([vec_embeds, vis_embeds])
+        elif len(vec_embeds) > 0:
+            embedding = vec_embeds
+        else:
+            embedding = vis_embeds
         if self.use_lstm:
             embedding, self.memory = self.lstm(embedding, self.memory)
         return embedding

         value_outputs = {}
         for stream_name, _ in self.value_heads.items():
             value_outputs[stream_name] = self.value_heads[stream_name](hidden)
-        return value_outputs, torch.mean(torch.stack(list(value_outputs)), dim=0)
+        return (
+            value_outputs,
+            torch.mean(torch.stack(list(value_outputs.values())), dim=0),
+        )
 class VectorEncoder(nn.Module):
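
The forward() change works around a crash when an agent has only one observation type: torch.cat raises an error on an empty list, so each stream is only concatenated when it is actually present. A minimal standalone sketch of the same guard (function and variable names here are illustrative, not taken from models_torch.py):

import torch

def combine_embeddings(vec_embeds, vis_embeds):
    # torch.cat([]) raises "expected a non-empty list of Tensors",
    # so only concatenate the streams that actually produced embeddings.
    if len(vec_embeds) > 0:
        vec_embeds = torch.cat(vec_embeds)
    if len(vis_embeds) > 0:
        vis_embeds = torch.cat(vis_embeds)
    if len(vec_embeds) > 0 and len(vis_embeds) > 0:
        return torch.cat([vec_embeds, vis_embeds])
    elif len(vec_embeds) > 0:
        return vec_embeds
    return vis_embeds

# Vector-only agent: no visual encoders, so vis_embeds stays empty.
print(combine_embeddings([torch.ones(2, 4)], []).shape)  # torch.Size([2, 4])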

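The value-head return fix is similar in spirit: list(value_outputs) yields the dict's string keys, which torch.stack cannot stack, while list(value_outputs.values()) yields the value tensors themselves. A small sketch with made-up reward-stream names:

import torch

value_outputs = {"extrinsic": torch.tensor([0.5]), "curiosity": torch.tensor([0.1])}

# Stacking the tensor values (not the keys) and averaging across reward streams.
mean_value = torch.mean(torch.stack(list(value_outputs.values())), dim=0)
print(mean_value)  # tensor([0.3000])
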
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (1 change)


         self.m_size: int = 0
-        self.global_step = torch.tensor(0)
         self.bc_module: Optional[BCModule] = None
         self.create_reward_signals(trainer_params["reward_signals"])
     def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
         pass

ml-agents/mlagents/trainers/policy/torch_policy.py (8 changes)


         self.normalize = trainer_params["normalize"]
         self.seed = seed
         self.brain = brain
+        self.global_step = 0
         self.act_size = brain.vector_action_space_size
         self.sequence_length = 1

         log_probs = torch.stack(log_probs)
         entropies = torch.stack(entropies)
-        value_heads = self.critic(vec_obs, vis_obs)
+        value_heads, mean_value = self.critic(vec_obs, vis_obs)
         return actions, log_probs, entropies, value_heads
     @timed

         :return: Outputs from network as defined by self.inference_dict.
         """
         vec_obs, vis_obs, masks = self.split_decision_step(decision_requests)
+        vec_obs = [vec_obs]  # For consistency with visual observations
         run_out = {}
         action, log_probs, entropy, value_heads = self.execute_model(
             vec_obs, vis_obs, masks

         Gets current model step.
         :return: current model step.
         """
-        step = self.global_step.detach().numpy()
+        step = self.global_step
         return step
     def increment_step(self, n_steps):

-        self.global_step = self.global_step + n_steps
+        self.global_step += n_steps
         return self.get_current_step()
     def save_model(self, step):

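In torch_policy.py, the step counter becomes a plain Python int, so get_current_step no longer needs the detach()/numpy() round trip that a torch tensor required. A toy sketch of how the two accessors behave after the change (standalone class, not the real TorchPolicy):

class StepCounter:
    def __init__(self):
        # Plain int: no tensor bookkeeping needed just to count steps.
        self.global_step = 0

    def get_current_step(self):
        return self.global_step

    def increment_step(self, n_steps):
        self.global_step += n_steps
        return self.get_current_step()

counter = StepCounter()
counter.increment_step(64)
print(counter.get_current_step())  # 64
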
ml-agents/mlagents/trainers/ppo/optimizer_torch.py (7 changes)


         # Create the graph here to give more granular control of the TF graph to the Optimizer.
         super(PPOOptimizer, self).__init__(policy, trainer_params)
+        params = list(self.policy.actor.parameters()) + list(
+            self.policy.critic.parameters()
+        )
-            self.policy.actor.parameters() + self.policy.critic.parameters(),
-            lr=self.trainer_params["learning_rate"],
+            params, lr=self.trainer_params["learning_rate"]
         )
         reward_signal_configs = trainer_params["reward_signals"]
         self.stats_name_to_update_name = {

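The PPO optimizer fix addresses a TypeError: Module.parameters() returns a generator, and two generators cannot be joined with +, so the old argument expression failed before Adam was ever constructed. Materializing each into a list first makes the concatenation valid. A self-contained sketch with toy modules standing in for the actor and critic:

import torch

actor = torch.nn.Linear(4, 2)
critic = torch.nn.Linear(4, 1)

# actor.parameters() + critic.parameters() would raise:
# TypeError: unsupported operand type(s) for +: 'generator' and 'generator'
params = list(actor.parameters()) + list(critic.parameters())
optimizer = torch.optim.Adam(params, lr=3e-4)
print(len(params))  # 4 parameter tensors: two weights and two biases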