Browse code

Use LSTM and fix a few merge errors

/develop/add-fire
Arthur Juliani, 5 years ago
Current commit: be7e55e1
4 files changed, 20 insertions(+), 18 deletions(-)
  1. ml-agents/mlagents/trainers/models_torch.py (26 changes)
  2. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (7 changes)
  3. ml-agents/mlagents/trainers/policy/policy.py (2 changes)
  4. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (3 changes)

ml-agents/mlagents/trainers/models_torch.py (26 changes)


         self.vector_encoders = nn.ModuleList(self.vector_encoders)
         self.visual_encoders = nn.ModuleList(self.visual_encoders)
         if use_lstm:
-            self.lstm = nn.GRU(h_size, h_size, 1)
-
-    def clear_memory(self, batch_size):
-        self.memory = (
-            torch.zeros(1, batch_size, self.m_size),
-            torch.zeros(1, batch_size, self.m_size),
-        )
+            self.lstm = nn.LSTM(h_size, m_size // 2, 1)

     def update_normalization(self, vec_inputs):
         if self.normalize:
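Note: nn.GRU carries a single hidden-state tensor, while nn.LSTM carries a (hidden, cell) pair. This change packs both halves of the LSTM state into one m_size-wide memory tensor, which is why the hidden size becomes m_size // 2 rather than h_size. A minimal sketch of the difference, with hypothetical sizes:

import torch
from torch import nn

h_size, m_size = 128, 256                # hypothetical widths
x = torch.zeros(4, 3, h_size)            # [seq_len, batch, h_size]

gru = nn.GRU(h_size, h_size, 1)          # old: one state tensor, width h_size
_, h = gru(x)                            # h: [1, 3, h_size]

lstm = nn.LSTM(h_size, m_size // 2, 1)   # new: two state tensors, each m_size // 2 wide
_, (h, c) = lstm(x)                      # h and c: [1, 3, m_size // 2] each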

         if self.use_lstm:
             embedding = embedding.reshape([sequence_length, -1, self.h_size])
+            memories = torch.split(memories, self.m_size // 2, dim=-1)
             embedding, memories = self.lstm(embedding, memories)
-            embedding = embedding.reshape([-1, self.h_size])
+            embedding = embedding.reshape([-1, self.m_size // 2])
+            memories = torch.cat(memories, dim=-1)
         return embedding, memories
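The forward pass then treats the single memory tensor as the packed (h, c) pair: torch.split unpacks it before the LSTM call, and torch.cat repacks the resulting states afterward. A self-contained sketch of that round trip, with hypothetical shapes and lstm standing in for self.lstm:

import torch
from torch import nn

h_size, m_size, seq_len, batch = 128, 256, 4, 3   # hypothetical sizes
lstm = nn.LSTM(h_size, m_size // 2, 1)

embedding = torch.zeros(seq_len * batch, h_size)  # flat [T * B, h_size] encoding
memories = torch.zeros(1, batch, m_size)          # h and c packed side by side

embedding = embedding.reshape([seq_len, -1, h_size])
h0, c0 = torch.split(memories, m_size // 2, dim=-1)   # unpack to [1, B, m_size // 2] each
output, (hn, cn) = lstm(embedding, (h0, c0))
embedding = output.reshape([-1, m_size // 2])         # back to flat [T * B, m_size // 2]
memories = torch.cat((hn, cn), dim=-1)                # repack into one [1, B, m_size] tensor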

             vis_encode_type,
             use_lstm,
         )
+        if use_lstm:
+            embedding_size = m_size // 2
+        else:
+            embedding_size = h_size
-        self.distribution = GaussianDistribution(h_size, act_size[0])
+        self.distribution = GaussianDistribution(embedding_size, act_size[0])
-        self.distribution = MultiCategoricalDistribution(h_size, act_size)
+        self.distribution = MultiCategoricalDistribution(embedding_size, act_size)
         if separate_critic:
             self.critic = Critic(
                 stream_names,

             )
         else:
             self.stream_names = stream_names
-            self.value_heads = ValueHeads(stream_names, h_size)
+            self.value_heads = ValueHeads(stream_names, embedding_size)
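Because an LSTM body emits vectors of width m_size // 2 rather than h_size, every head built on top of it (GaussianDistribution, MultiCategoricalDistribution, ValueHeads) must now be sized against embedding_size. The sizing rule reduces to one line; the values here are hypothetical:

use_lstm, h_size, m_size = True, 128, 256
embedding_size = m_size // 2 if use_lstm else h_size  # 128 with these settings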
     def update_normalization(self, vector_obs):
         self.network_body.update_normalization(vector_obs)

-    def critic_pass(self, vec_inputs, vis_inputs):
-        embedding, _ = self.network_body(vec_inputs, vis_inputs)
+    def critic_pass(self, vec_inputs, vis_inputs, memories=None):
+        embedding, _ = self.network_body(vec_inputs, vis_inputs, memories=memories)
         return self.value_heads(embedding)

     def sample_action(self, dists):
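Making memories an optional keyword keeps existing feed-forward call sites working unchanged while recurrent callers pass state explicitly. A toy stub of the pattern (the class, return value, and sizes here are hypothetical, not the actual ml-agents API):

import torch

class ToyCritic:
    def critic_pass(self, vec_inputs, vis_inputs, memories=None):
        # Hypothetical stub: the None default preserves the old two-argument calls.
        return {"extrinsic": torch.zeros(len(vec_inputs[0]))}

critic = ToyCritic()
vec, vis = [torch.zeros(3, 8)], []
values = critic.critic_pass(vec, vis)                                   # old call still works
values = critic.critic_pass(vec, vis, memories=torch.zeros(1, 3, 256))  # recurrent call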

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (7 changes)


         else:
             visual_obs = []
+        memory = torch.zeros([1, len(vector_obs[0]), self.policy.m_size])
+        next_memory = torch.zeros([1, 1, self.policy.m_size])
-            vector_obs, visual_obs
+            vector_obs, visual_obs, memory
-            next_obs, next_obs
+            next_obs, next_obs, next_memory
         )
         for name, estimate in value_estimates.items():
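Both memories are zero-initialized with the [num_layers, batch, m_size] layout the recurrent body expects: the first call batches one state per trajectory step (len(vector_obs[0]) steps), while the bootstrap value for next_obs covers a single step, hence batch 1. A sketch with hypothetical sizes:

import torch

T, m_size = 64, 256                        # hypothetical trajectory length and memory width
memory = torch.zeros([1, T, m_size])       # one zeroed state per step in the trajectory
next_memory = torch.zeros([1, 1, m_size])  # single state for the one bootstrap observation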

ml-agents/mlagents/trainers/policy/policy.py (2 changes)


         self.memory_dict: Dict[str, np.ndarray] = {}
         self.normalize = trainer_params["normalize"]
         self.use_recurrent = trainer_params["use_recurrent"]
-        self.model_path = trainer_params["model_path"]
+        self.model_path = trainer_params["output_path"]
         if self.use_recurrent:
             self.m_size = trainer_params["memory_size"]

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (3 changes)


             old_values[name] = np.array(batch["{}_value_estimates".format(name)])
             returns[name] = np.array(batch["{}_returns".format(name)])
-        vec_obs = np.array(batch["vector_obs"])
-        vec_obs = [torch.Tensor(vec_obs)]
+        vec_obs = [torch.Tensor(np.array(batch["vector_obs"]))]
         act_masks = torch.Tensor(np.array(batch["action_mask"]))
         if self.policy.use_continuous_act:
             actions = torch.Tensor(np.array(batch["actions"])).unsqueeze(-1)
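The two-step numpy-to-tensor conversion collapses into a single expression; the enclosing list stays because the network consumes a list with one tensor per vector-observation stream. A standalone sketch with a hypothetical batch:

import numpy as np
import torch

batch = {"vector_obs": np.zeros((32, 8), dtype=np.float32)}  # hypothetical buffer contents
vec_obs = [torch.Tensor(np.array(batch["vector_obs"]))]      # one tensor per obs stream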
