浏览代码

sac

/develop/jit
Ruo-Ping Dong 4 年前
当前提交
ef3be79e
共有 4 个文件被更改，包括 21 次插入和 15 次删除
  1. 4
      ml-agents/mlagents/trainers/sac/trainer.py
  2. 3
      ml-agents/mlagents/trainers/torch/distributions.py
  3. 6
      ml-agents/mlagents/trainers/torch/networks.py
  4. 23
      ml-agents/mlagents/trainers/torch/utils.py

4
ml-agents/mlagents/trainers/sac/trainer.py


agent_buffer_trajectory = trajectory.to_agentbuffer()
# Update the normalization
- if self.is_training:
- self.policy.update_normalization(agent_buffer_trajectory["vector_obs"])
+ # if self.is_training:
+ # self.policy.update_normalization(agent_buffer_trajectory["vector_obs"])
# Evaluate all reward functions for reporting purposes
self.collected_rewards["environment"][agent_id] += np.sum(

3
ml-agents/mlagents/trainers/torch/distributions.py


# if self.conditional_sigma:
# log_sigma = torch.clamp(self.log_sigma(inputs), min=-20, max=2)
# else:
- log_sigma = self.log_sigma
+ # log_sigma = self.log_sigma
+ log_sigma = torch.clamp(self.log_sigma(inputs), min=-20, max=2)
# if self.tanh_squash:
# return [TanhGaussianDistInstance(mu, torch.exp(log_sigma))]
# else:

6
ml-agents/mlagents/trainers/torch/networks.py


self.memory_size = 0
def update_normalization(self, vec_inputs: List[torch.Tensor]) -> None:
- for vec_input, vec_enc in zip(vec_inputs, self.vector_encoders):
- vec_enc.update_normalization(vec_input)
+ # for vec_input, vec_enc in zip(vec_inputs, self.vector_encoders):
+ # vec_enc.update_normalization(vec_input)
+ self.vector_encoders[0].update_normalization(vec_inputs[0])
def copy_normalization(self, other_network: "NetworkBody") -> None:
if self.normalize:

# def memory_size(self) -> int:
# return self.network_body.memory_size
+ # @torch.jit.export
def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
self.network_body.update_normalization(vector_obs)

23
ml-agents/mlagents/trainers/torch/utils.py


f"Unsupported shape of {dimension} for observation {i}"
)
if vector_size + unnormalized_inputs > 0:
- if unnormalized_inputs > 0:
- vector_encoders.append(
- VectorAndUnnormalizedInputEncoder(
- vector_size, h_size, unnormalized_inputs, num_layers, normalize
- )
- )
- else:
- vector_encoders.append(
- VectorEncoder(vector_size, h_size, num_layers, normalize)
- )
+ # if unnormalized_inputs > 0:
+ # vector_encoders.append(
+ # VectorAndUnnormalizedInputEncoder(
+ # vector_size, h_size, unnormalized_inputs, num_layers, normalize
+ # )
+ # )
+ # else:
+ # vector_encoders.append(
+ # VectorEncoder(vector_size, h_size, num_layers, normalize)
+ # )
+ vector_encoders.append(
+ VectorEncoder(vector_size, h_size, num_layers, normalize)
+ )
return nn.ModuleList(visual_encoders), nn.ModuleList(vector_encoders)
@staticmethod

正在加载...
取消
保存