Browse Source

Pretty broken

/develop/unified-obs
Ervin Teng 4 years ago
Current commit
7a0ebfbd
8 changed files with 89 additions and 111 deletions
  1. ml-agents/mlagents/trainers/policy/torch_policy.py (34 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (14 changes)
  3. ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)
  4. ml-agents/mlagents/trainers/torch/encoders.py (18 changes)
  5. ml-agents/mlagents/trainers/torch/networks.py (63 changes)
  6. ml-agents/mlagents/trainers/torch/utils.py (33 changes)
  7. ml-agents/mlagents/trainers/trajectory.py (28 changes)
  8. Project/Assets/ML-Agents/Examples/CubeWars.meta (8 changes)

ml-agents/mlagents/trainers/policy/torch_policy.py (34 changes)


def _split_decision_step(
self, decision_requests: DecisionSteps
) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
obs = ModelUtils.list_to_tensor_list(decision_requests.obs)
mask = None
if not self.use_continuous_act:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
return vec_vis_obs, mask
return obs, mask
def update_normalization(self, vector_obs: np.ndarray) -> None:
def update_normalization(self, obs: List[np.ndarray]) -> None:
vector_obs = [torch.as_tensor(vector_obs)]
all_obs = tuple(ModelUtils.list_to_tensor(_obs) for _obs in obs)
print(all_obs)
self.actor_critic.update_normalization(vector_obs)
self.actor_critic.update_normalization(all_obs)
vec_obs: List[torch.Tensor],
vis_obs: List[torch.Tensor],
obs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

entropies, and output memories, all as Torch Tensors.
"""
if memories is None:
dists, memories = self.actor_critic.get_dists(
vec_obs, vis_obs, masks, memories, seq_len
)
dists, memories = self.actor_critic.get_dists(obs, masks, memories, seq_len)
vec_obs, vis_obs, masks, memories, seq_len
obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(

def evaluate_actions(
self,
vec_obs: torch.Tensor,
vis_obs: torch.Tensor,
obs: List[torch.Tensor],
actions: torch.Tensor,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

vec_obs, vis_obs, masks, memories, seq_len
obs, masks, memories, seq_len
)
action_list = [actions[..., i] for i in range(actions.shape[-1])]
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)

:param decision_requests: DecisionStep object containing inputs.
:return: Outputs from network as defined by self.inference_dict.
"""
vec_vis_obs, masks = self._split_decision_step(decision_requests)
vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations)]
vis_obs = [
torch.as_tensor(vis_ob) for vis_ob in vec_vis_obs.visual_observations
]
obs, masks = self._split_decision_step(decision_requests)
vec_obs, vis_obs, masks=masks, memories=memories
obs, masks=masks, memories=memories
)
run_out["pre_action"] = ModelUtils.to_numpy(action)

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (14 changes)


)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
obs = ModelUtils.list_to_tensor_list(batch["obs"])
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions_pre"]).unsqueeze(-1)

if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
if self.policy.use_vis_obs:
vis_obs = []
for idx, _ in enumerate(
self.policy.actor_critic.network_body.visual_processors
):
vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
vis_obs.append(vis_ob)
else:
vis_obs = []
vec_obs,
vis_obs,
obs,
masks=act_masks,
actions=actions,
memories=memories,

ml-agents/mlagents/trainers/ppo/trainer.py (2 changes)


agent_buffer_trajectory = trajectory.to_agentbuffer()
# Update the normalization
if self.is_training:
self.policy.update_normalization(agent_buffer_trajectory["vector_obs"])
self.policy.update_normalization(agent_buffer_trajectory["obs"])
# Get all value estimates
value_estimates, value_next = self.optimizer.get_trajectory_value_estimates(

ml-agents/mlagents/trainers/torch/encoders.py (18 changes)


return height, width
class VectorInput(nn.Module):
class InputProcessor:
def copy_normalization(self, other_input: "InputProcessor") -> None:
pass
def update_normalization(self, inputs: torch.Tensor) -> None:
pass
class VectorInput(nn.Module, InputProcessor):
def __init__(self, input_size: int, normalize: bool = False):
super().__init__()
self.normalizer: Optional[Normalizer] = None

self.normalizer.update(inputs)
class SmallVisualEncoder(nn.Module):
class SmallVisualEncoder(nn.Module, InputProcessor):
"""
CNN architecture used by King in their Candy Crush predictor
https://www.researchgate.net/publication/328307928_Human-Like_Playtesting_with_Deep_Learning

return self.dense(hidden)
class SimpleVisualEncoder(nn.Module):
class SimpleVisualEncoder(nn.Module, InputProcessor):
def __init__(
self, height: int, width: int, initial_channels: int, output_size: int
):

return self.dense(hidden)
class NatureVisualEncoder(nn.Module):
class NatureVisualEncoder(nn.Module, InputProcessor):
def __init__(
self, height: int, width: int, initial_channels: int, output_size: int
):

return input_tensor + self.layers(input_tensor)
class ResNetVisualEncoder(nn.Module):
class ResNetVisualEncoder(nn.Module, InputProcessor):
def __init__(
self, height: int, width: int, initial_channels: int, output_size: int
):
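
The InputProcessor base introduced above gives every encoder the same (no-op by default) normalization hooks, so the network body can hold vector and visual encoders in a single list. A small sketch of what a conforming processor might look like; IdentityInput is a hypothetical example and not part of the diff.

import torch
from torch import nn

class InputProcessor:
    # Default hooks are no-ops; encoders without normalization inherit them as-is.
    def update_normalization(self, inputs: torch.Tensor) -> None:
        pass

    def copy_normalization(self, other_input: "InputProcessor") -> None:
        pass

class IdentityInput(nn.Module, InputProcessor):
    # Hypothetical pass-through processor, shown only to illustrate the mixin pattern.
    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        return inputs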

ml-agents/mlagents/trainers/torch/networks.py (63 changes)


else 0
)
self.visual_processors, self.vector_processors, encoder_input_size = ModelUtils.create_input_processors(
self.processors, encoder_input_size = ModelUtils.create_input_processors(
observation_shapes,
self.h_size,
network_settings.vis_encode_type,

else:
self.lstm = None # type: ignore
def update_normalization(self, vec_inputs: List[torch.Tensor]) -> None:
for vec_input, vec_enc in zip(vec_inputs, self.vector_processors):
vec_enc.update_normalization(vec_input)
def update_normalization(self, net_inputs: List[torch.Tensor]) -> None:
for _in, enc in zip(net_inputs, self.processors):
enc.update_normalization(_in)
def copy_normalization(self, other_network: "NetworkBody") -> None:
if self.normalize:

def forward(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
inputs: List[torch.Tensor],
for idx, processor in enumerate(self.vector_processors):
vec_input = vec_inputs[idx]
processed_vec = processor(vec_input)
for idx, processor in enumerate(self.processors):
net_input = inputs[idx]
if not exporting_to_onnx.is_exporting() and len(net_input.shape) > 3:
net_input = net_input.permute([0, 3, 1, 2])
processed_vec = processor(net_input)
for idx, processor in enumerate(self.visual_processors):
vis_input = vis_inputs[idx]
if not exporting_to_onnx.is_exporting():
vis_input = vis_input.permute([0, 3, 1, 2])
processed_vis = processor(vis_input)
encodes.append(processed_vis)
if len(encodes) == 0:
raise Exception("No valid inputs to network.")

def forward(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
net_inputs: List[torch.Tensor],
vec_inputs, vis_inputs, actions, memories, sequence_length
net_inputs, actions, memories, sequence_length
)
output = self.value_heads(encoding)
return output, memories

def memory_size(self) -> int:
return self.network_body.memory_size
def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
self.network_body.update_normalization(vector_obs)
def update_normalization(self, obs: List[torch.Tensor]) -> None:
self.network_body.update_normalization(obs)
def sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
actions = []

def get_dists(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
net_inputs: List[torch.Tensor],
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
net_inputs, memories=memories, sequence_length=sequence_length
)
if self.action_spec.is_continuous():
dists = self.distribution(encoding)

def forward(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
net_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, int, int, int, int]:

dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
dists, _ = self.get_dists(net_inputs, masks, memories, 1)
if self.action_spec.is_continuous():
action_list = self.sample_action(dists)
action_out = torch.stack(action_list, dim=-1)

def critic_pass(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
net_inputs: List[torch.Tensor],
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
net_inputs, memories=memories, sequence_length=sequence_length
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
net_inputs: List[torch.Tensor],
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
net_inputs, memories=memories, sequence_length=sequence_length
)
if self.action_spec.is_continuous():
dists = self.distribution(encoding)

def get_dist_and_value(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
net_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

critic_mem = None
actor_mem = None
dists, actor_mem_outs = self.get_dists(
vec_inputs,
vis_inputs,
net_inputs,
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
net_inputs, memories=critic_mem, sequence_length=sequence_length
)
if self.use_lstm:
mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)
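
With the encoders unified into a single self.processors list, the forward pass reduces to one loop that permutes image tensors to channel-first and concatenates the per-sensor encodings. The sketch below is a simplified standalone version of that loop: it drops the ONNX-export check shown in the diff, assumes every processor returns a flat (batch, features) encoding, and uses an illustrative function name.

from typing import List
import torch
from torch import nn

def encode_inputs(processors: nn.ModuleList, inputs: List[torch.Tensor]) -> torch.Tensor:
    encodes = []
    for processor, net_input in zip(processors, inputs):
        if len(net_input.shape) > 3:
            # Visual observations arrive NHWC; PyTorch convolutions expect NCHW.
            net_input = net_input.permute([0, 3, 1, 2])
        encodes.append(processor(net_input))
    if len(encodes) == 0:
        raise Exception("No valid inputs to network.")
    # Assumes each processor emits a flat (batch, features) encoding.
    return torch.cat(encodes, dim=-1)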

ml-agents/mlagents/trainers/torch/utils.py (33 changes)


:param normalize: Normalize all vector inputs.
:return: Tuple of visual encoders and vector encoders each as a list.
"""
visual_encoders: List[nn.Module] = []
vector_encoders: List[nn.Module] = []
encoders: List[nn.Module] = []
vector_size = 0
visual_output_size = 0
total_encoded_size = 0
visual_encoders.append(
encoders.append(
visual_output_size += h_size
total_encoded_size += h_size
vector_size += dimension[0]
vector_size = dimension[0]
encoders.append(VectorInput(vector_size, normalize))
total_encoded_size += vector_size
if vector_size > 0:
vector_encoders.append(VectorInput(vector_size, normalize))
total_processed_size = vector_size + visual_output_size
nn.ModuleList(visual_encoders),
nn.ModuleList(vector_encoders),
total_processed_size,
nn.ModuleList(encoders),
total_encoded_size,
)
@staticmethod

calling as_tensor on the list directly.
"""
return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)
@staticmethod
def list_to_tensor_list(
ndarray_list: List[np.ndarray], dtype: Optional[torch.dtype] = torch.float32
) -> List[torch.Tensor]:
"""
Converts a list of numpy arrays into a list of tensors. MUCH faster than
calling as_tensor on the list directly.
"""
return [torch.as_tensor(np.asanyarray(_arr), dtype=dtype) for _arr in ndarray_list]
@staticmethod
def to_numpy(tensor: torch.Tensor) -> np.ndarray:
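
The two docstrings above describe the difference between the conversion helpers: list_to_tensor funnels a list of same-shaped arrays through np.asanyarray into one stacked tensor, while the new list_to_tensor_list keeps one tensor per observation so sensors of different shapes can coexist. A small illustration with made-up shapes:

import numpy as np
import torch

# Mixed-shape sensors must stay as separate tensors (list_to_tensor_list style).
vector_ob = np.zeros((8,), dtype=np.float32)          # made-up vector sensor
visual_ob = np.zeros((84, 84, 3), dtype=np.float32)   # made-up camera sensor
tensor_list = [
    torch.as_tensor(np.asanyarray(o), dtype=torch.float32)
    for o in (vector_ob, visual_ob)
]
assert tensor_list[0].shape == (8,) and tensor_list[1].shape == (84, 84, 3)

# Same-shaped arrays can be stacked into a single tensor (list_to_tensor style).
batch = [np.zeros((8,), dtype=np.float32) for _ in range(4)]
stacked = torch.as_tensor(np.asanyarray(batch), dtype=torch.float32)
assert stacked.shape == (4, 8)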

ml-agents/mlagents/trainers/trajectory.py (28 changes)


step of the trajectory.
"""
agent_buffer_trajectory = AgentBuffer()
vec_vis_obs = SplitObservations.from_observations(self.steps[0].obs)
curr_obs = self.steps[0].obs
next_vec_vis_obs = SplitObservations.from_observations(
self.steps[step + 1].obs
)
next_obs = self.steps[step + 1].obs
next_vec_vis_obs = SplitObservations.from_observations(self.next_obs)
for i, _ in enumerate(vec_vis_obs.visual_observations):
agent_buffer_trajectory["visual_obs%d" % i].append(
vec_vis_obs.visual_observations[i]
)
agent_buffer_trajectory["next_visual_obs%d" % i].append(
next_vec_vis_obs.visual_observations[i]
)
agent_buffer_trajectory["vector_obs"].append(
vec_vis_obs.vector_observations
)
agent_buffer_trajectory["next_vector_in"].append(
next_vec_vis_obs.vector_observations
)
next_obs = self.next_obs
agent_buffer_trajectory["obs"].append(curr_obs)
agent_buffer_trajectory["next_obs"].append(next_obs)
if exp.memory is not None:
agent_buffer_trajectory["memory"].append(exp.memory)

agent_buffer_trajectory["prev_action"].append(exp.prev_action)
agent_buffer_trajectory["environment_rewards"].append(exp.reward)
# Store the next visual obs as the current
vec_vis_obs = next_vec_vis_obs
# Store the next obs as the current
curr_obs = next_obs
return agent_buffer_trajectory
@property
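
The trajectory buffer above now stores each step's full observation list under "obs" and the following step's list under "next_obs", falling back to self.next_obs for the final step. A toy sketch of that sliding-window bookkeeping, using a plain dict in place of AgentBuffer; all names here are illustrative.

from collections import defaultdict
from typing import Dict, List
import numpy as np

def to_buffer(
    step_obs: List[List[np.ndarray]], final_obs: List[np.ndarray]
) -> Dict[str, list]:
    buffer: Dict[str, list] = defaultdict(list)
    curr_obs = step_obs[0]
    for step in range(len(step_obs)):
        is_last = step == len(step_obs) - 1
        next_obs = final_obs if is_last else step_obs[step + 1]
        buffer["obs"].append(curr_obs)
        buffer["next_obs"].append(next_obs)
        # Store the next obs as the current for the following iteration.
        curr_obs = next_obs
    return buffer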

Project/Assets/ML-Agents/Examples/CubeWars.meta (8 changes)


fileFormatVersion: 2
guid: 624ff327b14154c41869c6ecdcdcc167
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant: