
Multi Directional Walker and Initial Hypernetwork (#4740)

/goal-conditioning
GitHub, 4 years ago
Current commit
cc6b4564
17 files changed, with 3,209 insertions and 37 deletions
  1. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (11)
  2. ml-agents/mlagents/trainers/policy/torch_policy.py (13)
  3. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (5)
  4. ml-agents/mlagents/trainers/ppo/trainer.py (2)
  5. ml-agents/mlagents/trainers/torch/action_model.py (34)
  6. ml-agents/mlagents/trainers/torch/decoders.py (89)
  7. ml-agents/mlagents/trainers/torch/distributions.py (119)
  8. ml-agents/mlagents/trainers/torch/networks.py (56)
  9. ml-agents/mlagents/trainers/trajectory.py (2)
  10. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab (507)
  11. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta (7)
  12. Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity (1001)
  13. Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta (9)
  14. Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs (353)
  15. Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta (11)
  16. Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx (1001)
  17. config/ppo/MultiDirWalker.yaml (26)

11
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


def get_trajectory_value_estimates(
self, batch: AgentBuffer, next_obs: List[np.ndarray], done: bool
) -> Tuple[Dict[str, np.ndarray], Dict[str, float]]:
- vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
+ vector_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, 1:]]
+ goals = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, :1]]
if self.policy.use_vis_obs:
visual_obs = []
for idx, _ in enumerate(

vec_vis_obs = SplitObservations.from_observations(next_obs)
next_vec_obs = [
- ModelUtils.list_to_tensor(vec_vis_obs.vector_observations).unsqueeze(0)
+ ModelUtils.list_to_tensor(vec_vis_obs.vector_observations[1:]).unsqueeze(0)
]
next_vis_obs = [
ModelUtils.list_to_tensor(_vis_ob).unsqueeze(0)

+ # goals dont change but otherwise broken
+ next_goals = [torch.as_tensor(vec_vis_obs.vector_observations[:1])]
- vector_obs, visual_obs, memory, sequence_length=batch.num_experiences
+ vector_obs, visual_obs, goals, memory, sequence_length=batch.num_experiences
- next_vec_obs, next_vis_obs, next_memory, sequence_length=1
+ next_vec_obs, next_vis_obs, next_goals, next_memory, sequence_length=1
)
for name, estimate in value_estimates.items():
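Note the convention these trainer-side hunks establish: the goal id is packed into the first element of every vector observation, so the trainer slices it off ([:, :1]) and feeds only the remainder ([:, 1:]) to the network body; for the single next observation the same split is done without the batch dimension ([:1] / [1:]). A tiny illustration of the slicing with made-up values (assumed shapes only, not the trainer code):

import torch

# Batch of two vector observations, four values each; the first value is the goal id.
vector_obs = torch.tensor([[1.0, 0.2, -0.3, 0.7],
                           [0.0, 0.5, 0.1, -0.2]])
goals, obs = vector_obs[:, :1], vector_obs[:, 1:]
print(goals.squeeze(-1))  # tensor([1., 0.])   per-agent goal id
print(obs.shape)          # torch.Size([2, 3]) observation with the goal removed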

13
ml-agents/mlagents/trainers/policy/torch_policy.py


If this policy normalizes vector observations, this will update the norm values in the graph.
:param vector_obs: The vector observations to add to the running estimate of the distribution.
"""
- vector_obs = [torch.as_tensor(vector_obs)]
+ vector_obs = [torch.as_tensor(vector_obs)[:, 1:]]
if self.use_vec_obs and self.normalize:
self.actor_critic.update_normalization(vector_obs)

vec_obs: List[torch.Tensor],
vis_obs: List[torch.Tensor],
+ goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
"""
actions, log_probs, entropies, _, memories = self.actor_critic.get_action_stats_and_value(
- vec_obs, vis_obs, masks, memories, seq_len
+ vec_obs, vis_obs, goals, masks, memories, seq_len
)
return (actions, log_probs, entropies, memories)

vis_obs: torch.Tensor,
+ goals: torch.Tensor,
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

- vec_obs, vis_obs, actions, masks, memories, seq_len
+ vec_obs, vis_obs, goals, actions, masks, memories, seq_len
)
return log_probs, entropies, value_heads

:return: Outputs from network as defined by self.inference_dict.
"""
vec_vis_obs, masks = self._split_decision_step(decision_requests)
- vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations)]
+ vec_obs = [torch.as_tensor(vec_vis_obs.vector_observations[:, 1:])]
+ goals = [torch.as_tensor(vec_vis_obs.vector_observations[:, :1])]
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
)

action, log_probs, entropy, memories = self.sample_actions(
- vec_obs, vis_obs, masks=masks, memories=memories
+ vec_obs, vis_obs, goals, masks=masks, memories=memories
)
action_tuple = action.to_action_tuple()
run_out["action"] = action_tuple

5
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


)
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
- vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
+ vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, 1:]]
+ goals = [ModelUtils.list_to_tensor(batch["vector_obs"])[:, :1]]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)

log_probs, entropy, values = self.policy.evaluate_actions(
vec_obs,
vis_obs,
+ goals,
masks=act_masks,
actions=actions,
memories=memories,

+ 0.5 * value_loss
- decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.optimizer.zero_grad()

2
ml-agents/mlagents/trainers/ppo/trainer.py


self.seed,
behavior_spec,
self.trainer_settings,
- condition_sigma_on_obs=False, # Faster training for PPO
+ condition_sigma_on_obs=True, # Faster training for PPO
separate_critic=True, # Match network architecture with TF
)
return policy
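This flips condition_sigma_on_obs to True, so the policy's standard deviation is predicted from the encoding rather than kept as a single learned parameter. A rough sketch of the difference the flag makes, in plain PyTorch with illustrative shapes (not the ML-Agents GaussianDistribution itself):

import torch
from torch import nn

hidden_size, act_size, batch = 256, 4, 8
encoding = torch.randn(batch, hidden_size)

# condition_sigma_on_obs=False: one learned, state-independent log-sigma vector.
log_sigma_param = nn.Parameter(torch.zeros(1, act_size))
sigma_fixed = log_sigma_param.expand(batch, -1).exp()

# condition_sigma_on_obs=True: log-sigma predicted from the encoding as well.
sigma_head = nn.Linear(hidden_size, act_size)
sigma_conditioned = torch.clamp(sigma_head(encoding), -20, 2).exp()

print(sigma_fixed.shape, sigma_conditioned.shape)  # both torch.Size([8, 4])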

34
ml-agents/mlagents/trainers/torch/action_model.py


DistInstance,
DiscreteDistInstance,
GaussianDistribution,
+ GaussianHyperNetwork,
MultiCategoricalDistribution,
)
from mlagents.trainers.torch.agent_action import AgentAction

self._discrete_distribution = None
if self.action_spec.continuous_size > 0:
- self._continuous_distribution = GaussianDistribution(
- self.encoding_size,
- self.action_spec.continuous_size,
+ self._continuous_distribution = GaussianHyperNetwork(
+ num_layers=1,
+ layer_size=256,
+ hidden_size=self.encoding_size,
+ num_outputs=self.action_spec.continuous_size,
+ num_goals=2,
)
if self.action_spec.discrete_size > 0:

discrete_action.append(discrete_dist.sample())
return AgentAction(continuous_action, discrete_action)
- def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> DistInstances:
+ def _get_dists(
+ self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
+ ) -> DistInstances:
"""
Creates a DistInstances tuple using the continuous and discrete distributions
:params inputs: The encoding from the network body

discrete_dist: Optional[List[DiscreteDistInstance]] = None
# This checks None because mypy complains otherwise
if self._continuous_distribution is not None:
- continuous_dist = self._continuous_distribution(inputs)
+ continuous_dist = self._continuous_distribution(inputs, goal)
if self._discrete_distribution is not None:
discrete_dist = self._discrete_distribution(inputs, masks)
return DistInstances(continuous_dist, discrete_dist)

return action_log_probs, entropies
def evaluate(
- self, inputs: torch.Tensor, masks: torch.Tensor, actions: AgentAction
+ self,
+ inputs: torch.Tensor,
+ masks: torch.Tensor,
+ actions: AgentAction,
+ goal: torch.Tensor,
) -> Tuple[ActionLogProbs, torch.Tensor]:
"""
Given actions and encoding from the network body, gets the distributions and

:params actions: The AgentAction
:return: An ActionLogProbs tuple and a torch tensor of the distribution entropies.
"""
- dists = self._get_dists(inputs, masks)
+ dists = self._get_dists(inputs, masks, goal)
- def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
+ def get_action_out(
+ self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
+ ) -> torch.Tensor:
"""
Gets the tensors corresponding to the output of the policy network to be used for
inference. Called by the Actor's forward call.

"""
- dists = self._get_dists(inputs, masks)
+ dists = self._get_dists(inputs, masks, goal)
continuous_out, discrete_out, action_out_deprecated = None, None, None
if self.action_spec.continuous_size > 0 and dists.continuous is not None:
continuous_out = dists.continuous.exported_model_output()

return continuous_out, discrete_out, action_out_deprecated
def forward(
- self, inputs: torch.Tensor, masks: torch.Tensor
+ self, inputs: torch.Tensor, masks: torch.Tensor, goal: torch.Tensor
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor]:
"""
The forward method of this module. Outputs the action, log probs,

:return: Given the input, an AgentAction of the actions generated by the policy and the corresponding
ActionLogProbs and entropies.
"""
- dists = self._get_dists(inputs, masks)
+ dists = self._get_dists(inputs, masks, goal)
actions = self._sample_action(dists)
log_probs, entropies = self._get_probs_and_entropy(actions, dists)
# Use the sum of entropy across actions, not the mean

89
ml-agents/mlagents/trainers/torch/decoders.py


from typing import List, Dict
from mlagents.torch_utils import torch, nn
- from mlagents.trainers.torch.layers import linear_layer
+ from mlagents.trainers.torch.layers import (
+ linear_layer,
+ LinearEncoder,
+ Initialization,
+ Swish,
+ )
from collections import defaultdict
class ValueHeads(nn.Module):

for stream_name, head in self.value_heads.items():
value_outputs[stream_name] = head(hidden).squeeze(-1)
return value_outputs
class ValueHeadsHyperNetwork(nn.Module):
def __init__(
self,
num_layers,
layer_size,
num_goals,
stream_names: List[str],
input_size: int,
output_size: int = 1,
):
super().__init__()
self.stream_names = stream_names
self._num_goals = num_goals
self.input_size = input_size
self.output_size = output_size
self.streams_size = len(stream_names)
layers = []
layers.append(
linear_layer(
num_goals,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
for _ in range(num_layers - 1):
layers.append(
linear_layer(
layer_size,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
flat_output = linear_layer(
layer_size,
input_size * output_size * self.streams_size
+ self.output_size * self.streams_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self.hypernet = torch.nn.Sequential(*layers, flat_output)
def forward(
self, hidden: torch.Tensor, goal: torch.Tensor
) -> Dict[str, torch.Tensor]:
goal_onehot = torch.nn.functional.one_hot(
goal[0].long(), self._num_goals
).float()
# (b, i * o * streams + o * streams)
flat_output_weights = self.hypernet(goal_onehot)
b = hidden.size(0)
output_weights, output_bias = torch.split(
flat_output_weights,
self.streams_size * self.input_size * self.output_size,
dim=-1,
)
output_weights = torch.reshape(
output_weights, (self.streams_size, b, self.input_size, self.output_size)
)
output_bias = torch.reshape(
output_bias, (self.streams_size, b, self.output_size)
)
output_bias = output_bias.unsqueeze(dim=2)
value_outputs = {}
for stream_name, out_w, out_b in zip(
self.stream_names, output_weights, output_bias
):
inp_out_w = torch.bmm(hidden.unsqueeze(dim=1), out_w)
inp_out_w_out_b = inp_out_w + out_b
value_outputs[stream_name] = inp_out_w_out_b.squeeze()
return value_outputs
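ValueHeadsHyperNetwork replaces the plain per-stream linear value heads with heads whose weights and biases are generated from the one-hot goal and applied per sample with a batched matmul. A minimal, self-contained sketch of that pattern (plain PyTorch, a single value stream, illustrative names; not the ML-Agents class itself):

import torch
import torch.nn as nn


class GoalConditionedValueHead(nn.Module):
    """One value stream whose final linear layer is generated from the goal."""

    def __init__(self, num_goals: int, hidden_size: int, layer_size: int = 256):
        super().__init__()
        self.num_goals = num_goals
        self.hidden_size = hidden_size
        # Hypernetwork: one-hot goal -> flattened weights and bias of a linear head
        self.hypernet = nn.Sequential(
            nn.Linear(num_goals, layer_size),
            nn.SiLU(),  # stands in for the Swish layer used above
            nn.Linear(layer_size, hidden_size + 1),
        )

    def forward(self, hidden: torch.Tensor, goal_ids: torch.Tensor) -> torch.Tensor:
        # hidden: (batch, hidden_size); goal_ids: (batch,) integer goal indices
        goal_onehot = nn.functional.one_hot(goal_ids.long(), self.num_goals).float()
        flat = self.hypernet(goal_onehot)                      # (batch, hidden_size + 1)
        w, b = torch.split(flat, [self.hidden_size, 1], dim=-1)
        w = w.view(-1, self.hidden_size, 1)                    # per-sample weight matrix
        # Batched per-sample linear layer: (b, 1, h) @ (b, h, 1) -> (b, 1, 1)
        value = torch.bmm(hidden.unsqueeze(1), w).view(-1, 1) + b
        return value.squeeze(-1)                               # (batch,)


head = GoalConditionedValueHead(num_goals=2, hidden_size=256)
print(head(torch.randn(8, 256), torch.randint(0, 2, (8,))).shape)  # torch.Size([8])

The torch.bmm applies a different generated linear head to every sample in the batch, which is what lets a single critic specialize per goal without maintaining separate networks.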

119
ml-agents/mlagents/trainers/torch/distributions.py


from mlagents.torch_utils import torch, nn
import numpy as np
import math
- from mlagents.trainers.torch.layers import linear_layer, Initialization
+ from mlagents.trainers.torch.layers import (
+ linear_layer,
+ Initialization,
+ LinearEncoder,
+ Swish,
+ )
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero

# torch.cat here instead of torch.expand() becuase it is not supported in the
# verified version of Barracuda (1.0.2).
log_sigma = torch.cat([self.log_sigma] * inputs.shape[0], axis=0)
if self.tanh_squash:
return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
else:
return GaussianDistInstance(mu, torch.exp(log_sigma))
class GaussianHyperNetwork(nn.Module):
def __init__(
self,
num_layers,
layer_size,
hidden_size,
num_outputs,
conditional_sigma,
tanh_squash,
num_goals,
):
super().__init__()
self._num_goals = num_goals
self.hidden_size = hidden_size
self.tanh_squash = tanh_squash
self.conditional_sigma = conditional_sigma
self.num_outputs = num_outputs
layers = []
layers.append(
linear_layer(
num_goals,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
for _ in range(num_layers - 1):
layers.append(
linear_layer(
layer_size,
layer_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
)
layers.append(Swish())
if conditional_sigma:
flat_output = linear_layer(
layer_size,
2 * (hidden_size * num_outputs + num_outputs),
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self._log_sigma_w = None
else:
flat_output = linear_layer(
layer_size,
hidden_size * num_outputs + num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self._log_sigma_w = linear_layer(
num_goals,
num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
self.hypernet = torch.nn.Sequential(*layers, flat_output)
def forward(self, inputs: torch.Tensor, goal: torch.Tensor):
goal_onehot = torch.nn.functional.one_hot(
goal[0].long(), self._num_goals
).float()
# cond (b, 2 * H * O + O
# not cond (b, H * O + O
flat_output_weights = self.hypernet(goal_onehot)
b = inputs.size(0)
inputs = inputs.unsqueeze(dim=1)
if self.conditional_sigma:
mu_w_log_sigma_w, mu_b, log_sigma_b = torch.split(
flat_output_weights,
[
2 * self.hidden_size * self.num_outputs,
self.num_outputs,
self.num_outputs,
],
dim=-1,
)
mu_w_log_sigma_w = torch.reshape(
mu_w_log_sigma_w, (b, 2 * self.hidden_size, self.num_outputs)
)
mu_w, log_sigma_w = torch.split(mu_w_log_sigma_w, self.hidden_size, dim=1)
log_sigma = torch.bmm(inputs, log_sigma_w)
log_sigma = log_sigma + log_sigma_b
log_sigma = log_sigma.squeeze()
log_sigma = torch.clamp(log_sigma, min=-20, max=2)
else:
mu_w, mu_b = torch.split(
flat_output_weights, self.hidden_size * self.num_outputs, dim=-1
)
mu_w = torch.reshape(mu_w, (b, self.hidden_size, self.num_outputs))
log_sigma = self._log_sigma_w(goal_onehot)
log_sigma = torch.squeeze(log_sigma)
mu = torch.bmm(inputs, mu_w)
mu = mu + mu_b
mu = mu.squeeze()
if self.tanh_squash:
return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
else:
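GaussianHyperNetwork applies the same idea to the continuous policy head: the hypernetwork maps the one-hot goal to the weights and bias of the mu layer, and in the conditional_sigma=False branch a separate goal-only linear layer produces log_sigma, clamped before exponentiation. A standalone sketch of that branch (plain PyTorch and torch.distributions.Normal, illustrative names, no tanh squashing; not the ML-Agents class itself):

import torch
import torch.nn as nn
from torch.distributions import Normal


class GoalConditionedGaussian(nn.Module):
    """Gaussian head whose mu layer is generated from the goal; sigma depends on the goal only."""

    def __init__(self, num_goals: int, hidden_size: int, num_outputs: int, layer_size: int = 256):
        super().__init__()
        self.num_goals = num_goals
        self.hidden_size = hidden_size
        self.num_outputs = num_outputs
        # Hypernetwork: one-hot goal -> flattened (mu weights, mu bias)
        self.hypernet = nn.Sequential(
            nn.Linear(num_goals, layer_size),
            nn.SiLU(),
            nn.Linear(layer_size, hidden_size * num_outputs + num_outputs),
        )
        # State-independent, goal-dependent log standard deviation
        self.log_sigma = nn.Linear(num_goals, num_outputs)

    def forward(self, hidden: torch.Tensor, goal_ids: torch.Tensor) -> Normal:
        b = hidden.size(0)
        goal_onehot = nn.functional.one_hot(goal_ids.long(), self.num_goals).float()
        flat = self.hypernet(goal_onehot)
        mu_w, mu_b = torch.split(
            flat, [self.hidden_size * self.num_outputs, self.num_outputs], dim=-1
        )
        mu_w = mu_w.view(b, self.hidden_size, self.num_outputs)
        mu = torch.bmm(hidden.unsqueeze(1), mu_w).squeeze(1) + mu_b  # (b, num_outputs)
        log_sigma = torch.clamp(self.log_sigma(goal_onehot), -20, 2)
        return Normal(mu, log_sigma.exp())


dist = GoalConditionedGaussian(num_goals=2, hidden_size=256, num_outputs=4)(
    torch.randn(8, 256), torch.randint(0, 2, (8,))
)
print(dist.sample().shape)  # torch.Size([8, 4])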

56
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
- from mlagents.trainers.torch.decoders import ValueHeads
+ from mlagents.trainers.torch.decoders import ValueHeads, ValueHeadsHyperNetwork
from mlagents.trainers.torch.layers import LSTM, LinearEncoder
from mlagents.trainers.torch.model_serialization import exporting_to_onnx

self.vector_processors,
encoder_input_size,
) = ModelUtils.create_input_processors(
- observation_shapes,
+ observation_shapes[1:],
self.h_size,
network_settings.vis_encode_type,
normalize=self.normalize,

encoding_size = network_settings.memory.memory_size // 2
else:
encoding_size = network_settings.hidden_units
- self.value_heads = ValueHeads(stream_names, encoding_size, outputs_per_stream)
+ self.value_heads = ValueHeadsHyperNetwork(
+ num_layers=1,
+ layer_size=256,
+ num_goals=2,
+ stream_names=stream_names,
+ input_size=encoding_size,
+ output_size=outputs_per_stream,
+ )
@property
def memory_size(self) -> int:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
+ goal: List[torch.tensor],
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

)
- output = self.value_heads(encoding)
+ output = self.value_heads(encoding, goal)
return output, memories

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
+ goal: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

At this moment, torch.onnx.export() doesn't accept None as tensor to be exported,
so the size of return tuple varies with action spec.
"""
+ vec_inputs = [vec_inputs[0][:, 1:]]
+ goal = [vec_inputs[0][:, :1]]
encoding, memories_out = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=1
)

disc_action_out,
action_out_deprecated,
- ) = self.action_model.get_action_out(encoding, masks)
+ ) = self.action_model.get_action_out(encoding, masks, goal)
export_out = [
self.version_number,
torch.Tensor([self.network_body.memory_size]),

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
+ goal: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

encoding, memories = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
- log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
+ log_probs, entropies = self.action_model.evaluate(
+ encoding, masks, actions, goal
+ )
value_outputs = self.value_heads(encoding)
return log_probs, entropies, value_outputs

encoding, memories = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
- action, log_probs, entropies = self.action_model(encoding, masks)
+ action, log_probs, entropies = self.action_model(encoding, masks, goal)
value_outputs = self.value_heads(encoding)
return action, log_probs, entropies, value_outputs, memories

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
goal: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:

actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
value_outputs, critic_mem_out = self.critic(
- vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+ vec_inputs,
+ vis_inputs,
+ goal,
+ memories=critic_mem,
+ sequence_length=sequence_length,
)
if actor_mem is not None:
# Make memories with the actor mem unchanged

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
+ goals: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,

else:
critic_mem = None
actor_mem = None
- log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
+ log_probs, entropies = self.action_model.evaluate(
+ encoding, masks, actions, goals
+ )
- vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+ vec_inputs,
+ vis_inputs,
+ goals,
+ memories=critic_mem,
+ sequence_length=sequence_length,
)
return log_probs, entropies, value_outputs

vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
+ goals: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,

encoding, actor_mem_outs = self.network_body(
vec_inputs, vis_inputs, memories=actor_mem, sequence_length=sequence_length
)
- action, log_probs, entropies = self.action_model(encoding, masks)
+ action, log_probs, entropies = self.action_model(encoding, masks, goals)
- vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+ vec_inputs,
+ vis_inputs,
+ goals,
+ memories=critic_mem,
+ sequence_length=sequence_length,
)
if self.use_lstm:
mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)

2
ml-agents/mlagents/trainers/trajectory.py


agent_buffer_trajectory["vector_obs"].append(
vec_vis_obs.vector_observations
)
if exp.memory is not None:
agent_buffer_trajectory["memory"].append(exp.memory)

507
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!114 &4469182458895145650
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1077752704035527923}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: c52bddbfaf39944a6bb673a9dfcfe4b6, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
m_TargetWalkingSpeed: 10
randomizeWalkSpeedEachEpisode: 0
target: {fileID: 4058446934158437408}
hips: {fileID: 1077752704392483292}
chest: {fileID: 7818481575961221087}
spine: {fileID: 7818481575902529953}
head: {fileID: 7818481576732930258}
thighL: {fileID: 7818481576528932657}
shinL: {fileID: 7818481576468061548}
footL: {fileID: 7818481575932963445}
thighR: {fileID: 7818481577110242841}
shinR: {fileID: 7818481577111017236}
footR: {fileID: 7818481576882516798}
armL: {fileID: 7818481576458883964}
forearmL: {fileID: 7818481576500842159}
handL: {fileID: 7818481576440584931}
armR: {fileID: 7818481575774466714}
forearmR: {fileID: 7818481576563420652}
handR: {fileID: 7818481575132336870}
goals: 2
--- !u!114 &1800586501491974962
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1077752704035527923}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 163dac4bcbb2f4d8499db2cdcb22a89e, type: 3}
m_Name:
m_EditorClassIdentifier:
observationSize: 1
--- !u!1001 &186987432828422960
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: walkDirectionMethod
value: 0
objectReference: {fileID: 0}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: target
value:
objectReference: {fileID: 4058446934158437408}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: randomizeWalkSpeedEachEpisode
value: 1
objectReference: {fileID: 0}
- target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Enabled
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.x
value: -500
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.y
value: 2.57
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.z
value: -250
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.x
value: -0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.y
value: 0.7071068
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.z
value: -0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalRotation.w
value: 0.7071068
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_RootOrder
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updateManually
value: 1
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updateViaScript
value: 1
objectReference: {fileID: 0}
- target: {fileID: 693499830, guid: 765582efd9dda46ed98564603316353f, type: 3}
propertyPath: updatedByAgent
value: 1
objectReference: {fileID: 0}
- target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Name
value: MultiDirRagDoll
objectReference: {fileID: 0}
- target: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_IsActive
value: 1
objectReference: {fileID: 0}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_BehaviorName
value: MultiDirWalker
objectReference: {fileID: 0}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 5022602860645237092, guid: c5c81d94c2dfe4c2b9f7440f533957fa,
type: 3}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_BrainParameters.VectorObservationSize
value: 243
objectReference: {fileID: 0}
- target: {fileID: 6359877977706987617, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.y
value: -2.517
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.699997
objectReference: {fileID: 0}
- target: {fileID: 7933235353030744139, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.69999707
objectReference: {fileID: 0}
- target: {fileID: 7933235353041637847, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.5000001
objectReference: {fileID: 0}
- target: {fileID: 7933235353195701979, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.29999995
objectReference: {fileID: 0}
- target: {fileID: 7933235353228551180, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: -0.5000001
objectReference: {fileID: 0}
- target: {fileID: 7933235353240438170, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.29999995
objectReference: {fileID: 0}
- target: {fileID: 7933235353713167636, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235354074184678, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.5119995
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.69999707
objectReference: {fileID: 0}
- target: {fileID: 7933235354616748522, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 7933235354652902044, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.3829999
objectReference: {fileID: 0}
- target: {fileID: 7933235354845945066, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: 0.3050002
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.y
value: -0.00000011920929
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.699997
objectReference: {fileID: 0}
- target: {fileID: 7933235355057813929, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353272702555, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353655703554, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235353711811619, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.000000059604645
objectReference: {fileID: 0}
- target: {fileID: 7933235354882597209, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_ConnectedAnchor.z
value: -0.000000059604645
objectReference: {fileID: 0}
m_RemovedComponents:
- {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f, type: 3}
m_SourcePrefab: {fileID: 100100000, guid: 765582efd9dda46ed98564603316353f, type: 3}
--- !u!1 &1077752704035527923 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &1077752704035527914 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &1077752704392483292 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 895268871264836332, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576528932657 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353228551169, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576468061548 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353272702556, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575932963445 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354882597189, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481577110242841 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353713167657, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481577111017236 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353711811620, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576882516798 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353655703566, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575902529953 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354652902033, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575961221087 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354845945071, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576458883964 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353030744140, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576500842159 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353240438175, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576440584931 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353041637843, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575774466714 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235355057813930, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576563420652 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235353195701980, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481575132336870 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354616748502, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!4 &7818481576732930258 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 7933235354074184674, guid: 765582efd9dda46ed98564603316353f,
type: 3}
m_PrefabInstance: {fileID: 186987432828422960}
m_PrefabAsset: {fileID: 0}
--- !u!1001 &942701540323662238
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 1077752704035527914}
m_Modifications:
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.y
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalPosition.z
value: 1800
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalRotation.w
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_RootOrder
value: 3
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 3840539935788495952, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
propertyPath: m_Name
value: StaticTarget
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e, type: 3}
--- !u!4 &4058446934158437408 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 3839136118347789758, guid: 2173d15c0b5fc49e5870c9d1c7f7ee8e,
type: 3}
m_PrefabInstance: {fileID: 942701540323662238}
m_PrefabAsset: {fileID: 0}

7
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/MultiDirRagDoll.prefab.meta


fileFormatVersion: 2
guid: d32d9be22fe544fd38de3cf5db023465
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity
The file diff is too large to display.

9
Project/Assets/ML-Agents/Examples/Walker/Scenes/MultiDirWalkerStatic.unity.meta


fileFormatVersion: 2
guid: 0c5ba64aa7c084a63b21f8e2b900fc29
timeCreated: 1520420566
licenseType: Free
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

353
Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs


using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;
using BodyPart = Unity.MLAgentsExamples.BodyPart;
using Random = UnityEngine.Random;
public class MultiDirWalkerAgent : Agent
{
[Header("Walk Speed")]
[Range(0.1f, 10)]
[SerializeField]
//The walking speed to try and achieve
private float m_TargetWalkingSpeed = 10;
private Vector3 m_startingPos; //the starting position of the target
public float MTargetWalkingSpeed // property
{
get { return m_TargetWalkingSpeed; }
set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
}
const float m_maxWalkingSpeed = 10; //The max walking speed
//Should the agent sample a new goal velocity each episode?
//If true, walkSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
//The direction an agent will walk during training.
private Vector3 m_WorldDirToWalk = Vector3.right;
[Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
[Header("Body Parts")] public Transform hips;
public Transform chest;
public Transform spine;
public Transform head;
public Transform thighL;
public Transform shinL;
public Transform footL;
public Transform thighR;
public Transform shinR;
public Transform footR;
public Transform armL;
public Transform forearmL;
public Transform handL;
public Transform armR;
public Transform forearmR;
public Transform handR;
int m_Goal;
float dir;
public int goals;
float[] m_GoalOneHot;
//This will be used as a stabilized model space reference point for observations
//Because ragdolls can move erratically during training, using a stabilized reference transform improves learning
OrientationCubeController m_OrientationCube;
GoalSensorComponent goalSensor;
//The indicator graphic gameobject that points towards the target
DirectionIndicator m_DirectionIndicator;
JointDriveController m_JdController;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{
m_startingPos = target.position;
m_Goal = Random.Range(0, goals);
//m_Goal = 0;
m_GoalOneHot = new float[goals];
System.Array.Clear(m_GoalOneHot, 0, m_GoalOneHot.Length);
m_GoalOneHot[m_Goal] = 1;
if (m_Goal == 0)
{
var newTargetPos = new Vector3(1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = 1f;
}
else
{
var newTargetPos = new Vector3(-1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = -1f;
}
m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
//Setup each body part
m_JdController = GetComponent<JointDriveController>();
m_JdController.SetupBodyPart(hips);
m_JdController.SetupBodyPart(chest);
m_JdController.SetupBodyPart(spine);
m_JdController.SetupBodyPart(head);
m_JdController.SetupBodyPart(thighL);
m_JdController.SetupBodyPart(shinL);
m_JdController.SetupBodyPart(footL);
m_JdController.SetupBodyPart(thighR);
m_JdController.SetupBodyPart(shinR);
m_JdController.SetupBodyPart(footR);
m_JdController.SetupBodyPart(armL);
m_JdController.SetupBodyPart(forearmL);
m_JdController.SetupBodyPart(handL);
m_JdController.SetupBodyPart(armR);
m_JdController.SetupBodyPart(forearmR);
m_JdController.SetupBodyPart(handR);
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}
/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void OnEpisodeBegin()
{
m_Goal = Random.Range(0, goals);
//m_Goal = 0;
System.Array.Clear(m_GoalOneHot, 0, m_GoalOneHot.Length);
m_GoalOneHot[m_Goal] = 1;
if (m_Goal == 0)
{
var newTargetPos = new Vector3(1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = 1f;
}
else
{
var newTargetPos = new Vector3(-1800f, 1f, m_startingPos.z);
target.position = newTargetPos;
dir = -1f;
}
//Reset all of the body parts
foreach (var bodyPart in m_JdController.bodyPartsDict.Values)
{
bodyPart.Reset(bodyPart);
}
//Random start rotation to help generalize
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationObjects();
//Set our goal walking speed
MTargetWalkingSpeed =
randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
SetResetParameters();
}
/// <summary>
/// Add relevant information on each body part to observations.
/// </summary>
public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
{
//GROUND CHECK
sensor.AddObservation(bp.groundContact.touchingGround); // Is this bp touching the ground
//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
//Get position relative to hips in the context of our orientation cube's space
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
{
sensor.AddObservation(bp.rb.transform.localRotation);
sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
}
}
/// <summary>
/// Loop over body parts to add them to observation.
/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
var cubeForward = m_OrientationCube.transform.forward;
//velocity we want to match
var velGoal = cubeForward * MTargetWalkingSpeed;
//ragdoll's avg vel
var avgVel = GetAvgVelocity();
//current ragdoll velocity. normalized
sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
//avg body vel relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
//vel goal relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
//rotation deltas
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
//Position of target position relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
foreach (var bodyPart in m_JdController.bodyPartsList)
{
CollectObservationBodyPart(bodyPart, sensor);
}
//sensor.AddObservation(m_GoalOneHot);
goalSensor = this.GetComponent<GoalSensorComponent>();
goalSensor.AddGoal(m_Goal);
}
public override void OnActionReceived(ActionBuffers actionBuffers)
{
var bpDict = m_JdController.bodyPartsDict;
var i = -1;
var continuousActions = actionBuffers.ContinuousActions;
bpDict[chest].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[spine].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[thighL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[thighR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[shinL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[footL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[armL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[armR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[forearmL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[head].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
//update joint strength settings
bpDict[chest].SetJointStrength(continuousActions[++i]);
bpDict[spine].SetJointStrength(continuousActions[++i]);
bpDict[head].SetJointStrength(continuousActions[++i]);
bpDict[thighL].SetJointStrength(continuousActions[++i]);
bpDict[shinL].SetJointStrength(continuousActions[++i]);
bpDict[footL].SetJointStrength(continuousActions[++i]);
bpDict[thighR].SetJointStrength(continuousActions[++i]);
bpDict[shinR].SetJointStrength(continuousActions[++i]);
bpDict[footR].SetJointStrength(continuousActions[++i]);
bpDict[armL].SetJointStrength(continuousActions[++i]);
bpDict[forearmL].SetJointStrength(continuousActions[++i]);
bpDict[armR].SetJointStrength(continuousActions[++i]);
bpDict[forearmR].SetJointStrength(continuousActions[++i]);
}
//Update OrientationCube and DirectionIndicator
void UpdateOrientationObjects()
{
m_WorldDirToWalk = target.position - hips.position;
m_OrientationCube.UpdateOrientation(hips, target);
if (m_DirectionIndicator)
{
m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
}
void FixedUpdate()
{
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;
// Set reward for this step according to mixture of the following elements.
// a. Match target speed
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
//Check for NaNs
if (float.IsNaN(matchSpeedReward))
{
throw new ArgumentException(
"NaN in moveTowardsTargetReward.\n" +
$" cubeForward: {cubeForward}\n" +
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
);
}
// b. Rotation alignment with target direction.
//This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
var lookAtTargetReward = (Vector3.Dot(cubeForward, dir * head.forward) + 1) * .5F;
//Check for NaNs
if (float.IsNaN(lookAtTargetReward))
{
throw new ArgumentException(
"NaN in lookAtTargetReward.\n" +
$" cubeForward: {cubeForward}\n" +
$" head.forward: {head.forward}"
);
}
Debug.Log(lookAtTargetReward);
Debug.Log(matchSpeedReward);
AddReward(matchSpeedReward * lookAtTargetReward);
}
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;
Vector3 avgVel = Vector3.zero;
//ALL RBS
int numOfRB = 0;
foreach (var item in m_JdController.bodyPartsList)
{
numOfRB++;
velSum += item.rb.velocity;
}
avgVel = velSum / numOfRB;
return avgVel;
}
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
{
//distance between our actual velocity and goal velocity
var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
}
/// <summary>
/// Agent touched the target
/// </summary>
public void TouchedTarget()
{
AddReward(1f);
}
public void SetTorsoMass()
{
m_JdController.bodyPartsDict[chest].rb.mass = m_ResetParams.GetWithDefault("chest_mass", 8);
m_JdController.bodyPartsDict[spine].rb.mass = m_ResetParams.GetWithDefault("spine_mass", 8);
m_JdController.bodyPartsDict[hips].rb.mass = m_ResetParams.GetWithDefault("hip_mass", 8);
}
public void SetResetParameters()
{
SetTorsoMass();
}
}
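The agent's per-step reward in FixedUpdate() is the product of a speed-matching term and a look-at term (the dir factor flips the facing check depending on the sampled goal). The speed term is the curve implemented by GetMatchingVelocityReward; a quick plain-Python check of its shape at the default target speed of 10:

# Illustrative check of the speed-matching reward defined above:
# reward = (1 - (|v - v_goal| / v_max)^2)^2, with the velocity delta clamped to v_max.
def matching_velocity_reward(vel_delta: float, target_speed: float) -> float:
    delta = min(max(vel_delta, 0.0), target_speed)
    return (1.0 - (delta / target_speed) ** 2) ** 2

for delta in (0.0, 2.5, 5.0, 10.0):
    print(f"{delta:>4} -> {matching_velocity_reward(delta, 10.0):.4f}")
# 0.0 -> 1.0000, 2.5 -> 0.8789, 5.0 -> 0.5625, 10.0 -> 0.0000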

11
Project/Assets/ML-Agents/Examples/Walker/Scripts/MultiDirWalkerAgent.cs.meta


fileFormatVersion: 2
guid: c52bddbfaf39944a6bb673a9dfcfe4b6
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/MultiDirWalker-5997779.onnx
The file diff is too large to display.

26
config/ppo/MultiDirWalker.yaml


behaviors:
MultiDirWalker:
trainer_type: ppo
hyperparameters:
batch_size: 2048
buffer_size: 20480
learning_rate: 0.0003
beta: 0.005
epsilon: 0.2
lambd: 0.95
num_epoch: 3
learning_rate_schedule: linear
network_settings:
normalize: true
hidden_units: 256
num_layers: 2
vis_encode_type: simple
reward_signals:
extrinsic:
gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true
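The config trains the MultiDirWalker behavior with PPO, observation normalization, and a two-layer, 256-unit network. As a quick, hypothetical sanity check that the file parses and that the behavior key matches the m_BehaviorName set on the MultiDirRagDoll prefab (run from the repository root with PyYAML installed):

import yaml

with open("config/ppo/MultiDirWalker.yaml") as f:
    cfg = yaml.safe_load(f)

walker = cfg["behaviors"]["MultiDirWalker"]        # must match m_BehaviorName on the prefab
print(walker["hyperparameters"]["batch_size"])     # 2048
print(walker["network_settings"]["hidden_units"])  # 256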