Modify CrawlerDynamic

/goal-conditioning/new
Arthur Juliani, 4 years ago
Commit d2526ce2
6 changed files with 109 additions and 27 deletions
  1. Project/Assets/ML-Agents/Examples/Crawler/Prefabs/CrawlerBase.prefab (20 changes)
  2. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (4 changes)
  3. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs (19 changes)
  4. config/ppo/CrawlerDynamic.yaml (3 changes)
  5. ml-agents/mlagents/trainers/torch/layers.py (80 changes)
  6. ml-agents/mlagents/trainers/torch/networks.py (10 changes)

Project/Assets/ML-Agents/Examples/Crawler/Prefabs/CrawlerBase.prefab (20 changes)


   - component: {fileID: 4845971001715176663}
   - component: {fileID: 4845971001715176660}
   - component: {fileID: 8968436865147081600}
+  - component: {fileID: 1768700195492406288}
   m_Layer: 0
   m_Name: CrawlerBase
   m_TagString: Untagged

   m_Name:
   m_EditorClassIdentifier:
   m_BrainParameters:
-    VectorObservationSize: 32
+    VectorObservationSize: 29
     m_ActionSpec:
       m_NumContinuousActions: 20
       BranchSizes:
     hasUpgradedBrainParametersWithActionSpec: 1
   m_Model: {fileID: 11400000, guid: c6509001ba679447fba27f894761c3ba, type: 3}
   m_InferenceDevice: 0
   m_BehaviorType: 0

 UseJointPositionsAndAngles: 0
 UseJointForces: 0
 sensorName:
+--- !u!114 &1768700195492406288
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_CorrespondingSourceObject: {fileID: 0}
+  m_PrefabInstance: {fileID: 0}
+  m_PrefabAsset: {fileID: 0}
+  m_GameObject: {fileID: 4845971001715176661}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: fd48a95d788a348b0b7a8cefd39e7c27, type: 3}
+  m_Name:
+  m_EditorClassIdentifier:
+  observationSize: 3
 --- !u!1 &4845971001730692034
 GameObject:
   m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (4 changes)


"Static - The agent will run towards a static target. "
)]
public CrawlerAgentBehaviorType typeOfCrawler;
GoalSensorComponent goalSensor;
//Crawler Brains
//A different brain will be used depending on the CrawlerAgentBehaviorType selected

sensor.AddObservation(Quaternion.FromToRotation(body.forward, cubeForward));
//Add pos of target relative to orientation cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(m_Target.transform.position));
goalSensor = this.GetComponent<GoalSensorComponent>();
goalSensor.AddGoal(m_OrientationCube.transform.InverseTransformPoint(m_Target.transform.position));
RaycastHit hit;
float maxRaycastDist = 10;

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GoalSensorComponent.cs (19 changes)


using System.Collections.Generic;
using System.Collections.ObjectModel;
using UnityEngine;

public class GoalSensorComponent : SensorComponent

        return new[] { observationSize };
    }

    public void AddGoal(IEnumerable<float> goal)
    {
        if (goalSensor != null)
        {
            goalSensor.AddObservation(goal);
        }
    }

    public void AddGoal(float goal)
    {
        if (goalSensor != null)
        {
            goalSensor.AddObservation(goal);
        }
    }

    public void AddOneHotGoal(int goal, int range)
    {
        if (goalSensor != null)
        {
            goalSensor.AddOneHotObservation(goal, range);
        }
    }

    public void AddGoal(Vector3 goal)
    {
        if (goalSensor != null)
        {
            goalSensor.AddObservation(goal);
        }
    }
}

config/ppo/CrawlerDynamic.yaml (3 changes)


     learning_rate_schedule: linear
   network_settings:
     normalize: true
-    hidden_units: 512
+    hidden_units: 256
     num_layers: 3
     vis_encode_type: simple
   reward_signals:

   time_horizon: 1000
   summary_freq: 30000
   threaded: true
+  checkpoint_interval: 1000000

ml-agents/mlagents/trainers/torch/layers.py (80 changes)


         kernel_gain: float = 1.0,
     ):
         super().__init__()
-        self.goal_encoder = LinearEncoder(goal_size, 2, hidden_size)
-        self.layers = [
-            linear_layer(
-                input_size,
-                hidden_size,
-                kernel_init=kernel_init,
-                kernel_gain=kernel_gain,
-            )
-        ]
-        self.layers.append(Swish())
-        for _ in range(num_layers - 1):
+        self.layers = []
+        self.goal_encoders = []
+        prev_size = input_size
+        for i in range(num_layers):
             self.layers.append(
                 linear_layer(
-                    hidden_size,
+                    prev_size,
                     hidden_size,
                     kernel_init=kernel_init,
                     kernel_gain=kernel_gain,
                 )
             )
             self.layers.append(Swish())
+            self.goal_encoders.append(LinearEncoder(goal_size, 2, hidden_size, final_activation=False))
+            prev_size = hidden_size

-        goal_activation = self.goal_encoder(goal_tensor)
+        for idx, layer in enumerate(self.layers):
+            if isinstance(layer, Swish):
+                activation = layer(activation)
+            else:
+                activation = layer(activation) * self.goal_encoders[idx//2](goal_tensor)
         return activation
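
Taken together, the ConditionalEncoder hunks above replace a single shared goal encoder with one multiplicative gate per layer: each linear layer's output is multiplied elementwise by a goal encoding before the Swish nonlinearity. A minimal self-contained sketch of that soft-conditioning pattern, using stock PyTorch modules in place of linear_layer / LinearEncoder / Swish (the sizes 29, 3, and 256 are borrowed from the prefab and config above purely for illustration):

    import torch
    import torch.nn as nn

    class SoftConditionedMLP(nn.Module):
        def __init__(self, input_size: int, goal_size: int, hidden_size: int, num_layers: int):
            super().__init__()
            self.body = nn.ModuleList(
                nn.Linear(input_size if i == 0 else hidden_size, hidden_size)
                for i in range(num_layers)
            )
            # One goal encoder per layer; its head is linear (no final activation)
            # so the gates can scale activations down or flip their sign.
            self.gates = nn.ModuleList(
                nn.Sequential(
                    nn.Linear(goal_size, hidden_size),
                    nn.SiLU(),
                    nn.Linear(hidden_size, hidden_size),
                )
                for _ in range(num_layers)
            )
            self.act = nn.SiLU()  # SiLU is the same function as Swish

        def forward(self, obs: torch.Tensor, goal: torch.Tensor) -> torch.Tensor:
            h = obs
            for layer, gate in zip(self.body, self.gates):
                # Gate the linear output with the goal encoding, then apply Swish,
                # mirroring `layer(activation) * goal_encoders[idx//2](goal_tensor)`.
                h = self.act(layer(h) * gate(goal))
            return h

    net = SoftConditionedMLP(input_size=29, goal_size=3, hidden_size=256, num_layers=3)
    print(net(torch.randn(8, 29), torch.randn(8, 3)).shape)  # torch.Size([8, 256])
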
+class HyperEncoder(torch.nn.Module):
+    """
+    Linear layers.
+    """
+
+    def __init__(
+        self,
+        input_size: int,
+        goal_size: int,
+        num_layers: int,
+        hidden_size: int,
+        kernel_init: Initialization = Initialization.KaimingHeNormal,
+        kernel_gain: float = 1.0,
+        num_hyper_layers: int = 1,
+    ):
+        super().__init__()
+        self.layers = []
+        prev_size = input_size
+        for i in range(num_layers):
+            if i < num_layers - num_hyper_layers:
+                self.layers.append(
+                    linear_layer(
+                        prev_size,
+                        hidden_size,
+                        kernel_init=kernel_init,
+                        kernel_gain=kernel_gain,
+                    )
+                )
+            else:
+                self.layers.append(
+                    HyperNetwork(prev_size, hidden_size, goal_size, 2, hidden_size)
+                )
+            self.layers.append(Swish())
+            prev_size = hidden_size
+
+    def forward(
+        self, input_tensor: torch.Tensor, goal_tensor: torch.Tensor
+    ) -> torch.Tensor:
+        activation = input_tensor
-        activation = layer(activation)
-        if layer is not Swish():
-            activation *= goal_activation
+        for layer in self.layers:
+            if isinstance(layer, HyperNetwork):
+                activation = layer(activation, goal_tensor)
+            else:
+                activation = layer(activation)
+        return activation
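
HyperNetwork itself is not part of this diff; from the call HyperNetwork(prev_size, hidden_size, goal_size, 2, hidden_size) it plausibly maps the goal through a 2-layer MLP to the weights of a prev_size → hidden_size linear layer. A sketch of that idea under those assumptions (TinyHyperLayer and every name in it are illustrative, not the branch's implementation):

    import torch
    import torch.nn as nn

    class TinyHyperLayer(nn.Module):
        def __init__(self, input_size: int, output_size: int, goal_size: int, layer_size: int):
            super().__init__()
            self.input_size, self.output_size = input_size, output_size
            # A small MLP maps the goal to a flattened weight matrix plus bias.
            self.weight_gen = nn.Sequential(
                nn.Linear(goal_size, layer_size),
                nn.SiLU(),
                nn.Linear(layer_size, input_size * output_size + output_size),
            )

        def forward(self, x: torch.Tensor, goal: torch.Tensor) -> torch.Tensor:
            params = self.weight_gen(goal)  # (batch, in*out + out)
            split = self.input_size * self.output_size
            w = params[:, :split].view(-1, self.output_size, self.input_size)
            b = params[:, split:]
            # Apply the goal-generated linear map per sample.
            return torch.bmm(w, x.unsqueeze(-1)).squeeze(-1) + b

    layer = TinyHyperLayer(input_size=256, output_size=256, goal_size=3, layer_size=256)
    print(layer(torch.randn(8, 256), torch.randn(8, 3)).shape)  # torch.Size([8, 256])

Compared with the soft gating above, a hyper-layer lets the goal change the layer's entire linear map rather than just rescaling its outputs, at the cost of generating input_size * output_size parameters per sample.
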

         hidden_size: int,
         kernel_init: Initialization = Initialization.KaimingHeNormal,
         kernel_gain: float = 1.0,
+        final_activation: bool = True,
     ):
         super().__init__()
         self.layers = [
             linear_layer(
                 input_size,
                 hidden_size,
                 kernel_init=kernel_init,
                 kernel_gain=kernel_gain,
             )
         ]
         self.layers.append(Swish())
-        for _ in range(num_layers - 1):
+        for i in range(num_layers - 1):
             self.layers.append(
                 linear_layer(
                     hidden_size,
                     hidden_size,
                     kernel_init=kernel_init,
                     kernel_gain=kernel_gain,
                 )
             )
-            self.layers.append(Swish())
+            if i < num_layers - 2 or final_activation:
+                self.layers.append(Swish())
         self.seq_layers = torch.nn.Sequential(*self.layers)

     def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
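
The new final_activation flag lets a LinearEncoder end in a bare linear layer instead of Swish; the gating encoders above pass final_activation=False, plausibly because Swish cannot output values below about -0.28, which would restrict what the multiplicative gates can express. A quick illustration:

    import torch

    x = torch.linspace(-4.0, 4.0, steps=5)  # [-4, -2, 0, 2, 4]
    print(x * torch.sigmoid(x))             # Swish: approx. [-0.07, -0.24, 0.00, 1.76, 3.93]
    # A linear head instead returns x itself, keeping the full range of gate values.
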

ml-agents/mlagents/trainers/torch/networks.py (10 changes)


     LSTM,
     LinearEncoder,
-    HyperNetwork,
-    ConditionalEncoder,
+    ConditionalEncoder, HyperEncoder,

 ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
 EncoderFunction = Callable[

         encoded_act_size: int = 0,
     ):
         super().__init__()
-        self.conditioning_mode = ConditioningMode.HYPER
+        self.conditioning_mode = ConditioningMode.SOFT
         self.normalize = network_settings.normalize
         self.use_lstm = network_settings.memory is not None
         self.h_size = network_settings.hidden_units

             ObservationType.GOAL in self.obs_types
             and self.conditioning_mode == ConditioningMode.HYPER
         ):
-            self.linear_encoder = HyperNetwork(
+            self.linear_encoder = HyperEncoder(
                 self.h_size,
+                num_hyper_layers=0
             )
         elif (
             ObservationType.GOAL in self.obs_types
     ) -> Tuple[
         AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
     ]:
         encoding, memories = self.network_body(
             inputs, memories=memories, sequence_length=sequence_length
         )
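
For skimmers, the encoder-selection logic in this hunk reduces to the sketch below; ConditioningMode, the encoder names, and num_hyper_layers come from the diff, while the function and its arguments are assumed:

    from enum import Enum

    class ConditioningMode(Enum):
        SOFT = 0
        HYPER = 1

    def choose_encoder(mode: ConditioningMode, has_goal_obs: bool) -> str:
        if not has_goal_obs:
            return "LinearEncoder"  # plain MLP, no goal input
        if mode == ConditioningMode.HYPER:
            # The diff passes num_hyper_layers=0, so every layer in the
            # HyperEncoder stays a plain linear_layer and the goal tensor is
            # effectively unused in its forward pass (as reconstructed above).
            return "HyperEncoder(num_hyper_layers=0)"
        return "ConditionalEncoder"  # ConditioningMode.SOFT

    print(choose_encoder(ConditioningMode.SOFT, has_goal_obs=True))  # ConditionalEncoder

Note the commit also flips the hard-coded default from HYPER to SOFT, so this run exercises the ConditionalEncoder path.
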
