浏览代码

soccer comms on the cloud

/comms-grad
Andrew Cohen 4 年前
当前提交
ca5a5194
共有 9 个文件被更改,包括 186 次插入33 次删除
  1. 8
      Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab
  2. 23
      Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayCollabAgent.cs
  3. 14
      Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab
  4. 140
      Project/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity
  5. 6
      config/ppo/HallwayCollab.yaml
  6. 16
      config/ppo/SoccerTwos.yaml
  7. 2
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  8. 4
      ml-agents/mlagents/trainers/torch/distributions.py
  9. 6
      ml-agents/mlagents/trainers/trajectory.py

8
Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab


- symbol_O
- symbol_X
- symbol_S
- symbol_O_Goal
- symbol_X_Goal
- symbol_S_Goal
- wall
m_RaysPerDirection: 2
m_MaxRayDegrees: 70

m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1164310131249751511}
m_Text: 'Message: '
m_Text: 'Sent:'
m_OffsetZ: 0
m_CharacterSize: 1
m_LineSpacing: 1

m_EditorClassIdentifier:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
- symbol_O
- symbol_X
- symbol_S
- symbol_O_Goal
- symbol_X_Goal
- symbol_S_Goal

23
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayCollabAgent.cs


public HallwayCollabAgent teammate;
public bool isSpotter = true;
TextMesh m_MessageText;
TextMesh m_MessageRec;
public override void Initialize()
{
base.Initialize();

if (isSpotter)
{
m_MessageText.text = "Message:" + comm_act.ToString();
m_MessageText.text = "Sent:" + comm_act.ToString();
}
teammate.tellAgent(comm_act);
// if (isSpotter) // Test

}
}
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
// Mask the necessary actions if selected by the user.
if (!isSpotter)
{
// Prevents the agent from picking an action that would make it collide with a wall
actionMask.WriteMask(1, new[] {0});
}
}
//public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
//{
// // Mask the necessary actions if selected by the user.
// if (!isSpotter)
// {
// // Prevents the agent from picking an action that would make it collide with a wall
// actionMask.WriteMask(1, new[] { 0 });
// }
//}
}

14
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab


m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 2
NumStackedVectorObservations: 3
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes: 02000000

m_BehaviorType: 0
m_BehaviorName: SoccerTwos
TeamId: 1
GroupId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0

m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 2
NumStackedVectorObservations: 3
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes: 02000000

m_BehaviorType: 0
m_BehaviorName: SoccerTwos
TeamId: 0
GroupId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0

m_BehaviorType: 0
m_BehaviorName: SoccerTwos
TeamId: 1
GroupId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0

m_BehaviorType: 0
m_BehaviorName: SoccerTwos
TeamId: 1
GroupId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0

m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 2
NumStackedVectorObservations: 3
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes: 02000000

m_BehaviorType: 0
m_BehaviorName: SoccerTwos
TeamId: 1
GroupId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0

m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 2
NumStackedVectorObservations: 3
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes: 02000000

m_BehaviorType: 0
m_BehaviorName: SoccerTwos
TeamId: 0
GroupId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0

140
Project/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity


propertyPath: m_RootOrder
value: 8
objectReference: {fileID: 0}
- target: {fileID: 114387866097048300, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 4
objectReference: {fileID: 0}
- target: {fileID: 114734187185382186, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 4
objectReference: {fileID: 0}
- target: {fileID: 8577361778124631730, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 4
objectReference: {fileID: 0}
- target: {fileID: 3316085536305919483, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 4
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
--- !u!1001 &774084574

propertyPath: m_RootOrder
value: 11
objectReference: {fileID: 0}
- target: {fileID: 114387866097048300, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 7
objectReference: {fileID: 0}
- target: {fileID: 114734187185382186, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 7
objectReference: {fileID: 0}
- target: {fileID: 8577361778124631730, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 7
objectReference: {fileID: 0}
- target: {fileID: 3316085536305919483, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 7
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
--- !u!1001 &1177695920

propertyPath: m_RootOrder
value: 5
objectReference: {fileID: 0}
- target: {fileID: 114387866097048300, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 1
objectReference: {fileID: 0}
- target: {fileID: 114734187185382186, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 1
objectReference: {fileID: 0}
- target: {fileID: 8577361778124631730, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3316085536305919483, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 1
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
--- !u!1001 &1325062279

propertyPath: m_RootOrder
value: 7
objectReference: {fileID: 0}
- target: {fileID: 114387866097048300, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 3
objectReference: {fileID: 0}
- target: {fileID: 114734187185382186, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 3
objectReference: {fileID: 0}
- target: {fileID: 8577361778124631730, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 3
objectReference: {fileID: 0}
- target: {fileID: 3316085536305919483, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 3
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
--- !u!1001 &1348885204

propertyPath: m_RootOrder
value: 9
objectReference: {fileID: 0}
- target: {fileID: 114387866097048300, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 5
objectReference: {fileID: 0}
- target: {fileID: 114734187185382186, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 5
objectReference: {fileID: 0}
- target: {fileID: 8577361778124631730, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 5
objectReference: {fileID: 0}
- target: {fileID: 3316085536305919483, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 5
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
--- !u!1001 &1748755984

propertyPath: m_RootOrder
value: 10
objectReference: {fileID: 0}
- target: {fileID: 114387866097048300, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 6
objectReference: {fileID: 0}
- target: {fileID: 114734187185382186, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 6
objectReference: {fileID: 0}
- target: {fileID: 8577361778124631730, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 6
objectReference: {fileID: 0}
- target: {fileID: 3316085536305919483, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 6
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
--- !u!1001 &2043067438

type: 3}
propertyPath: m_BehaviorType
value: 0
objectReference: {fileID: 0}
- target: {fileID: 114387866097048300, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 2
objectReference: {fileID: 0}
- target: {fileID: 114734187185382186, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 2
objectReference: {fileID: 0}
- target: {fileID: 8577361778124631730, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 2
objectReference: {fileID: 0}
- target: {fileID: 3316085536305919483, guid: 54f3340298537426e96a6cc530e2d5d8,
type: 3}
propertyPath: GroupId
value: 2
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}

6
config/ppo/HallwayCollab.yaml


HallwayCollab:
trainer_type: ppo
hyperparameters:
batch_size: 128
buffer_size: 1024
batch_size: 512
buffer_size: 4096
learning_rate: 0.0003
beta: 0.01
epsilon: 0.2

network_settings:
normalize: false
hidden_units: 128
hidden_units: 256
num_layers: 2
vis_encode_type: simple
reward_signals:

16
config/ppo/SoccerTwos.yaml


SoccerTwos:
trainer_type: ppo
hyperparameters:
batch_size: 2048
buffer_size: 20480
batch_size: 4096
buffer_size: 40960
learning_rate: 0.0003
beta: 0.005
epsilon: 0.2

behavior: SoccerTwos
signal_smoothing: true
min_lesson_length: 100
threshold: 0.0005
threshold: 0.001
value: 1.0
- name: Lesson1
completion_criteria:

min_lesson_length: 100
threshold: 0.001
threshold: 0.00175
value: 0.5
- name: Lesson2
completion_criteria:

min_lesson_length: 100
threshold: 0.0015
threshold: 0.00225
value: 0.1
- name: Lesson3
completion_criteria:

min_lesson_length: 100
threshold: 0.002
threshold: 0.003
env_settings:
num_envs: 2
#env_settings:
# num_envs: 2

2
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


memories=memories,
seq_len=self.policy.sequence_length,
)
obs[-1] = comms[1]
obs[-1] = comms[0]
log_probs, entropy, values = self.policy.evaluate_actions(
obs,

4
ml-agents/mlagents/trainers/torch/distributions.py


) -> torch.Tensor:
# Zero out masked logits, then subtract a large value. Technique mentioned here:
# https://arxiv.org/abs/2006.14171. Our implementation is ONNX and Barracuda-friendly.
if allow_mask.shape[1] == 3:
allow_mask = allow_mask[:, :1].expand(-1, 3)
#if allow_mask.shape[1] == 3:
# allow_mask = allow_mask[:, :1].expand(-1, 3)
block_mask = -1.0 * allow_mask + 1.0
# We do -1 * tensor + constant instead of constant - tensor because it seems
# Barracuda might swap the inputs of a "Sub" operation

6
ml-agents/mlagents/trainers/trajectory.py


for step, exp in enumerate(self.steps):
if step == 0:
# this initial all-zeros entry creates the offset for comms
agent_buffer_trajectory["comm_obs"].append(
np.zeros_like(exp.collab_obs)
)
dummy = [[np.zeros_like(col_ob) for col_ob in exp.collab_obs[_ag]] for _ag in range(len(exp.collab_obs))]
agent_buffer_trajectory["comm_obs"].append(dummy)
if step < len(self.steps) - 1:
next_obs = self.steps[step + 1].obs
else:

正在加载...
取消
保存