
continuous comms

/continuous-comms
Andrew Cohen, 4 years ago
Current commit
a20287f7
7 files changed, 1067 insertions and 25 deletions
  1. Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (12 changes)
  2. Project/Assets/ML-Agents/Examples/Hallway/Scenes/HallwayCollab.unity (33 changes)
  3. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayCollabAgent.cs (22 changes)
  4. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (8 changes)
  5. ml-agents/mlagents/trainers/torch/action_model.py (3 changes)
  6. Project/Assets/ML-Agents/Examples/Hallway/TFModels/diff-comms.onnx (1001 changes)
  7. Project/Assets/ML-Agents/Examples/Hallway/TFModels/diff-comms.onnx.meta (13 changes)

Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (12 changes)


VectorObservationSize: 3
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 0500000003000000
VectorActionSize: 0500000003000000
m_NumContinuousActions: 3
BranchSizes: 05000000
VectorActionSize:
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1

VectorObservationSize: 3
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 0500000003000000
VectorActionSize: 0500000003000000
m_NumContinuousActions: 3
BranchSizes: 05000000
VectorActionSize:
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
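The prefab change above (repeated once per agent in the area) swaps the BrainParameters from two discrete action branches to three continuous actions plus a single discrete branch. BranchSizes is serialized as little-endian int32 bytes, so the hex strings can be decoded directly. A minimal Python sketch of the before/after spec, assuming the mlagents_envs ActionSpec API of this era (shown only for illustration, not taken from the commit):

    import struct
    from mlagents_envs.base_env import ActionSpec

    # Unity serializes BranchSizes as little-endian int32s.
    old_branches = struct.unpack("<2i", bytes.fromhex("0500000003000000"))  # (5, 3)
    new_branches = struct.unpack("<1i", bytes.fromhex("05000000"))          # (5,)

    # Before: a 5-way movement branch plus a 3-way discrete message, no continuous actions.
    old_spec = ActionSpec(continuous_size=0, discrete_branches=old_branches)
    # After: movement stays discrete, the message becomes 3 continuous actions.
    new_spec = ActionSpec(continuous_size=3, discrete_branches=new_branches)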

Project/Assets/ML-Agents/Examples/Hallway/Scenes/HallwayCollab.unity (33 changes)


m_EnableBakedLightmaps: 1
m_EnableRealtimeLightmaps: 1
m_LightmapEditorSettings:
serializedVersion: 10
serializedVersion: 12
m_Resolution: 2
m_BakeResolution: 40
m_AtlasSize: 1024

m_CompAOExponentDirect: 0
m_ExtractAmbientOcclusion: 0
m_Padding: 2
m_LightmapParameters: {fileID: 0}
m_LightmapsBakeMode: 1

m_PVRDirectSampleCount: 32
m_PVRSampleCount: 500
m_PVRBounces: 2
m_PVREnvironmentSampleCount: 500
m_PVREnvironmentReferencePointCount: 2048
m_PVRFilteringMode: 2
m_PVRDenoiserTypeDirect: 0
m_PVRDenoiserTypeIndirect: 0
m_PVRDenoiserTypeAO: 0
m_PVRFilteringMode: 1
m_PVREnvironmentMIS: 0
m_PVRCulling: 1
m_PVRFilteringGaussRadiusDirect: 1
m_PVRFilteringGaussRadiusIndirect: 5

m_PVRFilteringAtrousPositionSigmaAO: 1
m_ShowResolutionOverlay: 1
m_ExportTrainingData: 0
m_TrainingDataDestination: TrainingData
m_LightingDataAsset: {fileID: 112000002, guid: 03723c7f910c3423aa1974f1b9ce8392,
type: 2}
m_UseShadowmask: 1

m_GameObject: {fileID: 255077123}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 1077351063, guid: f70555f144d8491a825f0804e09c671c, type: 3}
m_Script: {fileID: 11500000, guid: 4f231c4fb786f3946a6b90b886c48677, type: 3}
m_Name:
m_EditorClassIdentifier:
m_HorizontalAxis: Horizontal

m_GameObject: {fileID: 255077123}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: -619905303, guid: f70555f144d8491a825f0804e09c671c, type: 3}
m_Script: {fileID: 11500000, guid: 76c392e42b5098c458856cdf6ecaaaa1, type: 3}
m_Name:
m_EditorClassIdentifier:
m_FirstSelected: {fileID: 0}

propertyPath: m_Constraints
value: 126
objectReference: {fileID: 0}
- target: {fileID: 1410733827718496, guid: f3a451555dc514f46a69319857762eda, type: 3}
propertyPath: m_Name
value: symbol1Goal
objectReference: {fileID: 0}
- target: {fileID: 1745841960385024, guid: f3a451555dc514f46a69319857762eda, type: 3}
propertyPath: m_Name
value: HallwayArea (1)

propertyPath: m_RootOrder
value: 5
objectReference: {fileID: 0}
- target: {fileID: 1410733827718496, guid: f3a451555dc514f46a69319857762eda, type: 3}
propertyPath: m_Name
value: symbol1Goal
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: f3a451555dc514f46a69319857762eda, type: 3}
--- !u!1001 &812231986

m_ClearFlags: 1
m_BackGroundColor: {r: 0.46666667, g: 0.5647059, b: 0.60784316, a: 1}
m_projectionMatrixMode: 1
m_GateFitMode: 2
m_FOVAxisMode: 0
m_GateFitMode: 2
m_FocalLength: 50
m_NormalizedViewPortRect:
serializedVersion: 2

objectReference: {fileID: 0}
- target: {fileID: 4726744827719472, guid: f3a451555dc514f46a69319857762eda, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4919701787480650, guid: f3a451555dc514f46a69319857762eda, type: 3}
propertyPath: m_LocalPosition.x
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []

Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayCollabAgent.cs (22 changes)


public bool isSpotter = true;
TextMesh m_MessageText;
TextMesh m_MessageRec;
int m_Message = 0;
//int m_Message = 0;
float[] m_Message = new float[3];
[HideInInspector]

public override void OnEpisodeBegin()
{
m_Message = -1;
//m_Message = -1;
System.Array.Clear(m_Message, 0, m_Message.Length);
var agentOffset = 10f;
if (isSpotter)
{

//{
// sensor.AddObservation(StepCount / (float)MaxStep);
//}
sensor.AddObservation(toOnehot(m_Message));
//sensor.AddObservation(toOnehot(m_Message));
sensor.AddObservation(m_Message);
}
float[] toOnehot(int message)

return onehot;
}
public void tellAgent(int message)
public void tellAgent(float[] message)
//public void tellAgent(int message)
//{
// m_Message = message;
//}
public override void OnActionReceived(ActionBuffers actionBuffers)
{
AddReward(-1f / MaxStep);

}
int comm_act = actionBuffers.DiscreteActions[1];
//int comm_act = actionBuffers.DiscreteActions[1];
float[] comm_act = actionBuffers.ContinuousActions.Array;
m_MessageText.text = "Sent:" + comm_act.ToString();
//m_MessageText.text = "Sent:" + comm_act.ToString();
m_MessageText.text = "Sent:" + comm_act[0].ToString("F2") + ", " + comm_act[1].ToString("F2") + ", " + comm_act[2].ToString("F2");
}
teammate.tellAgent(comm_act);
// if (isSpotter) // Test
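In HallwayCollabAgent.cs the spotter's message changes from a single int, expanded to a one-hot vector via toOnehot before being observed, to the raw 3-dimensional continuous action, which is passed to the teammate with tellAgent and observed directly. A rough Python equivalent of the two observation paths (function and variable names are illustrative, not the repo's):

    import numpy as np

    def to_onehot(message: int, size: int = 3) -> np.ndarray:
        # Old path: the discrete comm branch picks an index; the listener observes a one-hot.
        onehot = np.zeros(size, dtype=np.float32)
        onehot[message] = 1.0
        return onehot

    def continuous_message(continuous_actions: np.ndarray) -> np.ndarray:
        # New path: the spotter's 3 continuous actions are handed to the teammate verbatim
        # and appended to its vector observation via sensor.AddObservation.
        return np.asarray(continuous_actions[:3], dtype=np.float32)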

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (8 changes)


)
# this is a little bit of a hack but is whats recommended in the
# gumbel softmax documentation
one_hot_diff_comms = obs[-1] - comms[1].detach() + comms[1]
obs[-1] = one_hot_diff_comms
#one_hot_diff_comms = obs[-1] - comms[1].detach() + comms[1]
#obs[-1] = one_hot_diff_comms
with torch.no_grad():
rand_n = (obs[-1] - comms.mean) / (comms.std + 1E-7)
obs[-1] = comms.mean + rand_n * comms.std
log_probs, entropy, values = self.policy.evaluate_actions(
obs,
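The optimizer change replaces the straight-through (Gumbel-Softmax style) trick used for the discrete message with a reparameterization of the stored continuous message: the noise that would have produced the stored observation is recovered under no_grad, then the observation is rebuilt from the current policy's mean and std so gradients can flow back into the sender's comm head. A hedged sketch of both estimators (tensor names are illustrative, not the repo's exact code):

    import torch

    # Old, discrete comms: straight-through estimator around the stored one-hot.
    # Forward value stays the stored observation; gradients flow through the soft sample.
    def straight_through(stored_onehot: torch.Tensor, soft_sample: torch.Tensor) -> torch.Tensor:
        return stored_onehot - soft_sample.detach() + soft_sample

    # New, continuous comms: recover the noise that yields the stored message under the
    # current Gaussian, then rebuild the message differentiably from mean and std.
    def reparameterize(stored_msg: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
        with torch.no_grad():
            eps = (stored_msg - mean) / (std + 1e-7)
        return mean + eps * std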

ml-agents/mlagents/trainers/torch/action_model.py (3 changes)


:return: Given the input, an AgentAction of the actions generated by the policy and the corresponding
ActionLogProbs and entropies.
"""
comms = self._discrete_distribution.differentiable_forward(inputs, masks)
#comms = self._discrete_distribution.differentiable_forward(inputs, masks)
comms = self._continuous_distribution(inputs)
return comms
def forward(
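On the model side, the comm output no longer comes from the discrete distribution's differentiable (Gumbel-Softmax) forward but from the continuous distribution, whose mean and std are what the optimizer trick above re-uses. A minimal sketch of the two heads, assuming standard PyTorch primitives rather than the repo's distribution classes:

    import torch
    import torch.nn.functional as F

    # Old-style comm head: a Gumbel-Softmax relaxation that yields a one-hot message
    # with straight-through gradients.
    def gumbel_softmax_comms(logits: torch.Tensor, tau: float = 1.0) -> torch.Tensor:
        return F.gumbel_softmax(logits, tau=tau, hard=True)

    # New-style comm head: a Gaussian whose mean/std parameterize the continuous message.
    def gaussian_comms(mean: torch.Tensor, log_std: torch.Tensor) -> torch.distributions.Normal:
        return torch.distributions.Normal(mean, log_std.exp())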

Project/Assets/ML-Agents/Examples/Hallway/TFModels/diff-comms.onnx (1001 changes)
Diff too large to display.

Project/Assets/ML-Agents/Examples/Hallway/TFModels/diff-comms.onnx.meta (13 changes)


fileFormatVersion: 2
guid: 9d5d8d894cf434ee186ecd94a331ba3b
ScriptedImporter:
internalIDToNameTable: []
externalObjects: {}
serializedVersion: 2
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
optimizeModel: 1
forceArbitraryBatchSize: 1
treatErrorsAsWarnings: 0
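The .meta file simply registers the new diff-comms.onnx with the Barracuda ONNX importer (optimizeModel, forceArbitraryBatchSize, and treatErrorsAsWarnings are importer flags). To inspect the exported model outside Unity, a quick check with the onnx Python package works (path assumed relative to the repo root):

    import onnx

    model = onnx.load("Project/Assets/ML-Agents/Examples/Hallway/TFModels/diff-comms.onnx")
    # List graph outputs to see what the exported policy produces.
    print([output.name for output in model.graph.output])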