
Trim some public fields on the Agent (#3269)

* Trimming some of the methods of the agent, but leaving SetReward

* Fixing bugs

* modifying the environments

* Reintroducing IsDone and IsMaxStepReached

* Updating the Migrating doc

* More details on the Migration
Branch: asymm-envs · GitHub · 5 years ago
Current commit: a1a1126d
28 files changed, with 647 insertions and 606 deletions
  1. UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs (2 changes)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (21 changes)
  3. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (25 changes)
  4. UnitySDK/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (6 changes)
  5. UnitySDK/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab (6 changes)
  6. UnitySDK/Assets/ML-Agents/Examples/Crawler/Prefabs/DynamicPlatform.prefab (6 changes)
  7. UnitySDK/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab (6 changes)
  8. UnitySDK/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (2 changes)
  9. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (26 changes)
  10. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (21 changes)
  11. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab (932 changes)
  12. UnitySDK/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (6 changes)
  13. UnitySDK/Assets/ML-Agents/Examples/Hallway/Prefabs/VisualSymbolFinderArea.prefab (6 changes)
  14. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (4 changes)
  15. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab (6 changes)
  16. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab (6 changes)
  17. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (2 changes)
  18. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab (6 changes)
  19. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (6 changes)
  20. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (2 changes)
  21. UnitySDK/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab (6 changes)
  22. UnitySDK/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (24 changes)
  23. UnitySDK/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (12 changes)
  24. UnitySDK/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab (6 changes)
  25. UnitySDK/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (6 changes)
  26. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (87 changes)
  27. UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (3 changes)
  28. docs/Migrating.md (12 changes)

UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs (2 changes)

 serializedAgent.Update();
 var maxSteps = serializedAgent.FindProperty(
-    "agentParameters.maxStep");
+    "maxStep");
 EditorGUILayout.PropertyField(
     maxSteps,
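Since maxStep now lives directly on the Agent, the custom inspector only has to bind to the flat property path instead of the old nested one. As a minimal sketch of the same pattern with Unity's standard SerializedObject API (the MyAgent type and editor class are illustrative, not part of ML-Agents):

    using UnityEditor;
    using UnityEngine;

    [CustomEditor(typeof(MyAgent))] // illustrative target type
    public class MyAgentEditor : Editor
    {
        public override void OnInspectorGUI()
        {
            serializedObject.Update();
            // "maxStep" replaces the old nested path "agentParameters.maxStep".
            var maxStep = serializedObject.FindProperty("maxStep");
            EditorGUILayout.PropertyField(maxStep, new GUIContent("Max Step"));
            serializedObject.ApplyModifiedProperties();
        }
    }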

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (21 changes)

 }
 }
-static List<Agent> GetFakeAgents()
+static List<TestAgent> GetFakeAgents()
 {
 var goA = new GameObject("goA");
 var bpA = goA.AddComponent<BehaviorParameters>();

 bpB.brainParameters.numStackedVectorObservations = 1;
 var agentB = goB.AddComponent<TestAgent>();
-var agents = new List<Agent> { agentA, agentB };
+var agents = new List<TestAgent> { agentA, agentB };
 foreach (var agent in agents)
 {
 var agentEnableMethod = typeof(Agent).GetMethod("OnEnableHelper",

 actionMasks = new[] { true, false, false, false, false },
 };
-agentA.Info = infoA;
-agentB.Info = infoB;
+agentA._Info = infoA;
+agentB._Info = infoB;
 return agents;
 }

 var agent1 = agentInfos[1];
 var inputs = new List<AgentInfoSensorsPair>
 {
-new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
-new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
+new AgentInfoSensorsPair{agentInfo = agent0._Info, sensors = agent0.sensors},
+new AgentInfoSensorsPair{agentInfo = agent1._Info, sensors = agent1.sensors},
 };
 generator.Generate(inputTensor, batchSize, inputs);
 Assert.IsNotNull(inputTensor.data);

 var agent1 = agentInfos[1];
 var inputs = new List<AgentInfoSensorsPair>
 {
-new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
-new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
+new AgentInfoSensorsPair{agentInfo = agent0._Info, sensors = agent0.sensors},
+new AgentInfoSensorsPair{agentInfo = agent1._Info, sensors = agent1.sensors},
 };
 generator.Generate(inputTensor, batchSize, inputs);
 Assert.IsNotNull(inputTensor.data);

 var agent1 = agentInfos[1];
 var inputs = new List<AgentInfoSensorsPair>
 {
-new AgentInfoSensorsPair{agentInfo = agent0.Info, sensors = agent0.sensors},
-new AgentInfoSensorsPair{agentInfo = agent1.Info, sensors = agent1.sensors},
+new AgentInfoSensorsPair{agentInfo = agent0._Info, sensors = agent0.sensors},
+new AgentInfoSensorsPair{agentInfo = agent1._Info, sensors = agent1.sensors},
 };
 generator.Generate(inputTensor, batchSize, inputs);

UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (25 changes)

 {
 public class TestAgent : Agent
 {
+public AgentInfo _Info
+{
+get
+{
+return (AgentInfo)typeof(Agent).GetField("m_Info", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
+}
+set
+{
+typeof(Agent).GetField("m_Info", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(this, value);
+}
+}
+public bool IsDone()
+{
+return (bool)typeof(Agent).GetField("m_Done", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
+}
 public int initializeAgentCalls;
 public int collectObservationsCalls;
 public int agentActionCalls;

 var agentEnableMethod = typeof(Agent).GetMethod(
 "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
-agent1.agentParameters = new AgentParameters();
-agent2.agentParameters = new AgentParameters();
 var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
 decisionRequester.DecisionPeriod = 2;
 decisionRequester.Awake();

 var agentEnableMethod = typeof(Agent).GetMethod(
 "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
-agent1.agentParameters = new AgentParameters();
-agent2.agentParameters = new AgentParameters();
 var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
 decisionRequester.DecisionPeriod = 2;

 var agentEnableMethod = typeof(Agent).GetMethod(
 "OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
-agent1.agentParameters = new AgentParameters();
-agent2.agentParameters = new AgentParameters();
-agent1.agentParameters.maxStep = 20;
+agent1.maxStep = 20;
 agentEnableMethod?.Invoke(agent2, new object[] { });
 agentEnableMethod?.Invoke(agent1, new object[] { });
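Because m_Info and m_Done are no longer reachable through the public API, the _Info property and IsDone() helper above go through reflection. The same pattern can be factored into a small reusable test helper; a minimal sketch, where PrivateAccess and its methods are illustrative names rather than ML-Agents API:

    using System;
    using System.Reflection;

    static class PrivateAccess
    {
        // Reads a non-public instance field declared on `declaringType`.
        public static T GetField<T>(object target, Type declaringType, string fieldName)
        {
            var field = declaringType.GetField(fieldName, BindingFlags.Instance | BindingFlags.NonPublic);
            return (T)field.GetValue(target);
        }

        // Writes a non-public instance field declared on `declaringType`.
        public static void SetField(object target, Type declaringType, string fieldName, object value)
        {
            declaringType.GetField(fieldName, BindingFlags.Instance | BindingFlags.NonPublic)
                .SetValue(target, value);
        }
    }

    // Equivalent to TestAgent._Info and TestAgent.IsDone() above:
    // var info = PrivateAccess.GetField<AgentInfo>(agent, typeof(Agent), "m_Info");
    // var done = PrivateAccess.GetField<bool>(agent, typeof(Agent), "m_Done");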

UnitySDK/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: aaba48bf82bee4751aa7b89569e57f73, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 ball: {fileID: 1036225416237908}
 --- !u!114 &1306725529891448089
 MonoBehaviour:

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1533320402322554
 GameObject:
 m_ObjectHideFlags: 0
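Each prefab change in this commit follows the same shape: the serialized agentParameters block collapses into a flat maxStep on the Agent, while decision timing stays on the separate DecisionRequester component. A rough runtime equivalent of the 3DBall values above, assuming DecisionRequester exposes the DecisionPeriod and RepeatAction fields shown in the serialized data; the setup script itself is hypothetical:

    using UnityEngine;
    using MLAgents;

    public class Ball3DSetup : MonoBehaviour
    {
        void Awake()
        {
            // Was agentParameters.maxStep on the serialized Agent.
            var agent = GetComponent<Agent>();
            agent.maxStep = 5000;

            // Was onDemandDecision / numberOfActionsBetweenDecisions on the Agent.
            var requester = gameObject.AddComponent<DecisionRequester>();
            requester.DecisionPeriod = 5;   // serialized above as DecisionPeriod: 5
            requester.RepeatAction = true;  // serialized above as RepeatAction: 1
        }
    }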

UnitySDK/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: edf26e11cf4ed42eaa3ffb7b91bb4676, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 ball: {fileID: 1142513601053358}
 --- !u!114 &8193279139064749781
 MonoBehaviour:

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1978072206102878
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Crawler/Prefabs/DynamicPlatform.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: 2f37c30a5e8d04117947188818902ef3, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 target: {fileID: 4490950947783742}
 ground: {fileID: 4684408634944056}
 detectTargets: 1

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 0
 offsetStep: 0
 --- !u!1 &1520563409393552
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: 2f37c30a5e8d04117947188818902ef3, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 target: {fileID: 4749909135913778}
 ground: {fileID: 4856650706546504}
 detectTargets: 0

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 0
 offsetStep: 0
 --- !u!1 &1492926997393242
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (2 changes)

 {
 foreach (var bodyPart in m_JdController.bodyPartsDict.Values)
 {
-if (bodyPart.targetContact && !IsDone() && bodyPart.targetContact.touchingTarget)
+if (bodyPart.targetContact && bodyPart.targetContact.touchingTarget)
 {
 TouchedTarget();
 }

UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (26 changes)

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 area: {fileID: 1819751139121548}
 turnSpeed: 300
 moveSpeed: 2

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1482701732800114
 GameObject:
 m_ObjectHideFlags: 0

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 0
 area: {fileID: 1819751139121548}
 turnSpeed: 300
 moveSpeed: 2

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 0
 area: {fileID: 1819751139121548}
 turnSpeed: 300
 moveSpeed: 2

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 0
 area: {fileID: 1819751139121548}
 turnSpeed: 300
 moveSpeed: 2

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 0
 area: {fileID: 1819751139121548}
 turnSpeed: 300
 moveSpeed: 2

UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (21 changes)

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 0
 area: {fileID: 1145096862361766}
 turnSpeed: 300
 moveSpeed: 2

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 0
 area: {fileID: 1145096862361766}
 turnSpeed: 300
 moveSpeed: 2

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 area: {fileID: 1145096862361766}
 turnSpeed: 300
 moveSpeed: 2

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1399553220224106
 GameObject:
 m_ObjectHideFlags: 0

 m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 0
 area: {fileID: 1145096862361766}
 turnSpeed: 300
 moveSpeed: 2

UnitySDK/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab (932 changes)
(Diff not shown: the file diff is too large to display.)

UnitySDK/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: b446afae240924105b36d07e8d17a608, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 3000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 6
+maxStep: 3000
 ground: {fileID: 1510027348950282}
 area: {fileID: 1745841960385024}
 symbolOGoal: {fileID: 1410733827718496}

 m_EditorClassIdentifier:
 DecisionPeriod: 6
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1510027348950282
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Hallway/Prefabs/VisualSymbolFinderArea.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: b446afae240924105b36d07e8d17a608, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 3000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 6
+maxStep: 3000
 ground: {fileID: 1625056884785366}
 area: {fileID: 1689874756253538}
 symbolOGoal: {fileID: 1800868804754718}

 m_EditorClassIdentifier:
 DecisionPeriod: 6
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1377584197416466
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (4 changes)

 {
 if (useVectorObs)
 {
-AddVectorObs(GetStepCount() / (float)agentParameters.maxStep);
+AddVectorObs(GetStepCount() / (float)maxStep);
 }
 }

 public override void AgentAction(float[] vectorAction)
 {
-AddReward(-1f / agentParameters.maxStep);
+AddReward(-1f / maxStep);
 MoveAgent(vectorAction);
 }

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: dea8c4f2604b947e6b7b97750dde87ca, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 ground: {fileID: 1500989011945850}
 area: {fileID: 1125452240183160}
 areaBounds:

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1500989011945850
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: dea8c4f2604b947e6b7b97750dde87ca, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 ground: {fileID: 1913379827958244}
 area: {fileID: 1632733799967290}
 areaBounds:

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1626651094211584
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (2 changes)

 MoveAgent(vectorAction);
 // Penalty given each step to encourage agent to finish task quickly.
-AddReward(-1f / agentParameters.maxStep);
+AddReward(-1f / maxStep);
 }
 public override float[] Heuristic()

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 area: {fileID: 1464170487903594}
 areaSwitch: {fileID: 1432086782037750}
 useVectorObs: 1

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1148882946833254
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 area: {fileID: 1055559745433172}
 areaSwitch: {fileID: 1212218760704844}
 useVectorObs: 0

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1747856067778386
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (2 changes)

 public override void AgentAction(float[] vectorAction)
 {
-AddReward(-1f / agentParameters.maxStep);
+AddReward(-1f / maxStep);
 MoveAgent(vectorAction);
 }

UnitySDK/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: 220b156e3b142406c8b76d4db981d044, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 4000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 4
+maxStep: 4000
 pendulumA: {fileID: 1644872085946016}
 pendulumB: {fileID: 1053261483945176}
 hand: {fileID: 1654288206095398}

 m_EditorClassIdentifier:
 DecisionPeriod: 4
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1644872085946016
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (24 changes)

 m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 3000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
-team: 0
+maxStep: 3000
+agentRole: 0
 area: {fileID: 114559182131992928}

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1100217258374548
 GameObject:
 m_ObjectHideFlags: 0

 m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 3000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
-team: 1
+maxStep: 3000
+agentRole: 1
 area: {fileID: 114559182131992928}

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1131626411948014
 GameObject:
 m_ObjectHideFlags: 0

 m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 3000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
-team: 1
+maxStep: 3000
+agentRole: 0
 area: {fileID: 114559182131992928}

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1141134673700168
 GameObject:
 m_ObjectHideFlags: 0

 m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 3000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
-team: 0
+maxStep: 3000
+agentRole: 1
 area: {fileID: 114559182131992928}

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1931023723143276
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (12 changes)

 m_Script: {fileID: 11500000, guid: e51a3fb0b3186433ea84fc1e0549cc91, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 ball: {fileID: 1273406647218856}
 invertX: 0
 score: 0

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1194790474478638
 GameObject:
 m_ObjectHideFlags: 0

 m_Script: {fileID: 11500000, guid: e51a3fb0b3186433ea84fc1e0549cc91, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 ball: {fileID: 1273406647218856}
 invertX: 1
 score: 0

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1969551055586186
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: ccb0f85f0009540d7ad997952e2aed7b, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 5000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 5000
 target: {fileID: 4085853164035250}
 hips: {fileID: 4333477265252406}
 chest: {fileID: 4027052323869384}

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 0
 offsetStep: 0
 --- !u!1 &1907539933197724
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (6 changes)

 m_Script: {fileID: 11500000, guid: 676fca959b8ee45539773905ca71afa1, type: 3}
 m_Name:
 m_EditorClassIdentifier:
-agentParameters:
-  maxStep: 2000
-  onDemandDecision: 0
-  numberOfActionsBetweenDecisions: 5
+maxStep: 2000
 noWallBrain: {fileID: 11400000, guid: fb2ce36eb40b6480e94ea0b5d7573e47, type: 3}
 smallWallBrain: {fileID: 11400000, guid: fb2ce36eb40b6480e94ea0b5d7573e47, type: 3}
 bigWallBrain: {fileID: 11400000, guid: 0468bf44b1efd4992b6bf22cadb50d89, type: 3}

 m_EditorClassIdentifier:
 DecisionPeriod: 5
 RepeatAction: 1
 offsetStep: 0
 --- !u!1 &1264699583886832
 GameObject:
 m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (87 changes)

 public struct AgentAction
 {
 public float[] vectorActions;
-public float value;
-/// <summary>
-/// Struct that contains all the Agent-specific parameters provided in the
-/// Editor. This excludes the Brain linked to the Agent since it can be
-/// modified programmatically.
-/// </summary>
-[Serializable]
-public class AgentParameters
-{
-/// <summary>
-/// The maximum number of steps the agent takes before being done.
-/// </summary>
-/// <remarks>
-/// If set to 0, the agent can only be set to done programmatically (or
-/// when the Academy is done).
-/// If set to any positive integer, the agent will be set to done after
-/// that many steps. Note that setting the max step to a value greater
-/// than the academy max step value renders it useless.
-/// </remarks>
-public int maxStep;
-}
 /// <summary>

 BehaviorParameters m_PolicyFactory;
 /// <summary>
-/// Agent parameters specified within the Editor via AgentEditor.
-[HideInInspector] public AgentParameters agentParameters;
-public AgentInfo Info
-{
-get { return m_Info; }
-set { m_Info = value; }
-}
+/// The maximum number of steps the agent takes before being done.
+/// <remarks>
+/// If set to 0, the agent can only be set to done programmatically (or
+/// when the Academy is done).
+/// If set to any positive integer, the agent will be set to done after
+/// that many steps. Note that setting the max step to a value greater
+/// than the academy max step value renders it useless.
+/// </remarks>
+[HideInInspector] public int maxStep;
 /// Current Agent action (message sent from Brain).
 AgentAction m_Action;

 }
 /// <summary>
-/// Returns the current step counter (within the current epside).
+/// Returns the current step counter (within the current episode).
 /// </summary>
 /// <returns>
 /// Current episode number.

 return m_StepCount;
 }
-/// <summary>
-/// Resets the step reward and possibly the episode reward for the agent.
-/// </summary>
-public void ResetReward()
-{
-m_Reward = 0f;
-if (m_Done)
-{
-m_CumulativeReward = 0f;
-}
-}
 /// <summary>

 #endif
 m_Reward += increment;
 m_CumulativeReward += increment;
 }
-/// <summary>
-/// Retrieves the step reward for the Agent.
-/// </summary>
-/// <returns>The step reward.</returns>
-public float GetReward()
-{
-return m_Reward;
-}
 /// <summary>

 }
-/// <summary>
-/// Updates the value of the agent.
-/// </summary>
-public void UpdateValueAction(float value)
-{
-m_Action.value = value;
-}
-protected float GetValueEstimate()
-{
-return m_Action.value;
-}
 /// <summary>
 /// Scales continuous action from [-1, 1] to arbitrary range.
 /// </summary>
 /// <param name="rawAction"></param>

 /// Signals the agent that it must reset if its done flag is set to true.
 void ResetIfDone()
 {
-if (IsDone())
+if (m_Done)
 {
 _AgentReset();
 }

 void SendInfo()
 {
 // If the Agent is done, it has just reset and thus requires a new decision
-if (m_RequestDecision || IsDone())
+if (m_RequestDecision || m_Done)
-ResetReward();
+m_Reward = 0f;
+if (m_Done)
+{
+m_CumulativeReward = 0f;
+}
 m_Done = false;
 m_MaxStepReached = false;
 m_RequestDecision = false;

 AgentAction(m_Action.vectorActions);
 }
-if ((m_StepCount >= agentParameters.maxStep)
-&& (agentParameters.maxStep > 0))
+if ((m_StepCount >= maxStep) && (maxStep > 0))
 {
 m_MaxStepReached = true;
 Done();
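With GetReward(), ResetReward(), UpdateValueAction() and GetValueEstimate() removed, user code is left with AddReward/SetReward for writing rewards and GetCumulativeReward() for reading them. A minimal sketch of an Agent subclass under the trimmed API; the class and its ReachedGoal() helper are illustrative placeholders:

    using MLAgents;

    public class ExampleAgent : Agent
    {
        public override void AgentAction(float[] vectorAction)
        {
            // Per-step penalty now uses the public maxStep field directly.
            AddReward(-1f / maxStep);

            if (ReachedGoal())
            {
                SetReward(1f); // overwrites the step reward; still public
                Done();        // the maxStep check above calls this automatically
            }

            // The episode total is still readable; the per-step GetReward() is not.
            var episodeReward = GetCumulativeReward();
        }

        bool ReachedGoal() { return false; } // placeholder for the example
    }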

UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (3 changes)

 {
 return new AgentAction
 {
-vectorActions = aap.VectorActions.ToArray(),
-value = aap.Value,
+vectorActions = aap.VectorActions.ToArray()
 };
 }

docs/Migrating.md (12 changes)

 ## Migrating from 0.13 to latest
 ### Important changes
 * The `Decision Period` and `On Demand decision` checkbox have been removed from the Agent. On demand decision is now the default (calling `RequestDecision` on the Agent manually.)
 * The Academy class was changed to a singleton, and its virtual methods were removed.
 * Trainer steps are now counted per-Agent, not per-environment as in previous versions. For instance, if you have 10 Agents in the scene, 20 environment steps now correspond to 200 steps as printed in the terminal and in Tensorboard.
 * Curriculum config files are now YAML formatted and all curricula for a training run are combined into a single file.
+* The `agentParameters` field of the Agent has been removed. (It contained only the `maxStep` information.)
+* `maxStep` is now a public field on the Agent. (It was moved from `agentParameters`.)
+* The `Info` field of the Agent has been made private. (It was only used internally and not meant to be modified outside of the Agent.)
+* The `GetReward()` method on the Agent has been removed. (It was being confused with `GetCumulativeReward()`.)
+* The `AgentAction` struct no longer contains a `value` field. (Value estimates were not set during inference.)
+* The `GetValueEstimate()` method on the Agent has been removed.
+* The `UpdateValueAction()` method on the Agent has been removed.
+* If you were not using `On Demand Decision` for your Agent, you **must** add a `DecisionRequester` component to your Agent GameObject and set its `Decision Period` field to the old `Decision Period` of the Agent.
 * If you have a class that inherits from Academy:
   * If the class didn't override any of the virtual methods and didn't store any additional data, you can just remove the old script from the scene.
   * If the class had additional data, create a new MonoBehaviour and store the data on this instead.

 * Combine curriculum configs into a single file. See [the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of the new curriculum config format. A tool like https://www.json2yaml.com may be useful to help with the conversion.
 * If your Agent implements `AgentOnDone` and your Agent does not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
-* If you were not using `On Demand Decision` for your Agent, you must add a `DecisionRequester` component to your Agent GameObject and set its `Decision Period` field to the old `Decision Period` of the Agent.
 ## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0
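Taken together, the Migrating.md bullets above boil down to a small mechanical rewrite of existing Agent subclasses. A hedged before/after sketch (the class is illustrative; commented-out lines show the 0.13 forms):

    using MLAgents;

    public class MigratedAgent : Agent
    {
        public override void AgentAction(float[] vectorAction)
        {
            // 0.13: AddReward(-1f / agentParameters.maxStep);
            AddReward(-1f / maxStep);

            // 0.13: var stepReward = GetReward();
            var episodeReward = GetCumulativeReward();
        }
    }

    // 0.13, with "On Demand Decision" unchecked: decision timing lived on the Agent.
    // Now: add a DecisionRequester component next to the Agent and set its
    // Decision Period to the Agent's old Decision Period.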
