cubewar and tennis stability test

5 年前 · 4ba0d98c
--- a/Project/Assets/ML-Agents/Examples/CubeWars/Prefabs/CubeWarArea.prefab
+++ b/Project/Assets/ML-Agents/Examples/CubeWars/Prefabs/CubeWarArea.prefab
  m_GameObject: {fileID: 1265511327613192}
  m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
  m_LocalPosition: {x: 0, y: 0.5, z: 0}
-  m_LocalScale: {x: 1, y: 2, z: 1}
+  m_LocalScale: {x: 2, y: 2, z: 2}
  m_Children:
  - {fileID: 4430881949022472}
  - {fileID: 4534035899647546}
  m_Name: 
  m_EditorClassIdentifier: 
  m_BrainParameters:
-    vectorObservationSize: 3
-    numStackedVectorObservations: 1
-    vectorActionSize: 0300000003000000030000000200000002000000
-    vectorActionDescriptions: []
-    vectorActionSpaceType: 0
+    VectorObservationSize: 3
+    NumStackedVectorObservations: 1
+    VectorActionSize: 0300000003000000030000000200000002000000
+    VectorActionDescriptions: []
+    VectorActionSpaceType: 0
  m_Model: {fileID: 11400000, guid: fa881d0c4f6b44ea9880a781d0771fc9, type: 3}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  m_RaysPerDirection: 20
  m_MaxRayDegrees: 180
  m_SphereCastRadius: 0.5
-  m_RayLength: 20
+  m_RayLength: 40
  m_RayLayerMask:
    serializedVersion: 2
    m_Bits: 4294967291
  m_EditorClassIdentifier: 
  DecisionPeriod: 5
  TakeActionsBetweenDecisions: 1
-  offsetStep: 0
 --- !u!114 &690297395971514217
 MonoBehaviour:
  m_ObjectHideFlags: 0
  agentParameters:
    maxStep: 0
  hasUpgradedFromAgentParameters: 1
-  maxStep: 3000
+  MaxStep: 3000
  area: {fileID: 1819751139121548}
  turnSpeed: 150
  moveSpeed: 1
--- a/Project/Assets/ML-Agents/Examples/CubeWars/Scenes/CubeWar.unity
+++ b/Project/Assets/ML-Agents/Examples/CubeWars/Scenes/CubeWar.unity
  m_ReflectionIntensity: 1
  m_CustomReflection: {fileID: 0}
  m_Sun: {fileID: 0}
-  m_IndirectSpecularColor: {r: 0.44971216, g: 0.49977785, b: 0.5756371, a: 1}
+  m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
  m_UseRadianceAmbientProbe: 0
 --- !u!157 &3
 LightmapSettings:
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 1009000883}
  m_LocalRotation: {x: 0.2588191, y: 0, z: 0, w: 0.9659258}
-  m_LocalPosition: {x: 0, y: 75, z: -140}
+  m_LocalPosition: {x: 0, y: 100, z: -215}
  m_LocalScale: {x: 1, y: 1, z: 1}
  m_Children: []
  m_Father: {fileID: 0}
--- a/Project/Assets/ML-Agents/Examples/CubeWars/Scripts/SmallCubeAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/CubeWars/Scripts/SmallCubeAgent.cs
    public override void Initialize()
    {
        m_AgentRb = GetComponent<Rigidbody>();
+        //m_AgentRb.useGravity = true;
+        m_ResetParams = Academy.Instance.EnvironmentParameters; 
        SetResetParameters();
    }

    {
        if (m_HitPoints <= 1f && m_HitPoints > .5f)
        {
+            m_Dead = false;
            gameObject.tag = "StrongSmallAgent";
            myBody.GetComponentInChildren<Renderer>().material = normalMaterial;
        }
+            m_Dead = false;
            gameObject.tag = "WeakSmallAgent";
            myBody.GetComponentInChildren<Renderer>().material = weakMaterial;

--- a/Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs

    public override void Initialize()
    {
-        m_Existential = 1f / MaxStep;
+        m_Existential = 1f / (2f * MaxStep);
        m_AgentRb = GetComponent<Rigidbody>();
        m_BallRb = ball.GetComponent<Rigidbody>();
        m_BallScript = ball.GetComponent<HitWall>();
        var rgV = m_AgentRb.velocity;
        m_AgentRb.velocity = new Vector3(Mathf.Clamp(rgV.x, -20, 20), Mathf.Min(rgV.y, 10f), rgV.z);

-        //timePenalty -= m_Existential;
+        timePenalty -= m_Existential;
        m_TextComponent.text = score.ToString();
    }

--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
    time_horizon: 50
    normalize: true
    self_play:
-        window: 100
+        window: 10
        play_against_latest_model_ratio: 0.5
        save_steps: 50000
        swap_steps: 2000

 SmallCubeSoldier:
    normalize: false
-    max_steps: 5.0e7
+    max_steps: 1.0e8
+    beta: 1.0e-2
    time_horizon: 1000
    num_layers: 2
    self_play:
-        swap_steps: 150000
+        swap_steps: 15000
-    max_steps: 5.0e7
+    max_steps: 1.0e8
+    beta: 1.0e-2
    time_horizon: 1000
    num_layers: 2
    self_play:
-        swap_steps: 17000
+        swap_steps: 1700
        team_change: 200000