浏览代码

-

/bullet-hell-barracuda-test-1.3.1
vincentpierre 4 年前
当前提交
48bd37ee
共有 3 个文件被更改,包括 121 次插入38 次删除
  1. 26
      Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs
  2. 105
      config/ppo/Sorter_curriculum.yaml
  3. 28
      config/ppo/Sorter.yaml

26
Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs


public class SequencerAgent : Agent
{
private const int k_HighestTileValue = 20;
public bool SelectNewTiles;
int m_NumberOfTilesToSpawn;

private Vector3 m_StartingPos;
GameObject m_Area;
EnvironmentParameters m_ResetParams;
// private SequenceTile m_NextExpectedTile;

public override void Initialize()
{
m_Area = transform.parent.gameObject;
m_MaxNumberOfTiles = SequenceTilesList.Count;
m_MaxNumberOfTiles = k_HighestTileValue;
m_ResetParams = Academy.Instance.EnvironmentParameters;
m_BufferSensor = GetComponent<BufferSensorComponent>();
m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();
m_AgentRb = GetComponent<Rigidbody>();

/// </summary>
public override void OnEpisodeBegin()
{
m_MaxNumberOfTiles = (int)m_ResetParams.GetWithDefault("num_tiles", 5);
m_NumberOfTilesToSpawn = Random.Range(1, m_MaxNumberOfTiles);
m_NumberOfTilesToSpawn = Random.Range(1, m_MaxNumberOfTiles + 1);
SelectTilesToShow();
SetTilePositions();

foreach (var item in CurrentlyVisibleTilesList)
{
float[] listObservation = new float[m_MaxNumberOfTiles + 2];
float[] listObservation = new float[k_HighestTileValue + 2];
listObservation[m_MaxNumberOfTiles] = (item.transform.localRotation.eulerAngles.y / 360f);
listObservation[m_MaxNumberOfTiles + 1] = item.visited ? 1.0f : 0.0f;
listObservation[k_HighestTileValue] = (item.transform.localRotation.eulerAngles.y / 360f);
listObservation[k_HighestTileValue + 1] = item.visited ? 1.0f : 0.0f;
//Debug.Log(listObservation[20]);
//Debug.Log(listObservation[21]);
//Debug.Log(listObservation[22]);

int rndPosIndx = 0;
while (!posChosen)
{
rndPosIndx = Random.Range(0, SequenceTilesList.Count);
rndPosIndx = Random.Range(0, k_HighestTileValue);
if (!m_UsedPositionsList.Contains(rndPosIndx))
{
m_UsedPositionsList.Add(rndPosIndx);

item.transform.localRotation = Quaternion.Euler(0, rndPosIndx * (360f / SequenceTilesList.Count), 0);
item.transform.localRotation = Quaternion.Euler(0, rndPosIndx * (360f / k_HighestTileValue), 0);
item.rend.sharedMaterial = TileMaterial;
item.gameObject.SetActive(true);
}

int numLeft = m_NumberOfTilesToSpawn;
while (numLeft > 0)
{
int rndInt = Random.Range(0, m_MaxNumberOfTiles);
int rndInt = Random.Range(0, k_HighestTileValue);
var tmp = SequenceTilesList[rndInt];
if (!CurrentlyVisibleTilesList.Contains(tmp))
{

m_AgentRb.AddForce(dirToGo * m_PushBlockSettings.agentRunSpeed,
ForceMode.VelocityChange);
}
}
/// <summary>
/// Called every step of the engine. Here the agent takes an action.

public override void Heuristic(in ActionBuffers actionsOut)
{
var discreteActionsOut = actionsOut.DiscreteActions;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut.Clear();
//forward
if (Input.GetKey(KeyCode.W))

105
config/ppo/Sorter_curriculum.yaml


# PPO trainer configuration for the "Sorter" behavior, with a curriculum on
# the "num_tiles" environment parameter.
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512
      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: False
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 100000000
    time_horizon: 256
    summary_freq: 10000
    threaded: true
environment_parameters:
  # "num_tiles" is the key SequencerAgent.OnEpisodeBegin reads through
  # GetWithDefault("num_tiles", 5).
  num_tiles:
    # Ten lessons: the value grows by 2.0 per lesson (2.0 -> 20.0) while the
    # completion threshold grows by 0.05 per lesson (0.05 -> 0.45).
    # NOTE(review): with "measure: progress" the threshold is presumably the
    # fraction of max_steps elapsed -- confirm against the ML-Agents docs.
    curriculum:
      - name: Lesson0 # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.05
        value: 2.0
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.1
        value: 4.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.15
        value: 6.0
      - name: Lesson3
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.2
        value: 8.0
      - name: Lesson4
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.25
        value: 10.0
      - name: Lesson5
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 12.0
      - name: Lesson6
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.35
        value: 14.0
      - name: Lesson7
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.4
        value: 16.0
      - name: Lesson8
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.45
        value: 18.0
      # Final lesson: no completion_criteria, so there is nothing to advance to.
      # Its value (20.0) equals k_HighestTileValue in SequencerAgent.
      - name: Lesson9
        value: 20.0
env_settings:
  num_envs: 8

28
config/ppo/Sorter.yaml


# PPO trainer configuration for the "Sorter" behavior.
# This file defines no environment_parameters section, so no curriculum is
# applied here.
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 256
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: False
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 50000000
    time_horizon: 512
    summary_freq: 10000
    threaded: true
env_settings:
  num_envs: 8
正在加载...
取消
保存