浏览代码

update sequencer env

/layernorm
Andrew Cohen 4 年前
当前提交
bedf9886
共有 4 个文件被更改，包括 126 次插入和 13 次删除
  1. 4
      Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs/Area.prefab
  2. 13
      Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs
  3. 17
      config/ppo/Sorter.yaml
  4. 105
      config/ppo/Sorter_curriculum.yaml

4
Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs/Area.prefab


hasUpgradedFromAgentParameters: 1
MaxStep: 5000
SelectNewTiles: 0
MaxNumberOfTiles: 5
MaxNumberOfTiles: 20
SequenceTilesList:
- {fileID: 3646228404148452640}
- {fileID: 3646228403214730312}

m_Script: {fileID: 11500000, guid: dd8012d5925524537b27131fef517017, type: 3}
m_Name:
m_EditorClassIdentifier:
ObservableSize: 7
ObservableSize: 22
MaxNumObservables: 20
--- !u!1 &3647816068884681782
GameObject:

13
Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs


// public int SpawnIndexPos;
// }
public bool SelectNewTiles;
EnvironmentParameters m_ResetParams;
int m_NumberOfTilesToSpawn;
public int MaxNumberOfTiles;

m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();
m_AgentRb = GetComponent<Rigidbody>();
m_StartingPos = transform.position;
m_ResetParams = Academy.Instance.EnvironmentParameters;
}

public override void OnEpisodeBegin()
{
m_NumberOfTilesToSpawn = Random.Range(1, MaxNumberOfTiles);
MaxNumberOfTiles = (int)m_ResetParams.GetWithDefault("num_tiles", 5);
m_NumberOfTilesToSpawn = Random.Range(1, MaxNumberOfTiles + 1);
SelectTilesToShow();
SetTilePositions();

foreach (var item in CurrentlyVisibleTilesList)
{
float[] listObservation = new float[MaxNumberOfTiles + 2];
float[] listObservation = new float[20 + 2];
listObservation[MaxNumberOfTiles] = (item.transform.localRotation.eulerAngles.y / 360f);
listObservation[MaxNumberOfTiles + 1] = item.visited ? 1.0f : 0.0f;
listObservation[20] = (item.transform.localRotation.eulerAngles.y / 360f);
listObservation[21] = item.visited ? 1.0f : 0.0f;
//Debug.Log(listObservation[20]);
//Debug.Log(listObservation[21]);
//Debug.Log(listObservation[22]);

int numLeft = m_NumberOfTilesToSpawn;
while (numLeft > 0)
{
int rndInt = Random.Range(0, MaxNumberOfTiles);
int rndInt = Random.Range(0, SequenceTilesList.Count);// MaxNumberOfTiles);
var tmp = SequenceTilesList[rndInt];
if (!CurrentlyVisibleTilesList.Contains(tmp))
{

17
config/ppo/Sorter.yaml


Sorter:
trainer_type: ppo
hyperparameters:
batch_size: 256
buffer_size: 10240
batch_size: 512
buffer_size: 40960
learning_rate: 0.0003
beta: 0.005
epsilon: 0.2

network_settings:
normalize: False
hidden_units: 128
hidden_units: 256
num_layers: 2
vis_encode_type: simple
reward_signals:

keep_checkpoints: 5
max_steps: 50000000
time_horizon: 512
max_steps: 100000000
time_horizon: 256
environment_parameters:
num_tiles:
curriculum:
- name: Lesson9
value: 12.0
num_envs: 8
num_envs: 1

105
config/ppo/Sorter_curriculum.yaml


behaviors:
Sorter:
trainer_type: ppo
hyperparameters:
batch_size: 512
buffer_size: 40960
learning_rate: 0.0003
beta: 0.005
epsilon: 0.2
lambd: 0.95
num_epoch: 3
learning_rate_schedule: linear
network_settings:
normalize: False
hidden_units: 128
num_layers: 2
vis_encode_type: simple
reward_signals:
extrinsic:
gamma: 0.99
strength: 1.0
keep_checkpoints: 5
max_steps: 100000000
time_horizon: 256
summary_freq: 10000
threaded: true
environment_parameters:
num_tiles:
curriculum:
- name: Lesson0 # The '-' is important as this is a list
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.05
value: 2.0
- name: Lesson1 # The '-' is important as this is a list
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.1
value: 4.0
- name: Lesson2 # This is the start of the third lesson
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.15
value: 6.0
- name: Lesson3
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.2
value: 8.0
- name: Lesson4 # The '-' is important as this is a list
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.25
value: 10.0
- name: Lesson5 # This is the start of the sixth lesson
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.3
value: 12.0
- name: Lesson6
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.35
value: 14.0
- name: Lesson7 # This is the start of the eighth lesson
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.4
value: 16.0
- name: Lesson8
completion_criteria:
measure: progress
behavior: Sorter
signal_smoothing: true
min_lesson_length: 100
threshold: 0.45
value: 18.0
- name: Lesson9
value: 20.0
env_settings:
num_envs: 8
正在加载...
取消
保存