# ml-agents/config/ppo/Match3.yaml
#
# PPO trainer configuration for the Match3 behaviors listed under `behaviors`.
# Indentation is two spaces per level; YAML does not permit tab characters
# in indentation, so none are used here.

# NOTE(review): by its name this section presumably supplies the fallback
# values for every entry under `behaviors` that does not override them --
# confirm against the ML-Agents training-configuration documentation.
default_settings:
  trainer_type: ppo
  hyperparameters:
    batch_size: 16
    buffer_size: 120
    learning_rate: 0.0003
    beta: 0.005
    epsilon: 0.2
    lambd: 0.99
    num_epoch: 3
    learning_rate_schedule: constant
  network_settings:
    normalize: true
    hidden_units: 256
    num_layers: 4
    vis_encode_type: match3
  reward_signals:
    extrinsic:
      gamma: 0.99
      strength: 1.0
  keep_checkpoints: 5
  max_steps: 5000000
  time_horizon: 128
  summary_freq: 10000

behaviors:
  Match3SimpleHeuristic:
    # Settings can be very simple since we don't care about actually
    # training the model.
    trainer_type: ppo
    hyperparameters:
      batch_size: 16
      buffer_size: 120
    network_settings:
      hidden_units: 4
      num_layers: 1
    max_steps: 5000000
    summary_freq: 10000
  Match3SmartHeuristic:
    # Settings can be very simple since we don't care about actually
    # training the model. Values are identical to Match3SimpleHeuristic.
    trainer_type: ppo
    hyperparameters:
      batch_size: 16
      buffer_size: 120
    network_settings:
      hidden_units: 4
      num_layers: 1
    max_steps: 5000000
    summary_freq: 10000