# Unity ML-Agents trainer configuration for the TouchCube behavior.
# Active config: PPO (below). Two alternative SAC configs are kept as
# comments for reference — an older-format one above `behaviors:` and a
# current-format one at the bottom of the file.
#
# Old-format SAC config (pre-Release-1 schema), kept for reference:
# TouchCube:
#   trainer: sac
#   hyperparameters:
#     batch_size: 128
#     buffer_size: 50000
#     buffer_init_steps: 0
#     hidden_units: 128
#     init_entcoef: 1.0
#     learning_rate: 3.0e-4
#     learning_rate_schedule: constant
#     max_steps: 5.0e5
#     memory_size: 128
#     normalize: false
#     num_update: 1
#     train_interval: 1
#     steps_per_update: 1
#     num_layers: 2
#     time_horizon: 64
#     sequence_length: 64
#     summary_freq: 5000
#     tau: 0.005
#     use_recurrent: false
#     vis_encode_type: simple
#     reward_signals:
#       extrinsic:
#         strength: 1.0
#         gamma: 0.95

behaviors:
  TouchCube:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512  # 2048
      buffer_size: 5120  # 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    output_path: default
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 500  # 1000
    summary_freq: 30000
    threaded: true

# Alternative SAC config (current schema), kept for reference:
# behaviors:
#   TouchCube:
#     trainer_type: sac
#     hyperparameters:
#       learning_rate: 0.0003
#       learning_rate_schedule: constant
#       batch_size: 256
#       buffer_size: 500000
#       buffer_init_steps: 0
#       tau: 0.005
#       steps_per_update: 30.0
#       save_replay_buffer: false
#       init_entcoef: 1.0
#       reward_signal_steps_per_update: 30.0
#     network_settings:
#       normalize: true
#       hidden_units: 512
#       num_layers: 4
#       vis_encode_type: simple
#     reward_signals:
#       extrinsic:
#         gamma: 0.995
#         strength: 1.0
#     output_path: default
#     keep_checkpoints: 5
#     max_steps: 20000000
#     time_horizon: 1000
#     summary_freq: 30000
#     threaded: true