# Flat mapping of scalar settings; presumably training hyperparameters and
# network-size options — TODO confirm against the consuming tool's schema.
epsilon: 0.2
lambd: 0.95
num_epoch: 3
# NOTE(review): this key used to be declared twice (`constant`, then
# `linear`). Duplicate keys are invalid YAML; parsers that tolerate them keep
# the last value, so `linear` is retained here to preserve the effective
# setting. Confirm that `linear` (and not `constant`) is what was intended.
learning_rate_schedule: linear
model_schedule: linear
encoder_layers: 2
action_layers: 2