# Training hyperparameters (flat mapping of scalar settings).
lambd: 0.95
num_epoch: 3
learning_rate_schedule: constant
# NOTE(review): `model_schedule` was previously declared twice in a row
# (`constant`, then `linear`). Duplicate mapping keys are invalid YAML;
# loaders that tolerate them keep the last occurrence, so `linear` is
# retained here as the effective value. Confirm `linear` is the intent.
model_schedule: linear
# Layer counts for the individual sub-networks.
encoder_layers: 2
action_layers: 2
policy_layers: 2