# Run configuration: a flat mapping of integer, string, and boolean options.
# NOTE(review): the group notes below are inferred from key names only;
# confirm each against the code that loads this file.
#
# Schedule settings (presumably epoch count and learning-rate schedule type).
num_epoch: 3
learning_rate_schedule: constant
# Network-shape settings (presumably layer counts and a feature width;
# units and exact meaning are not visible here -- TODO confirm).
encoder_layers: 3
action_layers: 2
action_feature_size: 128
reuse_encoder: true
# Procedure switches; their semantics are defined by the consumer and cannot
# be determined from this file.
in_epoch_alter: false
use_op_buffer: false
separate_value_train: true
# Per-component train_*/load_* toggles.
# As set here: `model` and `action` have train_* false and load_* true,
# while `policy` has train_* true and load_* false.
train_model: false
load_model: true
train_action: false
load_action: true
train_policy: true
load_policy: false
# NOTE(review): unlike the components above, no `load_value` key accompanies
# `train_value` in this file -- verify the consumer's default is intended.
train_value: true