init_entcoef: 0.5
buffer_init_steps: 1000
# NOTE(review): an earlier duplicate `max_steps: 5.0e5` was removed. Duplicate
# mapping keys are invalid YAML; lenient parsers keep the last occurrence, so
# 50000 is the value that was already in effect. Confirm 50000 is intended.
max_steps: 50000
summary_freq: 2000
time_horizon: 5
# NOTE(review): no value is given here, so this key loads as null. Confirm
# whether nested settings are missing.
reward_signals:
hidden_units: 256
beta: 5.0e-3
# NOTE(review): an earlier duplicate `buffer_size: 50000` was removed; 256 was
# the last occurrence and therefore the effective value under last-wins
# parsers. It is smaller than buffer_init_steps (1000); confirm it is intended.
buffer_size: 256