# Network and training hyperparameters. The key names match the Unity
# ML-Agents trainer-config schema -- presumably that is the consumer; confirm.
hidden_units: 256
summary_freq: 2000
time_horizon: 128
# This key was listed twice (0.1, then 0.5). Duplicate mapping keys are
# invalid YAML; loaders that tolerate them keep the last value, so 0.5 is the
# value that was effectively in use.
# NOTE(review): confirm 0.5, not 0.1, is the intended value.
init_entcoef: 0.5
batch_size: 64
# This key was listed twice (false, then true); the last value is kept.
# NOTE(review): confirm true, not false, is the intended value.
normalize: true
# Reward-signal settings, one nested mapping per signal. Written flat, the two
# `strength` entries were duplicate keys of a single mapping, and
# `reward_signals`, `extrinsic` and `curiosity` all parsed as null.
# NOTE(review): nesting reconstructed from key order -- verify against the
# consumer's schema.
reward_signals:
  extrinsic:
    strength: 1.0
    gamma: 0.99
  curiosity:
    strength: 0.02
    encoding_size: 256
# Section keyed by `StrikerLearning`; `max_steps` presumably belongs to it,
# but at this indentation both parse as sibling keys -- TODO confirm the
# intended nesting.
StrikerLearning:
# NOTE(review): `5.0e5` has no sign in its exponent, so YAML 1.1 loaders such
# as PyYAML read it as the string "5.0e5" rather than a float; confirm the
# consumer converts it, or write `5.0e+5` / `500000`.
max_steps: 5.0e5