|
|
|
|
|
|
update_buffer[RewardSignalUtil.rewards_key("extrinsic")] = update_buffer[ |
|
|
|
BufferKey.ENVIRONMENT_REWARDS |
|
|
|
] |
|
|
|
# Mock out value (critic) memories by reusing the buffer's MEMORY entries as CRITIC_MEMORY |
|
|
|
update_buffer[BufferKey.CRITIC_MEMORY] = update_buffer[BufferKey.MEMORY] |
|
|
|
return_stats = optimizer.update( |
|
|
|
update_buffer, |
|
|
|
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length, |
|
|
|