self.is_training,
self.load,
tanh_squash=True,
resample=True,
)
# Give every reward signal its own accumulator in ``collected_rewards``.
# ``defaultdict(int)`` makes unseen keys read as 0, exactly like the
# previous ``lambda: 0`` factory, but stays picklable and avoids a
# Python-level call on every missing-key lookup.
for _reward_signal in policy.reward_signals.keys():
    self.collected_rewards[_reward_signal] = defaultdict(int)