init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
for _ in range(20):
    gail_rp.update(buffer_policy)
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    # Training the discriminator should push the expert reward up and the
    # policy reward down relative to their initial values.
    assert reward_expert >= init_reward_expert
    assert reward_policy <= init_reward_policy
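
# Why these assertions should hold (a sketch, not the repo's exact code):
# GAIL's reward is a monotone transform of the discriminator output D, an
# ML-Agents-style -log(1 - D) survival bonus. As the discriminator trains,
# D rises on expert transitions and falls on policy ones, moving the two
# rewards in opposite directions. EPSILON below is an assumed numerical guard.
import torch

EPSILON = 1e-7  # keeps log() finite as D approaches 1

def gail_reward(discriminator_prob: torch.Tensor) -> torch.Tensor:
    # Monotonically increasing in the discriminator's "expert" probability.
    return -torch.log(1.0 - discriminator_prob * (1.0 - EPSILON))

print(gail_reward(torch.tensor([0.1, 0.5, 0.9])))  # larger D => larger reward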
# Estimator head: a single linear unit plus sigmoid, producing the probability
# that the encoded input came from the expert demonstrations.
self._estimator = torch.nn.Sequential(
    linear_layer(estimator_input_size, 1, kernel_gain=0.2), torch.nn.Sigmoid()
)
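
# linear_layer is ML-Agents' wrapper around torch.nn.Linear with configurable
# weight initialization. A minimal stand-in (initialization details here are
# assumptions, not the library's exact defaults):
import torch

def linear_layer_sketch(
    input_size: int, output_size: int, kernel_gain: float = 1.0
) -> torch.nn.Linear:
    layer = torch.nn.Linear(input_size, output_size)
    # Xavier-style init scaled by kernel_gain; a small gain such as 0.2 keeps
    # the sigmoid near 0.5 at the start, so early discriminator outputs (and
    # hence early GAIL rewards) stay close to uninformative.
    torch.nn.init.xavier_uniform_(layer.weight.data)
    layer.weight.data *= kernel_gain
    layer.bias.data.zero_()
    return layer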
def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
    """
    Creates the action tensor fed to the discriminator: the raw action in the
    continuous case, a concatenation of one-hot tensors in the discrete case.
    """
    # Body is a sketch: it assumes the network's action flattener
    # (self._action_flattener), as used elsewhere in this class.
    return self._action_flattener.forward(
        torch.as_tensor(mini_batch["actions"], dtype=torch.float)
    )
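
# Illustration: for discrete branches of sizes (3, 2), the action (2, 0)
# flattens to the concatenated one-hots [0, 0, 1, 1, 0].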