|
|
|
|
|
|
self.policy.sequence_length_ph: self.policy.sequence_length, |
|
|
|
} |
|
|
|
feed_dict[self.model.action_in_expert] = mini_batch_demo["actions"] |
|
|
|
if self.policy.behavior_spec.is_action_discrete(): |
|
|
|
if self.policy.action_spec.is_action_discrete(): |
|
|
|
sum(self.policy.behavior_spec.discrete_action_branches), |
|
|
|
sum(self.policy.action_spec.discrete_action_branches), |
|
|
|
), |
|
|
|
dtype=np.float32, |
|
|
|
) |
|
|
|