|
|
|
|
|
|
mask = np.ones( |
|
|
|
( |
|
|
|
len(batched_step_result), |
|
|
|
sum(self.behavior_spec.discrete_action_branches), |
|
|
|
sum(self.action_spec.discrete_action_branches), |
|
|
|
), |
|
|
|
dtype=np.float32, |
|
|
|
) |
|
|
|
|
|
|
self.mask = tf.cast(self.mask_input, tf.int32) |
|
|
|
|
|
|
|
tf.Variable( |
|
|
|
int(self.behavior_spec.is_action_continuous()), |
|
|
|
int(self.action_spec.is_action_continuous()), |
|
|
|
name="is_continuous_control", |
|
|
|
trainable=False, |
|
|
|
dtype=tf.int32, |
|
|
|
|
|
|
tf.Variable( |
|
|
|
self.m_size, name="memory_size", trainable=False, dtype=tf.int32 |
|
|
|
) |
|
|
|
if self.behavior_spec.is_action_continuous(): |
|
|
|
if self.action_spec.is_action_continuous(): |
|
|
|
tf.Variable( |
|
|
|
self.act_size[0], |
|
|
|
name="action_output_shape", |
|
|
|