
fixing tensorflow tests

Branch: develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit: f654df34
5 changed files with 8 additions and 9 deletions:

  1. ml-agents/mlagents/trainers/policy/tf_policy.py (6 changes)
  2. ml-agents/mlagents/trainers/tests/tensorflow/test_models.py (5 changes)
  3. ml-agents/mlagents/trainers/tf/components/bc/model.py (2 changes)
  4. ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/model.py (2 changes)
  5. ml-agents/mlagents/trainers/tf/components/reward_signals/gail/model.py (2 changes)

ml-agents/mlagents/trainers/policy/tf_policy.py (6 changes)


 mask = np.ones(
     (
         len(batched_step_result),
-        sum(self.behavior_spec.discrete_action_branches),
+        sum(self.action_spec.discrete_action_branches),
     ),
     dtype=np.float32,
 )

 self.mask = tf.cast(self.mask_input, tf.int32)
 tf.Variable(
-    int(self.behavior_spec.is_action_continuous()),
+    int(self.action_spec.is_action_continuous()),
     name="is_continuous_control",
     trainable=False,
     dtype=tf.int32,

 tf.Variable(
     self.m_size, name="memory_size", trainable=False, dtype=tf.int32
 )
-if self.behavior_spec.is_action_continuous():
+if self.action_spec.is_action_continuous():
     tf.Variable(
         self.act_size[0],
         name="action_output_shape",

ml-agents/mlagents/trainers/tests/tensorflow/test_models.py (5 changes)


 from mlagents.trainers.tf.models import ModelUtils
 from mlagents.tf_utils import tf
-from mlagents_envs.base_env import BehaviorSpec, ActionType
+from mlagents_envs.base_env import BehaviorSpec, ActionSpec

-        ActionType.DISCRETE,
-        (1,),
+        ActionSpec(0, (1,)),
     )
     return behavior_spec
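
The fixture change collapses the separate action-type and action-shape arguments into a single ActionSpec value. A runnable sketch with hypothetical stand-in types (the field names are assumptions; only the ActionSpec(0, (1,)) call shape comes from the diff):

from typing import List, NamedTuple, Tuple

class ActionSpecStub(NamedTuple):
    # Hypothetical stand-in for mlagents_envs.base_env.ActionSpec.
    num_continuous: int
    discrete_action_branches: Tuple[int, ...]

    def is_action_continuous(self) -> bool:
        return self.num_continuous > 0

class BehaviorSpecStub(NamedTuple):
    # Hypothetical stand-in; the real BehaviorSpec also carries observation shapes.
    observation_shapes: List[Tuple[int, ...]]
    action_spec: ActionSpecStub

# 0 continuous actions, one discrete branch with a single action, as in the test.
behavior_spec = BehaviorSpecStub([(1,)], ActionSpecStub(0, (1,)))
assert not behavior_spec.action_spec.is_action_continuous()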

ml-agents/mlagents/trainers/tf/components/bc/model.py (2 changes)


 self.done_expert = tf.placeholder(shape=[None, 1], dtype=tf.float32)
 self.done_policy = tf.placeholder(shape=[None, 1], dtype=tf.float32)
-if self.policy.behavior_spec.is_action_continuous():
+if self.policy.action_spec.is_action_continuous():
     action_length = self.policy.act_size[0]
     self.action_in_expert = tf.placeholder(
         shape=[None, action_length], dtype=tf.float32
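
The continuous branch shown above sizes the expert-action placeholder by the action dimension; the discrete branch (outside this hunk) would use integer indices instead. A TF1-style sketch of that gating, with illustrative names (make_action_in_expert is not an ml-agents function, and the discrete shape is an assumption):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def make_action_in_expert(is_continuous: bool, act_size):
    # Hypothetical helper: choose the placeholder shape from the action-spec flag.
    if is_continuous:
        # Continuous: one float per action dimension.
        return tf.placeholder(shape=[None, act_size[0]], dtype=tf.float32)
    # Discrete (assumed, not shown in the hunk): one integer index per branch.
    return tf.placeholder(shape=[None, len(act_size)], dtype=tf.int32)

action_in_expert = make_action_in_expert(True, [4])
print(action_in_expert.shape)  # (?, 4)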

ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/model.py (2 changes)


"""
combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
if self.policy.behavior_spec.is_action_continuous():
if self.policy.action_spec.is_action_continuous():
pred_action = tf.layers.dense(
hidden, self.policy.act_size[0], activation=None
)
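
This hunk sits in the curiosity module's inverse model, which predicts the action taken between two consecutive state encodings; for continuous actions the head is a plain linear layer of width act_size[0]. A standalone TF1 sketch (the encoding width of 128 and action size of 2 are illustrative, and swish here is a local equivalent of ModelUtils.swish):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def swish(x):
    # x * sigmoid(x), matching what ModelUtils.swish computes.
    return x * tf.sigmoid(x)

# Illustrative encoding width; the real sizes come from the policy's encoder.
encoded_state = tf.placeholder(shape=[None, 128], dtype=tf.float32)
encoded_next_state = tf.placeholder(shape=[None, 128], dtype=tf.float32)

combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
hidden = tf.layers.dense(combined_input, 256, activation=swish)
# Continuous case: a linear head sized to the number of continuous actions (2 here).
pred_action = tf.layers.dense(hidden, 2, activation=None)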

ml-agents/mlagents/trainers/tf/components/reward_signals/gail/model.py (2 changes)


 self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
 self.done_policy = tf.expand_dims(self.done_policy_holder, -1)
-if self.policy.behavior_spec.is_action_continuous():
+if self.policy.action_spec.is_action_continuous():
     action_length = self.policy.act_size[0]
     self.action_in_expert = tf.placeholder(
         shape=[None, action_length], dtype=tf.float32
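
This GAIL hunk mirrors the bc/model.py one: the discriminator's expert and policy action placeholders are gated by the same action_spec.is_action_continuous() check, so the placeholder sketch after the bc/model.py hunk applies here unchanged.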
