浏览代码

Fix Pytests (#843)

/develop-generalizationTraining-TrainerController
GitHub 7 年前
当前提交
282d5bd4
共有 4 个文件被更改,包括 62 次插入7 次删除
  1. 46
      python/tests/test_bc.py
  2. 14
      python/tests/test_unitytrainers.py
  3. 5
      python/unitytrainers/bc/models.py
  4. 4
      python/unitytrainers/models.py

46
python/tests/test_bc.py


with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_communicator.return_value = MockCommunicator(
discrete=True, visual_input=False)
env = UnityEnvironment(' ')
model = BehavioralCloningModel(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.sample_action, model.policy]
feed_dict = {model.batch_size: 2,
model.dropout_rate: 1.0,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]])}
sess.run(run_list, feed_dict=feed_dict)
env.close()
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_communicator.return_value = MockCommunicator(
discrete=True, visual_input=True)
env = UnityEnvironment(' ')
model = BehavioralCloningModel(env.brains["RealFakeBrain"])

run_list = [model.sample_action, model.policy]
feed_dict = {model.batch_size: 2,
model.dropout_rate: 1.0,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),
model.visual_in[0]: np.ones([2, 40, 30, 3])}
sess.run(run_list, feed_dict=feed_dict)
env.close()
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
def test_visual_cc_bc_model(mock_communicator, mock_launcher):
tf.reset_default_graph()
with tf.Session() as sess:
with tf.variable_scope("FakeGraphScope"):
mock_communicator.return_value = MockCommunicator(
discrete=False, visual_input=True)
env = UnityEnvironment(' ')
model = BehavioralCloningModel(env.brains["RealFakeBrain"])
init = tf.global_variables_initializer()
sess.run(init)
run_list = [model.sample_action, model.policy]
feed_dict = {model.batch_size: 2,
model.sequence_length: 1,
model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
[3, 4, 5, 3, 4, 5]]),

14
python/tests/test_unitytrainers.py


summary_freq: 1000
use_recurrent: false
memory_size: 8
use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
dummy_bc_config = yaml.load('''

summary_freq: 1000
use_recurrent: false
memory_size: 8
use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
dummy_bad_config = yaml.load('''

mock_communicator.return_value = MockCommunicator(
discrete=True, visual_input=True)
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '', "tests/test_unitytrainers.py")
1, 1, 1, '', "tests/test_unitytrainers.py", False)
assert(tc.env.brain_names[0] == 'RealFakeBrain')

discrete=True, visual_input=True)
mock_load.return_value = dummy_config
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '','')
1, 1, 1, '','', False)
config = tc._load_config()
assert(len(config) == 1)
assert(config['default']['trainer'] == "ppo")

mock_communicator.return_value = MockCommunicator(
discrete=True, visual_input=True)
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '', "tests/test_unitytrainers.py")
1, 1, 1, '', "tests/test_unitytrainers.py", False)
# Test for PPO trainer
mock_load.return_value = dummy_config

)
b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4,
100 * fake_agent_id + 10 * step + 5])
a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=None, sequential=True)
a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=1, sequential=True)
assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True)
assert_array(a, np.array([

5
python/unitytrainers/bc/models.py


if self.use_recurrent:
tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
self.sequence_length)
self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False, name='pre_action',
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
if brain.vector_action_space_type == "discrete":

4
python/unitytrainers/models.py


activation=tf.nn.elu, reuse=reuse, name="conv_2")
hidden = c_layers.flatten(conv2)
hidden_flat = self.create_continuous_observation_encoder(hidden, h_size, activation, num_layers, scope, reuse)
with tf.variable_scope(scope+'/'+'flat_encoding'):
hidden_flat = self.create_continuous_observation_encoder(hidden, h_size, activation,
num_layers, scope, reuse)
return hidden_flat
@staticmethod

正在加载...
取消
保存