
[Semantics] Modified the placeholder names (#381)

/develop-generalizationTraining-TrainerController
GitHub · 6 years ago
Current commit: a7c9096f
7 changed files with 40 additions and 40 deletions
  1. python/tests/test_bc.py (6 changes)
  2. python/tests/test_ppo.py (6 changes)
  3. python/tests/test_unitytrainers.py (8 changes)
  4. python/unitytrainers/bc/trainer.py (12 changes)
  5. python/unitytrainers/models.py (30 changes)
  6. python/unitytrainers/ppo/trainer.py (16 changes)
  7. unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (2 changes)
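Every change in this commit is a mechanical rename of the model inputs: `state`/`state_in` becomes `vector_observation`/`vector_in`, and `observation_in` becomes `visual_in`. As a quick orientation before the per-file diffs, here is a minimal sketch (not repository code) of how the renamed TensorFlow placeholders would be declared and fed; the sizes and the visual placeholder's name are assumed for illustration.

```python
import numpy as np
import tensorflow as tf

# Minimal sketch, TF 1.x style; s_size and the visual input shape are assumed values.
s_size = 6
vector_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32,
                           name='vector_observation')   # was name='state'
visual_in = tf.placeholder(shape=[None, 40, 30, 3], dtype=tf.float32,
                           name='visual_observation')   # hypothetical name, for illustration only
# A stand-in computation so the feed below has something to run.
output = tf.concat([tf.reduce_mean(visual_in, axis=[1, 2]), vector_in], axis=1)

with tf.Session() as sess:
    feed_dict = {vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                      [3, 4, 5, 3, 4, 5]], dtype=np.float32),
                 visual_in: np.ones([2, 40, 30, 3], dtype=np.float32)}
    print(sess.run(output, feed_dict=feed_dict).shape)   # (2, 9)
```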

python/tests/test_bc.py (6 changes)


  run_list = [model.sample_action, model.policy]
  feed_dict = {model.batch_size: 2,
               model.sequence_length: 1,
-              model.state_in: np.array([[1, 2, 3, 1, 2, 3],
+              model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                         [3, 4, 5, 3, 4, 5]])}
  sess.run(run_list, feed_dict=feed_dict)
  env.close()

  feed_dict = {model.batch_size: 2,
               model.dropout_rate: 1.0,
               model.sequence_length: 1,
-              model.state_in: np.array([[1, 2, 3, 1, 2, 3],
+              model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                         [3, 4, 5, 3, 4, 5]]),
-              model.observation_in[0]: np.ones([2, 40, 30, 3])}
+              model.visual_in[0]: np.ones([2, 40, 30, 3])}
  sess.run(run_list, feed_dict=feed_dict)
  env.close()

python/tests/test_ppo.py (6 changes)


               model.learning_rate]
  feed_dict = {model.batch_size: 2,
               model.sequence_length: 1,
-              model.state_in: np.array([[1, 2, 3, 1, 2, 3],
+              model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                         [3, 4, 5, 3, 4, 5]])}
  sess.run(run_list, feed_dict=feed_dict)
  env.close()

               model.learning_rate]
  feed_dict = {model.batch_size: 2,
               model.sequence_length: 1,
-              model.state_in: np.array([[1, 2, 3, 1, 2, 3],
+              model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                         [3, 4, 5, 3, 4, 5]]),
-              model.observation_in[0]: np.ones([2, 40, 30, 3])
+              model.visual_in[0]: np.ones([2, 40, 30, 3])
               }
  sess.run(run_list, feed_dict=feed_dict)
  env.close()

python/tests/test_unitytrainers.py (8 changes)


  b = Buffer()
  for fake_agent_id in range(4):
      for step in range(9):
-         b[fake_agent_id]['state'].append(
+         b[fake_agent_id]['vector_observation'].append(
              [100 * fake_agent_id + 10 * step + 1,
               100 * fake_agent_id + 10 * step + 2,
               100 * fake_agent_id + 10 * step + 3]

- a = b[1]['state'].get_batch(batch_size=2, training_length=None, sequential=True)
+ a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=None, sequential=True)
- a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=True)
+ a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True)
- a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=False)
+ a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=False)
  assert_array(a, np.array([
      [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
      [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
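As a side note on the assertion above (a worked check, not part of the repository code): the buffer is filled with `100 * agent_id + 10 * step + component`, so the expected batch for agent 2 with `batch_size=2`, `training_length=3`, `sequential=False` is the two most recent overlapping length-3 windows of that agent's trajectory.

```python
import numpy as np

# Reconstructing the expected value from the fill pattern used in the test above.
def window(agent_id, start):
    """Three consecutive steps of the fake trajectory, starting at `start`."""
    return [[100 * agent_id + 10 * step + c for c in (1, 2, 3)]
            for step in range(start, start + 3)]

expected = np.array([window(2, 5),   # steps 5-7 -> [[251, 252, 253], ...]
                     window(2, 6)])  # steps 6-8 -> [..., [281, 282, 283]]
```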

python/unitytrainers/bc/trainer.py (12 changes)


  run_list = [self.model.sample_action]
  if self.use_observations:
      for i, _ in enumerate(agent_brain.visual_observations):
-         feed_dict[self.model.observation_in[i]] = agent_brain.visual_observations[i]
+         feed_dict[self.model.visual_in[i]] = agent_brain.visual_observations[i]
- feed_dict[self.model.state_in] = agent_brain.vector_observations
+ feed_dict[self.model.vector_in] = agent_brain.vector_observations
  if self.use_recurrent:
      if agent_brain.memories.shape[1] == 0:
          agent_brain.memories = np.zeros((len(agent_brain.agents), self.m_size))

               self.model.batch_size: self.n_sequences,
               self.model.sequence_length: self.sequence_length}
  if not self.is_continuous:
-     feed_dict[self.model.state_in] = batch_states.reshape([-1, 1])
+     feed_dict[self.model.vector_in] = batch_states.reshape([-1, 1])
-     feed_dict[self.model.state_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size *
+     feed_dict[self.model.vector_in] = batch_states.reshape([-1, self.brain.vector_observation_space_size *
- for i, _ in enumerate(self.model.observation_in):
+ for i, _ in enumerate(self.model.visual_in):
-     feed_dict[self.model.observation_in[i]] = _obs.reshape([-1, _w, _h, _c])
+     feed_dict[self.model.visual_in[i]] = _obs.reshape([-1, _w, _h, _c])
  if self.use_recurrent:
      feed_dict[self.model.memory_in] = np.zeros([self.n_sequences, self.m_size])

python/unitytrainers/models.py (30 changes)


  class LearningModel(object):
      def __init__(self, m_size, normalize, use_recurrent, brain):
          self.brain = brain
-         self.state_in = None
+         self.vector_in = None
-         self.observation_in = []
+         self.visual_in = []
          self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
          self.sequence_length = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
          self.m_size = m_size

          else:
              c_channels = 3
-         observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32, name=name)
-         return observation_in
+         visual_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32, name=name)
+         return visual_in
-         self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
+         self.vector_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='vector_observation')
          if self.normalize:
              self.running_mean = tf.get_variable("running_mean", [s_size], trainable=False, dtype=tf.float32,
                                                  initializer=tf.zeros_initializer())

              self.update_mean = tf.assign(self.running_mean, self.new_mean)
              self.update_variance = tf.assign(self.running_variance, self.new_variance)
-             self.normalized_state = tf.clip_by_value((self.state_in - self.running_mean) / tf.sqrt(
+             self.normalized_state = tf.clip_by_value((self.vector_in - self.running_mean) / tf.sqrt(
-             self.normalized_state = self.state_in
+             self.normalized_state = self.vector_in
-         self.state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state')
+         self.vector_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='vector_observation')
      def create_continuous_state_encoder(self, h_size, activation, num_layers):
          """

          :param num_layers: number of hidden layers to create.
          :return: List of hidden layer tensors.
          """
-         conv1 = tf.layers.conv2d(self.observation_in[-1], 16, kernel_size=[8, 8], strides=[4, 4],
+         conv1 = tf.layers.conv2d(self.visual_in[-1], 16, kernel_size=[8, 8], strides=[4, 4],
                                   activation=tf.nn.elu)
          conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],
                                   activation=tf.nn.elu)

          :param num_layers: number of hidden layers to create.
          :return: List of hidden layer tensors.
          """
-         state_in = tf.reshape(self.state_in, [-1])
-         state_onehot = c_layers.one_hot_encoding(state_in, s_size)
+         vector_in = tf.reshape(self.vector_in, [-1])
+         state_onehot = c_layers.one_hot_encoding(vector_in, s_size)
          hidden = state_onehot
          for j in range(num_layers):
              hidden = tf.layers.dense(hidden, h_size, use_bias=False, activation=activation)

          else:
              activation_fn = self.swish
-         self.observation_in = []
+         self.visual_in = []
-         self.observation_in.append(visual_input)
+         self.visual_in.append(visual_input)
          self.create_vector_input(s_size)
          final_hiddens = []

          _half_point = int(m_size / 2)
          with tf.variable_scope(name):
              rnn_cell = tf.contrib.rnn.BasicLSTMCell(_half_point)
-             lstm_state_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:, :_half_point], memory_in[:, _half_point:])
+             lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:, :_half_point], memory_in[:, _half_point:])
-                 initial_state=lstm_state_in,
+                 initial_state=lstm_vector_in,
                  time_major=False,
                  dtype=tf.float32)
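One of the renamed locals above, `lstm_vector_in`, is the LSTM state tuple built by splitting the flat memory placeholder in half. A minimal sketch of that pattern (TF 1.x `contrib` API; `m_size` and the feature size are assumed values, not taken from the repository):

```python
import tensorflow as tf

m_size = 128       # assumed total memory size per agent
_half_point = int(m_size / 2)

memory_in = tf.placeholder(shape=[None, m_size], dtype=tf.float32, name='recurrent_in')
hidden = tf.placeholder(shape=[None, None, 32], dtype=tf.float32)   # [batch, time, features], assumed

rnn_cell = tf.contrib.rnn.BasicLSTMCell(_half_point)
# First half of the flat memory is the LSTM cell state, second half the hidden state.
lstm_vector_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:, :_half_point],
                                               memory_in[:, _half_point:])
recurrent_output, memory_out = tf.nn.dynamic_rnn(rnn_cell, hidden,
                                                 initial_state=lstm_vector_in,
                                                 time_major=False,
                                                 dtype=tf.float32)
```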

python/unitytrainers/ppo/trainer.py (16 changes)


  feed_dict[self.model.prev_action] = np.reshape(curr_brain_info.previous_vector_actions, [-1])
  if self.use_observations:
      for i, _ in enumerate(curr_brain_info.visual_observations):
-         feed_dict[self.model.observation_in[i]] = curr_brain_info.visual_observations[i]
+         feed_dict[self.model.visual_in[i]] = curr_brain_info.visual_observations[i]
- feed_dict[self.model.state_in] = curr_brain_info.vector_observations
+ feed_dict[self.model.vector_in] = curr_brain_info.vector_observations
  if self.use_recurrent:
      if curr_brain_info.memories.shape[1] == 0:
          curr_brain_info.memories = np.zeros((len(curr_brain_info.agents), self.m_size))

  feed_dict = {self.model.batch_size: len(info.vector_observations), self.model.sequence_length: 1}
  if self.use_observations:
      for i in range(len(info.visual_observations)):
-         feed_dict[self.model.observation_in[i]] = info.visual_observations[i]
+         feed_dict[self.model.visual_in[i]] = info.visual_observations[i]
- feed_dict[self.model.state_in] = info.vector_observations
+ feed_dict[self.model.vector_in] = info.vector_observations
  if self.use_recurrent:
      if info.memories.shape[1] == 0:
          info.memories = np.zeros((len(info.vector_observations), self.m_size))

      _buffer['prev_action'][start:end]).reshape([-1])
  if self.use_states:
      if self.brain.vector_observation_space_type == "continuous":
-         feed_dict[self.model.state_in] = np.array(
+         feed_dict[self.model.vector_in] = np.array(
-         feed_dict[self.model.state_in] = np.array(
+         feed_dict[self.model.vector_in] = np.array(
- for i, _ in enumerate(self.model.observation_in):
+ for i, _ in enumerate(self.model.visual_in):
-     feed_dict[self.model.observation_in[i]] = _obs.reshape([-1, _w, _h, _c])
+     feed_dict[self.model.visual_in[i]] = _obs.reshape([-1, _w, _h, _c])
  # Memories are zeros
  if self.use_recurrent:
      # feed_dict[self.model.memory_in] = np.zeros([batch_size, self.m_size])

unity-environment/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (2 changes)


  /// Modify only in inspector : Name of the placholder of the batch size
  public string BatchSizePlaceholderName = "batch_size";
  /// Modify only in inspector : Name of the state placeholder
- public string VectorObservationPlacholderName = "state";
+ public string VectorObservationPlacholderName = "vector_observation";
  /// Modify only in inspector : Name of the recurrent input
  public string RecurrentInPlaceholderName = "recurrent_in";
  /// Modify only in inspector : Name of the recurrent output
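The default string above is what the Unity inspector shows for the vector observation placeholder, and it has to agree with the node name baked into the TensorFlow graph by models.py (now 'vector_observation' instead of 'state'), which is why these *PlaceholderName fields exist. A rough Python-side illustration of that name lookup (sketch only, not repository code):

```python
import tensorflow as tf

# Sketch: the node name created in models.py is what the C# default must reference.
tf.placeholder(shape=[None, 6], dtype=tf.float32, name='vector_observation')

graph = tf.get_default_graph()
vector_node = graph.get_tensor_by_name('vector_observation:0')   # matches the new default
# graph.get_tensor_by_name('state:0') would raise a KeyError after this commit,
# which is why the C# default string is updated as well.
```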
