浏览代码

Fix for Discrete observations + Curiosity (#866)

/develop-generalizationTraining-TrainerController
GitHub 6 年前
当前提交
6df07946
共有 3 个文件被更改,包括 30 次插入和 11 次删除
  1. 2
      python/unitytrainers/models.py
  2. 36
      python/unitytrainers/ppo/models.py
  3. 3
      python/unitytrainers/ppo/trainer.py

2
python/unitytrainers/models.py


:param num_layers: number of hidden layers to create.
:return: List of hidden layer tensors.
"""
with tf.name_scope(scope):
with tf.variable_scope(scope):
vector_in = tf.reshape(observation_input, [-1])
state_onehot = tf.one_hot(vector_in, s_size)
hidden = state_onehot

36
python/unitytrainers/ppo/models.py


encoded_next_state_list.append(hidden_next_visual)
if self.o_size > 0:
# Create input op for next (t+1) vector observation.
self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32,
name='next_vector_observation')
encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder", False)
encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
True)
if self.brain.vector_observation_space_type == "continuous":
# Create input op for next (t+1) vector observation.
self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32,
name='next_vector_observation')
encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
False)
encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in,
self.curiosity_enc_size,
self.swish, 2,
"vector_obs_encoder",
True)
else:
self.next_vector_in = tf.placeholder(shape=[None, 1], dtype=tf.int32,
name='next_vector_observation')
encoded_vector_obs = self.create_discrete_observation_encoder(self.vector_in, self.o_size,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
False)
encoded_next_vector_obs = self.create_discrete_observation_encoder(self.next_vector_in, self.o_size,
self.curiosity_enc_size,
self.swish, 2, "vector_obs_encoder",
True)
encoded_state_list.append(encoded_vector_obs)
encoded_next_state_list.append(encoded_next_vector_obs)

3
python/unitytrainers/ppo/trainer.py


else:
feed_dict[self.model.vector_in] = np.array(buffer['vector_obs'][start:end]).reshape(
[-1, self.brain.num_stacked_vector_observations])
if self.use_curiosity:
feed_dict[self.model.next_vector_in] = np.array(buffer['next_vector_in'][start:end]) \
.reshape([-1, self.brain.num_stacked_vector_observations])
if self.use_visual_obs:
for i, _ in enumerate(self.model.visual_in):
_obs = np.array(buffer['visual_obs%d' % i][start:end])

正在加载...
取消
保存