浏览代码

Fix naming conflict between Curiosity and GAIL (#2406)

/hotfix-v0.9.2a
GitHub 6 年前
当前提交
ab690b93
共有 2 个文件被更改,包括 16 次插入 和 16 次删除
  1. 12
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
  2. 20
      ml-agents/mlagents/trainers/components/reward_signals/gail/model.py

12
ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py


# Create input ops for next (t+1) visual observations.
next_visual_input = LearningModel.create_visual_input(
self.policy_model.brain.camera_resolutions[i],
name="next_visual_observation_" + str(i),
name="curiosity_next_visual_observation_" + str(i),
)
self.next_visual_in.append(next_visual_input)

self.encoding_size,
LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"curiosity_stream_{}_visual_obs_encoder".format(i),
False,
)

LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"curiosity_stream_{}_visual_obs_encoder".format(i),
True,
)
visual_encoders.append(encoded_visual)

self.next_vector_in = tf.placeholder(
shape=[None, self.policy_model.vec_obs_size],
dtype=tf.float32,
name="next_vector_observation",
name="curiosity_next_vector_observation",
)
encoded_vector_obs = self.policy_model.create_vector_observation_encoder(

2,
"vector_obs_encoder",
"curiosity_vector_obs_encoder",
False,
)
encoded_next_vector_obs = self.policy_model.create_vector_observation_encoder(

2,
"vector_obs_encoder",
"curiosity_vector_obs_encoder",
True,
)
encoded_state_list.append(encoded_vector_obs)

20
ml-agents/mlagents/trainers/components/reward_signals/gail/model.py


# Create input ops for next (t+1) visual observations.
visual_input = self.policy_model.create_visual_input(
self.policy_model.brain.camera_resolutions[i],
name="visual_observation_" + str(i),
name="gail_visual_observation_" + str(i),
)
self.expert_visual_in.append(visual_input)

LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"gail_stream_{}_visual_obs_encoder".format(i),
False,
)

LearningModel.swish,
1,
"stream_{}_visual_obs_encoder".format(i),
"gail_stream_{}_visual_obs_encoder".format(i),
True,
)
visual_policy_encoders.append(encoded_policy_visual)

concat_input,
self.h_size,
activation=LearningModel.swish,
name="d_hidden_1",
name="gail_d_hidden_1",
reuse=reuse,
)

activation=LearningModel.swish,
name="d_hidden_2",
name="gail_d_hidden_2",
reuse=reuse,
)

hidden_2,
self.z_size,
reuse=reuse,
name="z_mean",
name="gail_z_mean",
kernel_initializer=LearningModel.scaled_init(0.01),
)

estimate_input,
1,
activation=tf.nn.sigmoid,
name="d_estimate",
name="gail_d_estimate",
reuse=reuse,
)
return estimate, z_mean, concat_input

"""
if self.use_vail:
self.z_sigma = tf.get_variable(
"sigma_vail",
"gail_sigma_vail",
self.z_size,
dtype=tf.float32,
initializer=tf.ones_initializer(),

self.use_noise = tf.placeholder(
shape=[1], dtype=tf.float32, name="NoiseLevel"
shape=[1], dtype=tf.float32, name="gail_NoiseLevel"
)
self.expert_estimate, self.z_mean_expert, _ = self.create_encoder(
self.encoded_expert, self.expert_action, self.done_expert, reuse=False

reuse=True,
)
self.discriminator_score = tf.reshape(
self.policy_estimate, [-1], name="GAIL_reward"
self.policy_estimate, [-1], name="gail_reward"
)
self.intrinsic_reward = -tf.log(1.0 - self.discriminator_score + EPSILON)

正在加载...
取消
保存