
Merge pull request #520 from Unity-Technologies/feature-trainer-ppo-is-continuous

Feature trainer ppo is continuous
GitHub committed 7 years ago
Commit: 74064891
1 file changed, 8 insertions and 7 deletions
  1. python/unitytrainers/ppo/trainer.py  (+8 −7)



  self.training_buffer = Buffer()
  self.cumulative_rewards = {}
  self.episode_steps = {}
- self.is_continuous = (env.brains[brain_name].vector_action_space_type == "continuous")
+ self.is_continuous_action = (env.brains[brain_name].vector_action_space_type == "continuous")
+ self.is_continuous_observation = (env.brains[brain_name].vector_observation_space_type == "continuous")
  self.use_observations = (env.brains[brain_name].number_visual_observations > 0)
  self.use_states = (env.brains[brain_name].vector_observation_space_size > 0)
  self.summary_path = trainer_parameters['summary_path']
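The split matters because a brain's action space and observation space can have different types, so a single flag cannot describe both. A minimal sketch (hypothetical brain settings, not taken from this repository) of how the two new flags can diverge:

# Hypothetical brain: continuous observations but a discrete action space.
vector_action_space_type = "discrete"
vector_observation_space_type = "continuous"

# The old single flag keyed everything off the action space type:
is_continuous = (vector_action_space_type == "continuous")                    # False

# The new flags track each space independently:
is_continuous_action = (vector_action_space_type == "continuous")             # False
is_continuous_observation = (vector_observation_space_type == "continuous")   # True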

  feed_dict = {self.model.batch_size: len(curr_brain_info.vector_observations), self.model.sequence_length: 1}
  run_list = [self.model.output, self.model.all_probs, self.model.value, self.model.entropy,
              self.model.learning_rate]
- if self.is_continuous:
+ if self.is_continuous_action:
      run_list.append(self.model.epsilon)
  elif self.use_recurrent:
      feed_dict[self.model.prev_action] = np.reshape(curr_brain_info.previous_vector_actions, [-1])

  curr_brain_info.memories = np.zeros((len(curr_brain_info.agents), self.m_size))
  feed_dict[self.model.memory_in] = curr_brain_info.memories
  run_list += [self.model.memory_out]
- if (self.is_training and self.brain.vector_observation_space_type == "continuous" and
+ if (self.is_training and self.is_continuous_observation and
        self.use_states and self.trainer_parameters['normalize']):
      new_mean, new_variance = self.running_average(
          curr_brain_info.vector_observations, steps, self.model.running_mean, self.model.running_variance)
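For reference, a hedged sketch of a helper matching the running_average call signature above: an incremental, Welford-style update of per-feature statistics that folds the batch mean of the new vector observations in as one more sample. This is an assumption about the helper's behaviour, not the repository's implementation; the call site passes model tensors, but the sketch treats the statistics as plain NumPy arrays.

import numpy as np

def running_average(data, steps, running_mean, running_variance):
    # data: batch of vector observations, shape [n_agents, obs_size]
    # steps: number of samples folded in so far
    # running_variance here accumulates squared deviations (an M2-style term);
    # a normalized variance would divide it by the sample count.
    current_x = np.mean(data, axis=0)
    new_mean = running_mean + (current_x - running_mean) / (steps + 1)
    new_variance = running_variance + (current_x - new_mean) * (current_x - running_mean)
    return new_mean, new_variance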

  if stored_info.memories.shape[1] == 0:
      stored_info.memories = np.zeros((len(stored_info.agents), self.m_size))
  self.training_buffer[agent_id]['memory'].append(stored_info.memories[idx])
- if self.is_continuous:
+ if self.is_continuous_action:
      epsi = stored_take_action_outputs[self.model.epsilon]
      self.training_buffer[agent_id]['epsilons'].append(epsi[idx])
  actions = stored_take_action_outputs[self.model.output]
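Storing epsilon only applies to continuous actions: presumably the policy samples its action as mean plus standard deviation times epsilon, and keeping the same noise sample lets the update pass recompute the probability of the action that was actually taken. A toy illustration of that sampling scheme (names are illustrative, not the model's API):

import numpy as np

action_size = 2
mu = np.zeros(action_size)                      # policy mean for one agent
sigma = np.ones(action_size)                    # policy standard deviation
epsilon = np.random.normal(size=action_size)    # noise sample, the value stored in the buffer
action = mu + sigma * epsilon                   # the continuous action actually executed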

  if info.memories.shape[1] == 0:
      info.memories = np.zeros((len(info.vector_observations), self.m_size))
  feed_dict[self.model.memory_in] = info.memories
- if not self.is_continuous and self.use_recurrent:
+ if not self.is_continuous_action and self.use_recurrent:
      feed_dict[self.model.prev_action] = np.reshape(info.previous_vector_actions, [-1])
  value_next = self.sess.run(self.model.value, feed_dict)[l]
  agent_id = info.agents[l]

  self.model.advantage: np.array(_buffer['advantages'][start:end]).reshape([-1, 1]),
  self.model.all_old_probs: np.array(
      _buffer['action_probs'][start:end]).reshape([-1, self.brain.vector_action_space_size])}
- if self.is_continuous:
+ if self.is_continuous_action:
      feed_dict[self.model.epsilon] = np.array(
          _buffer['epsilons'][start:end]).reshape([-1, self.brain.vector_action_space_size])
  else:

  feed_dict[self.model.prev_action] = np.array(
      _buffer['prev_action'][start:end]).reshape([-1])
  if self.use_states:
-     if self.brain.vector_observation_space_type == "continuous":
+     if self.is_continuous_observation:
          feed_dict[self.model.vector_in] = np.array(
              _buffer['states'][start:end]).reshape(
              [-1, self.brain.vector_observation_space_size * self.brain.num_stacked_vector_observations])
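One detail in this last hunk: the stored vector observations are stacked, so the batch feed is reshaped to the observation size times the number of stacked observations. A small shape check with hypothetical sizes (not values from the repository):

import numpy as np

obs_size, num_stacked, batch = 8, 3, 32
states = [np.random.rand(obs_size * num_stacked) for _ in range(batch)]   # one stacked vector per stored step
vector_in = np.array(states).reshape([-1, obs_size * num_stacked])
assert vector_in.shape == (batch, obs_size * num_stacked)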
