浏览代码

Merge pull request #309 from Unity-Technologies/dev-imitation

Miscellaneous Fixes
/develop-generalizationTraining-TrainerController
GitHub 7 年前
当前提交
d1cf3030
共有 5 个文件被更改，包括 53 次插入和 57 次删除
  1. 68
      python/trainers/imitation_trainer.py
  2. 34
      python/trainers/ppo_models.py
  3. 2
      python/trainers/trainer.py
  4. 2
      unity-environment/Assets/ML-Agents/Scripts/Academy.cs
  5. 4
      unity-environment/Assets/ML-Agents/Scripts/Brain.cs

68
python/trainers/imitation_trainer.py


def __str__(self):
return '''Hypermarameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
@property

:return: a tuple containing action, memories, values and an object
to be passed to add experiences
"""
E = info[self.brain_name]
agent_action = self.sess.run(self.network.sample_action, feed_dict={self.network.state: E.states})
agent_brain = info[self.brain_name]
agent_action = self.sess.run(self.network.sample_action, feed_dict={self.network.state: agent_brain.states})
return agent_action, None, None, None
def add_experiences(self, info, next_info, take_action_outputs):

:param next_info: Next BrainInfo.
:param take_action_outputs: The outputs of the take action method.
"""
info_P = info[self.brain_to_imitate]
next_info_P = next_info[self.brain_to_imitate]
for agent_id in info_P.agents:
if agent_id in next_info_P.agents:
idx = info_P.agents.index(agent_id)
next_idx = next_info_P.agents.index(agent_id)
if not info_P.local_done[idx]:
self.training_buffer[agent_id]['states'].append(info_P.states[idx])
self.training_buffer[agent_id]['actions'].append(next_info_P.previous_actions[next_idx])
# self.training_buffer[agent_id]['rewards'].append(next_info.rewards[next_idx])
info_expert = info[self.brain_to_imitate]
next_info_expert = next_info[self.brain_to_imitate]
for agent_id in info_expert.agents:
if agent_id in next_info_expert.agents:
idx = info_expert.agents.index(agent_id)
next_idx = next_info_expert.agents.index(agent_id)
if not info_expert.local_done[idx]:
self.training_buffer[agent_id]['states'].append(info_expert.states[idx])
self.training_buffer[agent_id]['actions'].append(next_info_expert.previous_actions[next_idx])
info_E = next_info[self.brain_name]
next_info_E = next_info[self.brain_name]
for agent_id in info_E.agents:
idx = info_E.agents.index(agent_id)
next_idx = next_info_E.agents.index(agent_id)
if not info_E.local_done[idx]:
info_student = next_info[self.brain_name]
next_info_student = next_info[self.brain_name]
for agent_id in info_student.agents:
idx = info_student.agents.index(agent_id)
next_idx = next_info_student.agents.index(agent_id)
if not info_student.local_done[idx]:
self.cumulative_rewards[agent_id] += next_info_E.rewards[next_idx]
self.cumulative_rewards[agent_id] += next_info_student.rewards[next_idx]
if agent_id not in self.episode_steps:
self.episode_steps[agent_id] = 0
self.episode_steps[agent_id] += 1

:param info: Current BrainInfo
"""
info_P = info[self.brain_to_imitate]
for l in range(len(info_P.agents)):
if ((info_P.local_done[l] or
len(self.training_buffer[info_P.agents[l]]['actions']) > self.trainer_parameters['time_horizon'])
and len(self.training_buffer[info_P.agents[l]]['actions']) > 0):
agent_id = info_P.agents[l]
info_expert = info[self.brain_to_imitate]
for l in range(len(info_expert.agents)):
if ((info_expert.local_done[l] or
len(self.training_buffer[info_expert.agents[l]]['actions']) > self.trainer_parameters['time_horizon'])
and len(self.training_buffer[info_expert.agents[l]]['actions']) > 0):
agent_id = info_expert.agents[l]
info_E = info[self.brain_name]
for l in range(len(info_E.agents)):
if info_E.local_done[l]:
agent_id = info_E.agents[l]
info_student = info[self.brain_name]
for l in range(len(info_student.agents)):
if info_student.local_done[l]:
agent_id = info_student.agents[l]
self.stats['cumulative_reward'].append(self.cumulative_rewards[agent_id])
self.stats['episode_length'].append(self.episode_steps[agent_id])
self.cumulative_rewards[agent_id] = 0

def is_ready_update(self):
"""
Returns wether or not the trainer has enough elements to run update model
Returns whether or not the trainer has enough elements to run update model
:return: A boolean corresponding to whether or not update_model() can be run
"""
return len(self.training_buffer.update_buffer['actions']) > self.batch_size

if not self.is_continuous:
feed_dict = {
self.network.state: batch_states.reshape([-1, 1]),
self.network.true_action: np.reshape(batch_actions, -1)
}
self.network.true_action: np.reshape(batch_actions, -1)}
self.network.true_action: batch_actions.reshape([self.batch_size, -1])
}
self.network.true_action: batch_actions.reshape([self.batch_size, -1])}
loss, _ = self.sess.run([self.network.loss, self.network.update], feed_dict=feed_dict)
batch_losses.append(loss)
if len(batch_losses) > 0:

34
python/trainers/ppo_models.py


update_reward = tf.assign(last_reward, new_reward)
return last_reward, new_reward, update_reward
def create_recurrent_encoder(self, input_state, memory_in, name = 'lstm'):
def create_recurrent_encoder(self, input_state, memory_in, name='lstm'):
"""
Builds a recurrent encoder for either state or observations (LSTM).
:param input_state: The input tensor to the LSTM cell.

s_size = input_state.get_shape().as_list()[1]
m_size = memory_in.get_shape().as_list()[1]
lstm_input_state = tf.reshape(input_state, shape = [-1, self.sequence_length, s_size])
_half_point = int(m_size/2)
lstm_input_state = tf.reshape(input_state, shape=[-1, self.sequence_length, s_size])
_half_point = int(m_size / 2)
lstm_state_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:,:_half_point], memory_in[:,_half_point:])
lstm_state_in = tf.contrib.rnn.LSTMStateTuple(memory_in[:, :_half_point], memory_in[:, _half_point:])
initial_state=lstm_state_in,
time_major=False,
dtype=tf.float32)
recurrent_state = tf.reshape(recurrent_state, shape = [-1, _half_point])
return recurrent_state, tf.concat([lstm_state_out.c, lstm_state_out.h], axis = 1)
initial_state=lstm_state_in,
time_major=False,
dtype=tf.float32)
recurrent_state = tf.reshape(recurrent_state, shape=[-1, _half_point])
return recurrent_state, tf.concat([lstm_state_out.c, lstm_state_out.h], axis=1)
def create_visual_encoder(self, o_size_h, o_size_w, bw, h_size, num_streams, activation, num_layers):
"""

hidden_value = tf.concat([hidden_visual[1], hidden_state[1]], axis=1)
if self.use_recurrent:
self.memory_in = tf.placeholder(shape=[None, self.m_size],dtype=tf.float32, name='recurrent_in')
_half_point = int(self.m_size/2)
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
_half_point = int(self.m_size / 2)
self.mu = tf.layers.dense(hidden_policy, a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

raise Exception("No valid network configuration possible. "
"There are no states or observations in this brain")
elif hidden_visual is not None and hidden_state is None:
hidden = hidden_visual[0]
hidden = hidden_visual
hidden = tf.concat([hidden_visual[0], hidden_state], axis=1)
hidden = tf.concat([hidden_visual, hidden_state], axis=1)
self.memory_in = tf.placeholder(shape=[None, self.m_size],dtype=tf.float32, name='recurrent_in')
hidden, self.memory_out = self.create_recurrent_encoder( hidden, self.memory_in)
self.memory_out = tf.identity(self.memory_out, name = 'recurrent_out')
self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
self.policy = tf.layers.dense(hidden, a_size, activation=None, use_bias=False,
kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

2
python/trainers/trainer.py


def is_ready_update(self):
"""
Returns wether or not the trainer has enough elements to run update model
Returns whether or not the trainer has enough elements to run update model
:return: A boolean corresponding to whether or not update_model() can be run
"""
raise UnityTrainerException("The is_ready_update method was not implemented.")

2
unity-environment/Assets/ML-Agents/Scripts/Academy.cs


var child = transform.GetChild(i);
var brain = child.GetComponent<Brain>();
if (brain != null)
if (brain != null && child.gameObject.activeSelf)
brains.Add(brain);
}
}

4
unity-environment/Assets/ML-Agents/Scripts/Brain.cs


if ((states.Count != brainParameters.stateSize * brainParameters.stackedStates) && (brainParameters.stateSpaceType == StateType.continuous))
{
throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}:
Was expecting {1} continuous states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count));
Was expecting {1} continuous states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize * brainParameters.stackedStates, states.Count));
Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count));
Was expecting {1} discrete states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stackedStates, states.Count));
}
currentStates.Add(idAgent.Key, states);
}

正在加载...
取消
保存