
[Initial Commit]

/develop-generalizationTraining-TrainerController
vincentpierre, 6 years ago
Commit e47cec56
Showing 4 changed files with 31 additions and 11 deletions
  1. python/unityagents/environment.py (22 changes)
  2. python/unitytrainers/bc/trainer.py (4 changes)
  3. python/unitytrainers/ppo/trainer.py (4 changes)
  4. python/unitytrainers/trainer_controller.py (12 changes)

python/unityagents/environment.py (22 changes)


         else:
             raise UnityEnvironmentException("No Unity environment is loaded.")
 
-    def step(self, vector_action=None, memory=None, text_action=None) -> AllBrainInfo:
+    def step(self, vector_action=None, memory=None, text_action=None, value=None) -> AllBrainInfo:
         """
         Provides the environment with an action, moves the environment dynamics forward accordingly, and returns
         observation, state, and reward information to the agent.

         vector_action = {} if vector_action is None else vector_action
         memory = {} if memory is None else memory
         text_action = {} if text_action is None else text_action
+        value = {} if value is None else value
         if self._loaded and not self._global_done and self._global_done is not None:
             if isinstance(vector_action, (int, np.int_, float, np.float_, list, np.ndarray)):
                 if self._num_external_brains == 1:

                     raise UnityActionException(
                         "There are no external brains in the environment, "
                         "step cannot take a text_action input")
+            if isinstance(value, (int, np.int_, float, np.float_, list, np.ndarray)):
+                if self._num_external_brains == 1:
+                    value = {self._external_brain_names[0]: value}
+                elif self._num_external_brains > 1:
+                    raise UnityActionException(
+                        "You have {0} brains, you need to feed a dictionary of brain names as keys "
+                        "and state/action value estimates as values".format(self._num_brains))
+                else:
+                    raise UnityActionException(
+                        "There are no external brains in the environment, "
+                        "step cannot take a value input")
             for brain_name in list(vector_action.keys()) + list(memory.keys()) + list(text_action.keys()):
                 if brain_name not in self._external_brain_names:

                                 str(vector_action[b])))
             outputs = self.communicator.exchange(
-                self._generate_step_input(vector_action, memory, text_action)
+                self._generate_step_input(vector_action, memory, text_action, value)
             )
             if outputs is None:
                 raise KeyboardInterrupt

             )
         return _data, global_done
 
-    def _generate_step_input(self, vector_action, memory, text_action) -> UnityRLInput:
+    def _generate_step_input(self, vector_action, memory, text_action, value) -> UnityRLInput:
         rl_in = UnityRLInput()
         for b in vector_action:
             n_agents = self._n_agents[b]

                 action = AgentActionProto(
                     vector_actions=vector_action[b][i*_a_s: (i+1)*_a_s],
                     memories=memory[b][i*_m_s: (i+1)*_m_s],
-                    text_actions=text_action[b][i]
+                    text_actions=text_action[b][i],
                 )
+                if b in value:
+                    action.value = value[b][i]
                 rl_in.agent_actions[b].value.extend([action])
         rl_in.command = 0
         return self.wrap_unity_input(rl_in)
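
For orientation (not part of the diff): with this change, step() accepts a per-brain value estimate alongside the action, and a bare scalar or list is normalized to a {brain_name: value} dictionary exactly as vector_action already is. A minimal caller sketch, assuming the ml-agents API of this era, a single external brain, and a hypothetical environment binary name:

    # Sketch only: assumes one external brain; "3DBall" is a hypothetical binary.
    from unityagents import UnityEnvironment

    env = UnityEnvironment(file_name="3DBall")
    brain_name = env.external_brain_names[0]
    info = env.reset(train_mode=True)

    action = [0.0, 0.0]     # assumed: one agent with a 2-dim continuous action
    value_estimate = [0.5]  # assumed: critic's estimate for that agent's state
    # A bare list is accepted; step() rewrites it to {brain_name: value_estimate}
    # before building the AgentActionProto messages shown above.
    info = env.step(vector_action=action, value=value_estimate)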

python/unitytrainers/bc/trainer.py (4 changes)


             feed_dict[self.model.memory_in] = agent_brain.memories
         if self.use_recurrent:
             agent_action, memories = self.sess.run(self.inference_run_list, feed_dict)
-            return agent_action, memories, None, None
+            return agent_action, memories, None, None, None

-        return agent_action, None, None, None
+        return agent_action, None, None, None, None
 
     def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take_action_outputs):
         """

python/unitytrainers/ppo/trainer.py (4 changes)


         self.stats['entropy'].append(run_out[self.model.entropy].mean())
         self.stats['learning_rate'].append(run_out[self.model.learning_rate])
         if self.use_recurrent:
-            return run_out[self.model.output], run_out[self.model.memory_out], None, run_out
+            return run_out[self.model.output], run_out[self.model.memory_out], None, run_out[self.model.value], run_out

-        return run_out[self.model.output], None, None, run_out
+        return run_out[self.model.output], None, None, run_out[self.model.value], run_out
 
     def generate_intrinsic_rewards(self, curr_info, next_info):
         """

python/unitytrainers/trainer_controller.py (12 changes)


                 if self.trainers[brain_name].parameters["trainer"] == "imitation":
                     nodes += [scope + x for x in ["action"]]
                 else:
-                    nodes += [scope + x for x in ["action", "value_estimate", "action_probs"]]
+                    nodes += [scope + x for x in ["action", "value_estimate", "action_probs", "value_estimate"]]
                 if self.trainers[brain_name].parameters["use_recurrent"]:
                     nodes += [scope + x for x in ["recurrent_out", "memory_size"]]
             if len(scopes) > 1:

                 for brain_name, trainer in self.trainers.items():
                     trainer.end_episode()
             # Decide and take an action
-            take_action_vector, take_action_memories, take_action_text, take_action_outputs = {}, {}, {}, {}
+            take_action_vector, \
+                take_action_memories, \
+                take_action_text, \
+                take_action_value, \
+                take_action_outputs \
+                = {}, {}, {}, {}, {}

+                 take_action_value,

-                                text_action=take_action_text)
+                                text_action=take_action_text, value=take_action_value)
             for brain_name, trainer in self.trainers.items():
                 trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
                 trainer.process_experiences(curr_info, new_info)
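
Putting the controller changes together (a sketch; the per-brain unpacking loop is elided from the diff and assumed here): each trainer's value estimate is collected into take_action_value and handed to env.step along with the actions:

    # Sketch of the step loop implied by this diff (loop body assumed):
    for brain_name, trainer in self.trainers.items():
        (take_action_vector[brain_name],
         take_action_memories[brain_name],
         take_action_text[brain_name],
         take_action_value[brain_name],
         take_action_outputs[brain_name]) = trainer.take_action(curr_info)
    new_info = env.step(vector_action=take_action_vector,
                        memory=take_action_memories,
                        text_action=take_action_text,
                        value=take_action_value)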
