|
|
|
|
|
|
self.summary_writer = tf.summary.FileWriter(self.summary_path) |
|
|
|
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap) |
|
|
|
self.policy: TFPolicy = None |
|
|
|
self.policies: Dict[str, TFPolicy] = {} |
|
|
|
self.step: int = 0 |
|
|
|
|
|
|
|
def check_param_keys(self): |
|
|
|
|
|
|
|
|
|
|
def add_experiences( |
|
|
|
self, |
|
|
|
name_behavior_id: str, |
|
|
|
curr_info: BrainInfo, |
|
|
|
next_info: BrainInfo, |
|
|
|
take_action_outputs: ActionInfoOutputs, |
|
|
|
|
|
|
:param name_behavior_id: string policy identifier. |
|
|
|
:param curr_info: current BrainInfo. |
|
|
|
:param next_info: next BrainInfo. |
|
|
|
:param take_action_outputs: The outputs of the Policy's get_action method. |
|
|
|
|
|
|
def process_experiences( |
|
|
|
self, current_info: BrainInfo, next_info: BrainInfo |
|
|
|
self, name_behavior_id: str, current_info: BrainInfo, next_info: BrainInfo |
|
|
|
:param name_behavior_id: string policy identifier. |
|
|
|
:param current_info: current BrainInfo. |
|
|
|
:param next_info: next BrainInfo. |
|
|
|
""" |
|
|
|
|
|
|
""" |
|
|
|
raise UnityTrainerException("The update_model method was not implemented.") |
|
|
|
|
|
|
|
def add_policy(self, brain_parameters: BrainParameters) -> None: |
|
|
|
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy: |
|
|
|
Adds policy to trainers list of policies |
|
|
|
Creates policy |
|
|
|
Adds policy to trainers list of policies |
|
|
|
Gets policy from trainers list of policies |
|
|
|
raise UnityTrainerException("The update_model method was not implemented.") |
|
|
|
return self.policies[brain_name] |