|
|
|
|
|
|
self.num_batch[behavior_name] = self._dict_settings[behavior_name].num_batch |
|
|
|
|
|
|
|
active_hyps = self._dict_settings[behavior_name].active_learning |
|
|
|
if active_hyps: |
|
|
|
if active_hyps: # use active learning |
|
|
|
else: |
|
|
|
else: # use uniform random sampling |
|
|
|
print("num batch", self.num_batch) |
|
|
|
|
|
|
|
""" |
|
|
|
Converts the task array into a dictionary keyed by parameter name so it can be passed to the C# side through the agent parameter channel. |
|
|
|
""" |
|
|
|
task = {} |
|
|
|
for i, name in enumerate(self.param_names[behavior_name]): |
|
|
|
task[name] = tau[i] |
|
|
|
|
|
|
""" |
|
|
|
Converts a dictionary description of the task into a vector representation and adds the time parameter. |
|
|
|
""" |
|
|
|
tau = [] |
|
|
|
for name in self.param_names[behavior_name]: |
|
|
|
tau.append(task[name]) |
|
|
|
|
|
|
def get_tasks(self, behavior_name, num_samples) -> Dict[str, ParameterRandomizationSettings]: |
|
|
|
""" |
|
|
|
Samples task parameters to pass to agents. |
|
|
|
The given behavior_name is first replaced by the first entry of self.behavior_names that is contained in it; an IndexError is raised when no entry matches. |
|
|
|
The current time is read as self.t[behavior_name] + 1 for the resolved behavior name. |
|
|
|
NOTE(review): the statements that use num_samples and build the returned tasks_repeated value are not visible in this copy of the file -- TODO document the sampling and the return value once they are confirmed. |
|
|
|
""" |
|
|
|
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names |
|
|
|
current_time = self.t[behavior_name] + 1 |
|
|
|
|
|
|
return tasks_repeated |
|
|
|
|
|
|
|
def add_run(self, behavior_name, tau, perf): |
|
|
|
""" |
|
|
|
Adds a finished run to the buffer organized by tau. |
|
|
|
NOTE(review): the statements that store tau and perf are not visible in this copy of the file -- confirm the buffer layout against the full source. |
|
|
|
""" |
|
|
|
# NOTE(review): the string and locals below look like the start of a separate method whose def line is missing here -- TODO confirm. |
|
|
|
""" |
|
|
|
Compiles performances that have been completed |
|
|
|
""" |
|
|
|
taus = [] |
|
|
|
perfs = [] |
|
|
|
t = self.t[behavior_name] |
|
|
|
|
|
|
def update(self, behavior_name: str, task_perfs: List[Tuple[Dict, float]] |
|
|
|
) -> Tuple[bool, bool]: |
|
|
|
""" |
|
|
|
TODO |
|
|
|
Updates the model of the task performance |
|
|
|
""" |
|
|
|
|
|
|
|
must_reset = False |
|
|
|