|
|
|
|
|
|
self._taskSamplers = {} |
|
|
|
self.report_buffer = [] |
|
|
|
self.num_repeat = {name: 1 for name in self.behavior_names} |
|
|
|
self.task_completed = {name: defaultdict(list) for name in self.behavior_names} |
|
|
|
self.num_batch = {name: 1 for name in self.behavior_names} |
|
|
|
|
|
|
|
for behavior_name in self.behavior_names: |
|
|
|
lows = [] |
|
|
|
|
|
|
lows.append(low) |
|
|
|
highs.append(high) |
|
|
|
task_ranges = torch.tensor([lows, highs]).float().T |
|
|
|
num_repeat = self._dict_settings[behavior_name].num_repeat |
|
|
|
self.num_repeat[behavior_name] = num_repeat |
|
|
|
self.num_repeat[behavior_name] = self._dict_settings[behavior_name].num_repeat |
|
|
|
self.num_batch[behavior_name] = self._dict_settings[behavior_name].num_batch |
|
|
|
|
|
|
|
active_hyps = self._dict_settings[behavior_name].active_learning |
|
|
|
if active_hyps: |
|
|
|
self._taskSamplers[behavior_name] = ActiveLearningTaskSampler(task_ranges, |
|
|
|
|
|
|
num_batch=active_hyps.num_batch |
|
|
|
print("num batch", self.num_batch) |
|
|
|
self.t = {name: 0.0 for name in self.behavior_names} |
|
|
|
self.counter = {name: 0 for name in self.behavior_names} |
|
|
|
|
|
|
|
|
|
|
current_time = self.t[behavior_name] + 1 |
|
|
|
|
|
|
|
if isinstance(self._taskSamplers[behavior_name], ActiveLearningTaskSampler): |
|
|
|
taus = self._taskSamplers[behavior_name].get_design_points(num_points=num_samples, time=current_time).data.numpy().tolist() |
|
|
|
num_points = max(num_samples, self.num_batch[behavior_name]) |
|
|
|
taus = self._taskSamplers[behavior_name].get_design_points(num_points=num_points, time=current_time).data.numpy().tolist() |
|
|
|
else: |
|
|
|
taus = self._taskSamplers[behavior_name](num_samples).tolist() |
|
|
|
# print("sampled taus", current_time, taus) |
|
|
|
|
|
|
for i in range(self.num_repeat[behavior_name]): |
|
|
|
tasks_repeated.extend(tasks) |
|
|
|
|
|
|
|
def add_run(self, behavior_name, tau, perf):
    """Record one performance value for a completed task.

    The task is keyed by every element of ``tau`` except the last one.
    The trailing element is dropped here because ``get_data`` re-appends
    ``self.t[behavior_name]`` in that position when rebuilding the inputs.

    Args:
        behavior_name: Key into ``self.task_completed``.
        tau: Torch tensor describing the task; it is flattened, so any
            shape is accepted.
        perf: Performance value to append to the task's run history.
    """
    # detach().cpu() replaces the legacy ``.data.numpy()`` chain: it also
    # works for tensors that require grad or do not live in CPU memory.
    key = tuple(tau.detach().cpu().flatten()[:-1].tolist())
    self.task_completed[behavior_name][key].append(perf)
|
|
|
|
|
|
|
def get_data(self, behavior_name, last=True):
    """Build model inputs and targets from the recorded task runs.

    Each task key stored in ``self.task_completed[behavior_name]`` is
    extended with the current value of ``self.t[behavior_name]`` and
    turned into one row of ``X``; the matching row of ``Y`` holds that
    task's performance.

    Args:
        behavior_name: Key into ``self.task_completed`` and ``self.t``.
        last: If true, use the most recent performance of each task;
            otherwise use the mean over all of its recorded runs.

    Returns:
        A tuple ``(X, Y)`` of float tensors. ``X`` stacks one row per task
        and ``Y`` has shape ``(num_tasks, 1)``.

    Raises:
        RuntimeError: Raised by ``torch.stack`` when no task has any
            recorded performance.
    """
    t = self.t[behavior_name]
    taus = []
    perfs = []
    for key, runs in self.task_completed[behavior_name].items():
        # A defaultdict creates an empty list on a bare lookup; such an
        # entry carries no measurement and would break ``runs[-1]`` or
        # turn the mean into NaN, so it is skipped.
        if not runs:
            continue
        taus.append(torch.tensor(key + (t,)).float())
        perfs.append(runs[-1] if last else np.mean(runs))

    X = torch.stack(taus, dim=0)
    Y = torch.tensor(perfs).float().reshape(-1, 1)
    return X, Y
|
|
|
|
|
|
|
|
|
|
|
def update(self, behavior_name: str, task_perfs: List[Tuple[Dict, float]] |
|
|
|
) -> Tuple[bool, bool]: |
|
|
|
""" |
|
|
|
|
|
|
updated = False |
|
|
|
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names |
|
|
|
if isinstance(self._taskSamplers[behavior_name], ActiveLearningTaskSampler): |
|
|
|
updated = True |
|
|
|
taus = [] |
|
|
|
perfs = [] |
|
|
|
perfs.append(perf) |
|
|
|
self.t[behavior_name] = self.t[behavior_name] + 1 |
|
|
|
# perfs.append(perf) |
|
|
|
# self.t[behavior_name] = self.t[behavior_name] + 1 |
|
|
|
taus.append(tau) |
|
|
|
|
|
|
|
X = torch.stack(taus, dim=0) |
|
|
|
Y = torch.tensor(perfs).float().reshape(-1, 1) |
|
|
|
# taus.append(tau) |
|
|
|
self.add_run(behavior_name, tau, perf) |
|
|
|
|
|
|
|
if self.counter[behavior_name] >= self.num_repeat: |
|
|
|
refit = True |
|
|
|
self.counter[behavior_name] = 0 |
|
|
|
else: |
|
|
|
refit = False |
|
|
|
self._taskSamplers[behavior_name].update_model(X, Y, refit=refit) |
|
|
|
M = self.num_repeat[behavior_name] * self.num_batch[behavior_name] |
|
|
|
if self.counter[behavior_name] >= M: |
|
|
|
updated = True |
|
|
|
self.t[behavior_name] += 1 |
|
|
|
X, Y = self.get_data(behavior_name, last=True) |
|
|
|
self.task_completed[behavior_name] = defaultdict(list) |
|
|
|
self._taskSamplers[behavior_name].update_model(X, Y, refit=True) |
|
|
|
|
|
|
|
return updated, must_reset |
|
|
|
|
|
|
|