浏览代码

moved batch outside of active learner

/active-variablespeed
Scott Jordan 4 年前
当前提交
f9748b70
共有 3 个文件被更改,包括 48 次插入27 次删除
  1. 9
      ml-agents/mlagents/trainers/active_learning.py
  2. 4
      ml-agents/mlagents/trainers/settings.py
  3. 62
      ml-agents/mlagents/trainers/task_manager.py

9
ml-agents/mlagents/trainers/active_learning.py


self.num_mc = num_mc
self.beta = beta
self.raw_samples = raw_samples
self.num_batch = num_batch
self.num_restarts = num_restarts
self.xdim = ranges.shape[0] + 1
self.model = None

if state_dict is not None:
self.model.load_state_dict(state_dict)
fit_gpytorch_model(mll)
else:
self.model.set_train_data(self.X, self.Y)
# elif self.model is not None:
# self.model.set_train_data(self.X, self.Y)
def get_design_points(self, num_points:int=1, time=None, get_batch=True):
if get_batch:
num_points = min(num_points, self.num_batch)
def get_design_points(self, num_points:int=1, time=None):
if not self.model or time < self.warmup_steps:
return sample_random_points(self.bounds, num_points)

4
ml-agents/mlagents/trainers/settings.py


beta:float=1.96
raw_samples:int=128
num_restarts:int=1
num_batch:int=16
@attr.s(auto_attribs=True)
class TaskParameterSettings:

repeat:int=8
num_repeat:int=8
num_batch:int=16
@staticmethod
def structure(d: Mapping, t: type) -> Dict[str, "TaskParameterSettings"]:

62
ml-agents/mlagents/trainers/task_manager.py


self._taskSamplers = {}
self.report_buffer = []
self.num_repeat = {name: 1 for name in self.behavior_names}
self.task_completed = {name: defaultdict(list) for name in self.behavior_names}
self.num_batch = {name: 1 for name in self.behavior_names}
for behavior_name in self.behavior_names:
lows = []

lows.append(low)
highs.append(high)
task_ranges = torch.tensor([lows, highs]).float().T
num_repeat = self._dict_settings[behavior_name].num_repeat
self.num_repeat[behavior_name] = num_repeat
self.num_repeat[behavior_name] = self._dict_settings[behavior_name].num_repeat
self.num_batch[behavior_name] = self._dict_settings[behavior_name].num_batch
active_hyps = self._dict_settings[behavior_name].active_learning
if active_hyps:
self._taskSamplers[behavior_name] = ActiveLearningTaskSampler(task_ranges,

num_batch=active_hyps.num_batch
print("num batch", self.num_batch)
self.t = {name: 0.0 for name in self.behavior_names}
self.counter = {name: 0 for name in self.behavior_names}

current_time = self.t[behavior_name] + 1
if isinstance(self._taskSamplers[behavior_name], ActiveLearningTaskSampler):
taus = self._taskSamplers[behavior_name].get_design_points(num_points=num_samples, time=current_time).data.numpy().tolist()
num_points = max(num_samples, self.num_batch[behavior_name])
taus = self._taskSamplers[behavior_name].get_design_points(num_points=num_points, time=current_time).data.numpy().tolist()
else:
taus = self._taskSamplers[behavior_name](num_samples).tolist()
# print("sampled taus", current_time, taus)

for i in range(self.num_repeat[behavior_name]):
tasks_repeated.extend(tasks)
def add_run(self, behavior_name, tau, perf):
    """Record one performance value for a task under ``behavior_name``.

    The task is identified by every element of the flattened ``tau`` except
    the last one; that tuple is used as the key into
    ``self.task_completed[behavior_name]`` and ``perf`` is appended to the
    list stored there.

    Args:
        behavior_name: Key selecting the per-behavior results mapping.
        tau: Tensor describing the task. Its final element is dropped when
            building the key (presumably the time coordinate, since
            ``get_data`` appends the current time back on -- confirm).
        perf: Performance value to store for this task.
    """
    # detach() rather than .data keeps autograd bookkeeping safe, and cpu()
    # lets tensors living on another device be converted to NumPy.
    values = tau.detach().cpu().numpy().flatten().tolist()
    key = tuple(values[:-1])
    self.task_completed[behavior_name][key].append(perf)
def get_data(self, behavior_name, last=True):
    """Assemble the recorded runs for ``behavior_name`` into tensors.

    Each key of ``self.task_completed[behavior_name]`` is extended with the
    current value of ``self.t[behavior_name]`` to form one input row, paired
    with a single performance value for that key.

    Args:
        behavior_name: Key selecting the per-behavior results and time value.
        last: When true, use the most recently appended performance for each
            task; otherwise use the mean of all performances recorded for it.

    Returns:
        A tuple ``(X, Y)``: ``X`` stacks one float row per task (task key
        followed by the time value) and ``Y`` is a float column tensor of
        shape ``(num_tasks, 1)`` with the matching performances.

    Raises:
        RuntimeError: From ``torch.stack`` when no runs have been recorded
            for ``behavior_name``.
    """
    t = self.t[behavior_name]
    taus = []
    perfs = []
    for key, runs in self.task_completed[behavior_name].items():
        # Every row carries the same (current) time value in its last slot.
        taus.append(torch.tensor(key + (t,)).float())
        # float() keeps the list homogeneous when the mean is a NumPy scalar.
        perfs.append(runs[-1] if last else float(np.mean(runs)))
    X = torch.stack(taus, dim=0)
    Y = torch.tensor(perfs).float().reshape(-1, 1)
    return X, Y
def update(self, behavior_name: str, task_perfs: List[Tuple[Dict, float]]
) -> Tuple[bool, bool]:
"""

updated = False
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names
if isinstance(self._taskSamplers[behavior_name], ActiveLearningTaskSampler):
updated = True
taus = []
perfs = []
perfs.append(perf)
self.t[behavior_name] = self.t[behavior_name] + 1
# perfs.append(perf)
# self.t[behavior_name] = self.t[behavior_name] + 1
taus.append(tau)
X = torch.stack(taus, dim=0)
Y = torch.tensor(perfs).float().reshape(-1, 1)
# taus.append(tau)
self.add_run(behavior_name, tau, perf)
if self.counter[behavior_name] >= self.num_repeat:
refit = True
self.counter[behavior_name] = 0
else:
refit = False
self._taskSamplers[behavior_name].update_model(X, Y, refit=refit)
M = self.num_repeat[behavior_name] * self.num_batch[behavior_name]
if self.counter[behavior_name] >= M:
updated = True
self.t[behavior_name] += 1
X, Y = self.get_data(behavior_name, last=True)
self.task_completed[behavior_name] = defaultdict(list)
self._taskSamplers[behavior_name].update_model(X, Y, refit=True)
return updated, must_reset

正在加载...
取消
保存