浏览代码
Updated task manager
Updated task manager
active learning is now optional and defaults to uniform sampling of tasks. Renamed ActiveLearningTaskManager to just TaskManager/active-variablespeed
Scott Jordan
5 年前
当前提交
78f8a9a2
共有 11 个文件被更改,包括 227 次插入 和 141 次删除
-
10Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
-
2config/ppo/WalkerStaticVariableSpeed.yaml
-
22ml-agents/mlagents/trainers/active_learning.py
-
3ml-agents/mlagents/trainers/agent_processor.py
-
4ml-agents/mlagents/trainers/learn.py
-
61ml-agents/mlagents/trainers/settings.py
-
4ml-agents/mlagents/trainers/trainer_controller.py
-
1ml-agents/mlagents/trainers/trainer_util.py
-
40config/ppo/WalkerStaticVariableSpeedActive.yaml
-
121ml-agents/mlagents/trainers/task_manager.py
-
100ml-agents/mlagents/trainers/active_learning_manager.py
|
|||
behaviors: |
|||
WalkerStaticVariableSpeed: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
gamma: 0.995 |
|||
strength: 1.0 |
|||
keep_checkpoints: 5 |
|||
max_steps: 30000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|||
agent_parameters: |
|||
WalkerStaticVariableSpeed: |
|||
active_learner: |
|||
warmup_steps: 30 |
|||
capacity: 600 |
|||
num_mc: 500 |
|||
beta: 1.96 |
|||
raw_samples: 128 |
|||
num_restarts: 1 |
|||
targetWalkingSpeed: |
|||
sampler_type: uniform |
|||
sampler_parameters: |
|||
min_value: 0.1 |
|||
max_value: 10 |
|
|||
from typing import Dict, List, Tuple, Optional |
|||
from mlagents.trainers.settings import ( |
|||
TaskParameterSettings, |
|||
ParameterRandomizationSettings, |
|||
) |
|||
from collections import defaultdict |
|||
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType |
|||
|
|||
from mlagents_envs.logging_util import get_logger |
|||
|
|||
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager |
|||
from mlagents.trainers.active_learning import ActiveLearningTaskSampler |
|||
|
|||
logger = get_logger(__name__) |
|||
|
|||
import torch |
|||
import numpy as np |
|||
|
|||
class TaskManager:
    """
    Hands out task parameter values per behavior and, when active learning is
    configured for a behavior, feeds observed performances back to its sampler.
    """

    def __init__(
        self,
        settings: Optional[Dict[str, TaskParameterSettings]] = None,
        restore: bool = False,
    ):
        """
        Builds one task sampler per behavior found in `settings`.

        :param settings: A dictionary from behavior name to TaskParameterSettings.
            Each entry is read for `.parameters` (parameter name -> object with
            `min_value` / `max_value`) and `.active_learning` (hyperparameters for
            ActiveLearningTaskSampler; a falsy value selects uniform sampling).
        :param restore: Accepted for interface compatibility; not used by this
            class at the moment.
        """
        if settings is None:
            settings = {}
        self._dict_settings = settings

        self.behavior_names = list(self._dict_settings.keys())
        self.param_names = {
            name: list(self._dict_settings[name].parameters.keys())
            for name in self.behavior_names
        }
        self._taskSamplers = {}

        for behavior_name in self.behavior_names:
            parameters = self._dict_settings[behavior_name].parameters
            lows = []
            highs = []
            for parameter_name in self.param_names[behavior_name]:
                lows.append(parameters[parameter_name].min_value)
                highs.append(parameters[parameter_name].max_value)
            # One row per parameter, columns are (min, max).
            task_ranges = torch.tensor([lows, highs]).float().T

            active_hyps = self._dict_settings[behavior_name].active_learning
            if active_hyps:
                self._taskSamplers[behavior_name] = ActiveLearningTaskSampler(
                    task_ranges,
                    warmup_steps=active_hyps.warmup_steps,
                    capacity=active_hyps.capacity,
                    num_mc=active_hyps.num_mc,
                    beta=active_hyps.beta,
                    raw_samples=active_hyps.raw_samples,
                    num_restarts=active_hyps.num_restarts,
                )
            else:
                # Bind the ranges as a default argument: a plain closure would look
                # up `task_ranges` at call time and every behavior would end up
                # sampling from the ranges of the last behavior in the loop.
                self._taskSamplers[behavior_name] = (
                    lambda n, ranges=task_ranges: uniform_sample(ranges, n)
                )

        # Per-behavior counter of performances reported through update().
        self.t = {name: 0.0 for name in self.behavior_names}

    def _resolve_behavior_name(self, behavior_name):
        """
        Maps an incoming behavior name onto the first configured behavior name
        that occurs in it as a substring.

        :raises IndexError: If no configured behavior name matches.
        """
        # TODO make work with actual behavior names
        for bname in self.behavior_names:
            if bname in behavior_name:
                return bname
        raise IndexError(
            f"No task settings match behavior name '{behavior_name}'; "
            f"configured behaviors: {self.behavior_names}"
        )

    def _make_task(self, behavior_name, tau):
        """
        Converts a sampled point into a {parameter name: value} dict, pairing the
        behavior's parameter names with the leading entries of `tau` in order.
        """
        return {
            name: tau[i] for i, name in enumerate(self.param_names[behavior_name])
        }

    def _build_tau(self, behavior_name, task, time):
        """
        Flattens a task dict into a float tensor: the parameter values in
        configured order, followed by `time` as the last entry.
        """
        tau = [task[name] for name in self.param_names[behavior_name]]
        tau.append(time)
        return torch.tensor(tau).float()

    def get_tasks(self, behavior_name, num_samples) -> List[Dict[str, float]]:
        """
        Samples `num_samples` tasks for the given behavior.

        :param behavior_name: Name of the behavior; matched against the configured
            names by substring.
        :param num_samples: Number of tasks to draw.
        :return: A list with one {parameter name: value} dict per sampled task.
        :raises IndexError: If no configured behavior name matches.
        """
        behavior_name = self._resolve_behavior_name(behavior_name)
        sampler = self._taskSamplers[behavior_name]

        if isinstance(sampler, ActiveLearningTaskSampler):
            current_time = self.t[behavior_name] + 1
            taus = (
                sampler.get_design_points(num_points=num_samples, time=current_time)
                .data.numpy()
                .tolist()
            )
        else:
            taus = sampler(num_samples).tolist()

        return [self._make_task(behavior_name, tau) for tau in taus]

    def update(
        self, behavior_name: str, task_perfs: List[Tuple[Dict, float]]
    ) -> Tuple[bool, bool]:
        """
        Reports (task, performance) pairs for a behavior. Only behaviors backed by
        an ActiveLearningTaskSampler are affected; uniform samplers ignore the data.

        :param behavior_name: Name of the behavior; matched against the configured
            names by substring.
        :param task_perfs: Pairs of (task dict as produced by get_tasks, performance).
        :return: (updated, must_reset). `updated` is True when the sampler's model
            was updated; `must_reset` is always False.
        :raises IndexError: If no configured behavior name matches.
        """
        must_reset = False
        updated = False
        behavior_name = self._resolve_behavior_name(behavior_name)
        sampler = self._taskSamplers[behavior_name]

        # torch.stack rejects an empty list, so there is nothing to fit without data.
        if isinstance(sampler, ActiveLearningTaskSampler) and task_perfs:
            updated = True
            taus = []
            perfs = []
            for task, perf in task_perfs:
                perfs.append(perf)
                # Each reported performance advances the behavior's time counter,
                # and that counter value is appended to the task's feature vector.
                self.t[behavior_name] = self.t[behavior_name] + 1
                taus.append(self._build_tau(behavior_name, task, self.t[behavior_name]))

            X = torch.stack(taus, dim=0)
            Y = torch.tensor(perfs).float()
            sampler.update_model(X, Y, refit=True)

        return updated, must_reset
|||
|
|||
def uniform_sample(ranges, num_samples):
    """
    Draws points uniformly at random inside per-parameter bounds.

    :param ranges: Array-like with one row per parameter; column 0 holds the
        lower bound and column 1 the upper bound.
    :param num_samples: Number of points to draw.
    :return: A numpy array of shape (num_samples, number of parameters).
    """
    bounds = np.asarray(ranges, dtype=float)
    low = bounds[:, 0]
    high = bounds[:, 1]
    # Request the full (samples, parameters) shape so the per-parameter bounds
    # broadcast along the last axis. A scalar size only works for one parameter.
    return np.random.uniform(low=low, high=high, size=(num_samples, low.shape[0]))
|
|||
from typing import Dict, List, Tuple, Optional |
|||
from mlagents.trainers.settings import ( |
|||
AgentParameterSettings, |
|||
ParameterRandomizationSettings, |
|||
) |
|||
from collections import defaultdict |
|||
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType |
|||
|
|||
from mlagents_envs.logging_util import get_logger |
|||
|
|||
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager |
|||
from mlagents.trainers.active_learning import ActiveLearningTaskSampler |
|||
|
|||
logger = get_logger(__name__) |
|||
|
|||
import torch |
|||
|
|||
class ActiveLearningTaskManager:
    """
    Hands out task parameter values per behavior using an
    ActiveLearningTaskSampler and feeds observed performances back to it.
    """

    def __init__(
        self,
        settings: Optional[Dict[str, AgentParameterSettings]] = None,
        restore: bool = False,
    ):
        """
        Builds one ActiveLearningTaskSampler per behavior found in `settings`.

        :param settings: A dictionary from behavior name to AgentParameterSettings.
            Each entry is read for `.parameters` (parameter name -> object with
            `min_value` / `max_value`).
        :param restore: Accepted for interface compatibility; not used by this
            class at the moment.
        """
        if settings is None:
            settings = {}
        self._dict_settings = settings

        self.behavior_names = list(self._dict_settings.keys())
        self.param_names = {
            name: list(self._dict_settings[name].parameters.keys())
            for name in self.behavior_names
        }
        self._taskSamplers = {}
        for behavior_name in self.behavior_names:
            parameters = self._dict_settings[behavior_name].parameters
            lows = []
            highs = []
            for parameter_name in self.param_names[behavior_name]:
                lows.append(parameters[parameter_name].min_value)
                highs.append(parameters[parameter_name].max_value)

            # One row per parameter, columns are (min, max).
            task_ranges = torch.tensor([lows, highs]).float().T
            self._taskSamplers[behavior_name] = ActiveLearningTaskSampler(task_ranges)

        # Per-behavior counter of performances reported through update().
        self.t = {name: 0.0 for name in self.behavior_names}

    def _resolve_behavior_name(self, behavior_name):
        """
        Maps an incoming behavior name onto the first configured behavior name
        that occurs in it as a substring.

        :raises IndexError: If no configured behavior name matches.
        """
        # TODO make work with actual behavior names
        for bname in self.behavior_names:
            if bname in behavior_name:
                return bname
        raise IndexError(
            f"No task settings match behavior name '{behavior_name}'; "
            f"configured behaviors: {self.behavior_names}"
        )

    def _make_task(self, behavior_name, tau):
        """
        Converts a sampled point into a {parameter name: value} dict, pairing the
        behavior's parameter names with the leading entries of `tau` in order.
        """
        return {
            name: tau[i] for i, name in enumerate(self.param_names[behavior_name])
        }

    def _build_tau(self, behavior_name, task, time):
        """
        Flattens a task dict into a float tensor: the parameter values in
        configured order, followed by `time` as the last entry.
        """
        tau = [task[name] for name in self.param_names[behavior_name]]
        tau.append(time)
        return torch.tensor(tau).float()

    def get_tasks(self, behavior_name, num_samples) -> List[Dict[str, float]]:
        """
        Requests `num_samples` design points from the behavior's sampler.

        :param behavior_name: Name of the behavior; matched against the configured
            names by substring.
        :param num_samples: Number of tasks to draw.
        :return: A list with one {parameter name: value} dict per sampled task.
        :raises IndexError: If no configured behavior name matches.
        """
        behavior_name = self._resolve_behavior_name(behavior_name)
        current_time = self.t[behavior_name] + 1

        design_points = self._taskSamplers[behavior_name].get_design_points(
            num_points=num_samples, time=current_time
        )
        taus = design_points.data.numpy().tolist()
        return [self._make_task(behavior_name, tau) for tau in taus]

    def update(
        self, behavior_name: str, task_perfs: List[Tuple[Dict, float]]
    ) -> Tuple[bool, bool]:
        """
        Reports (task, performance) pairs for a behavior to its sampler.

        :param behavior_name: Name of the behavior; matched against the configured
            names by substring.
        :param task_perfs: Pairs of (task dict as produced by get_tasks, performance).
        :return: (updated, must_reset). `updated` is True when the sampler's model
            was updated (False when `task_perfs` is empty); `must_reset` is always
            False.
        :raises IndexError: If no configured behavior name matches.
        """
        must_reset = False
        behavior_name = self._resolve_behavior_name(behavior_name)

        # torch.stack rejects an empty list, so there is nothing to fit without data.
        if not task_perfs:
            return False, must_reset

        taus = []
        perfs = []
        for task, perf in task_perfs:
            perfs.append(perf)
            # Each reported performance advances the behavior's time counter, and
            # that counter value is appended to the task's feature vector.
            self.t[behavior_name] = self.t[behavior_name] + 1
            taus.append(self._build_tau(behavior_name, task, self.t[behavior_name]))

        X = torch.stack(taus, dim=0)
        Y = torch.tensor(perfs).float()
        self._taskSamplers[behavior_name].update_model(X, Y, refit=True)

        return True, must_reset
撰写
预览
正在加载...
取消
保存
Reference in new issue