
Updated task manager

active learning is now optional and defaults to uniform sampling of tasks.
Renamed ActiveLearningTaskManager to just TaskManager
/active-variablespeed
Scott Jordan · 4 years ago
Current commit
78f8a9a2
11 files changed: 227 insertions, 141 deletions
  1. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (10)
  2. config/ppo/WalkerStaticVariableSpeed.yaml (2)
  3. ml-agents/mlagents/trainers/active_learning.py (22)
  4. ml-agents/mlagents/trainers/agent_processor.py (3)
  5. ml-agents/mlagents/trainers/learn.py (4)
  6. ml-agents/mlagents/trainers/settings.py (61)
  7. ml-agents/mlagents/trainers/trainer_controller.py (4)
  8. ml-agents/mlagents/trainers/trainer_util.py (1)
  9. config/ppo/WalkerStaticVariableSpeedActive.yaml (40)
  10. ml-agents/mlagents/trainers/task_manager.py (121)
  11. ml-agents/mlagents/trainers/active_learning_manager.py (100)

Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (10 changes)


m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
SetTaskParameters();
}
/// <summary>

//Set our goal walking speed
// targetWalkingSpeed =
// randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : targetWalkingSpeed;
SetTaskParameters();
}
/// <summary>

void FixedUpdate()
{
SetTaskParameters();
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;

m_JdController.bodyPartsDict[hips].rb.mass = m_ResetParams.GetWithDefault("hip_mass", 8);
}
public void SetTaskParameters()
{
targetWalkingSpeed = GetParameterWithDefault("targetWalkingSpeed", targetWalkingSpeed);
}
targetWalkingSpeed = GetParameterWithDefault("targetWalkingSpeed", 10.0f);
// target_headheight = GetParameterWithDefault("target_height", 0.5497f);
SetTorsoMass();
}
}

config/ppo/WalkerStaticVariableSpeed.yaml (2 changes)


    targetWalkingSpeed:
      sampler_type: uniform
      sampler_parameters:
-       min_value: 0.0
+       min_value: 0.1
        max_value: 10

ml-agents/mlagents/trainers/active_learning.py (22 changes)


return MultivariateNormal(mean_x, covar_x)
class ActiveLearningTaskSampler(object):
- def __init__(self,ranges):
+ def __init__(self,ranges, warmup_steps=30, capacity=600, num_mc=500, beta=1.96, raw_samples=128, num_restarts=1):
+ self.warmup_steps = warmup_steps
+ self.capacity = capacity
+ self.num_mc = num_mc
+ self.beta = beta
+ self.raw_samples = raw_samples
+ self.num_restarts = num_restarts
self.xdim = ranges.shape[0] + 1
self.model = None
self.mll = None

self.Y = new_Y.float()
state_dict = None
- T = 12*50
+ T = self.capacity
- if self.X.shape[0] < 5: # TODO seems to throw an error if only one sample is present. Refitting should probably only happen every N data points anyways
+ if self.X.shape[0] < self.warmup_steps: # TODO seems to throw an error if only one sample is present. Refitting should probably only happen every N data points anyways
return None
if refit:

# self.model = self.model.condition_on_observations(new_X, new_Y) # TODO: might be faster than setting the data need to test
def get_design_points(self, num_points:int=1, time=None):
- if not self.model or time < 30:
+ if not self.model or time < self.warmup_steps:
return sample_random_points(self.bounds, num_points)
if not time:

bounds[:, -1] = time
- num_mc = 500
+ num_mc = self.num_mc
- qeisp = qEISP(self.model, mc_points=mc_points, beta=1.96)
+ qeisp = qEISP(self.model, mc_points=mc_points, beta=self.beta)
- raw_samples=128,
+ raw_samples=self.raw_samples,
- num_restarts=1,
+ num_restarts=self.num_restarts,
return_best_only=True,
)
return candidates
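The new constructor arguments simply replace the constants that were previously hard-coded in update_model and get_design_points. A minimal sketch (not part of this commit) of driving the sampler directly, assuming this branch of ml-agents plus torch are installed and that update_model/get_design_points keep the signatures shown in the diff above:

import torch
from mlagents.trainers.active_learning import ActiveLearningTaskSampler

# one task parameter (e.g. targetWalkingSpeed) with range [0.1, 10.0]
ranges = torch.tensor([[0.1, 10.0]]).float()
sampler = ActiveLearningTaskSampler(
    ranges,
    warmup_steps=30,   # purely random task sampling until this pseudo-time / data count
    capacity=600,      # observation window used when refitting (previously hard-coded as 12*50)
    num_mc=500,        # Monte Carlo points for the qEISP acquisition
    beta=1.96,         # exploration weight passed to qEISP
    raw_samples=128,   # raw samples when optimizing the acquisition
    num_restarts=1,    # restarts when optimizing the acquisition
)

# propose candidate tasks at pseudo-time t=1; before warmup_steps this falls
# back to random points inside the ranges
candidates = sampler.get_design_points(num_points=4, time=1)

# report observed performance: X is (n, xdim) with time as the last column,
# Y holds the episodic return for each task
X = torch.tensor([[5.0, 1.0]])
Y = torch.tensor([120.0])
sampler.update_model(X, Y, refit=True)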

ml-agents/mlagents/trainers/agent_processor.py (3 changes)


for traj_queue in self.trajectory_queues:
traj_queue.put(trajectory)
self.experience_buffers[global_id] = []
self.publish_task_performance_queue(self.episode_tasks[global_id], self.episode_rewards[global_id])
self._clean_agent_data(global_id)
def _clean_agent_data(self, global_id: str) -> None:

ml-agents/mlagents/trainers/learn.py (4 changes)


from mlagents import tf_utils
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
- from mlagents.trainers.active_learning_manager import ActiveLearningTaskManager
+ from mlagents.trainers.task_manager import TaskManager
from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
from mlagents.trainers.stats import (
TensorboardWriter,

options.environment_parameters, run_seed, restore=checkpoint_settings.resume
)
- task_parameter_manager = ActiveLearningTaskManager(
+ task_parameter_manager = TaskManager(
options.agent_parameters, restore=checkpoint_settings.resume
)

ml-agents/mlagents/trainers/settings.py (61 changes)


return True, smoothing
return False, smoothing
class Lesson:
"""
Gathers the data of one lesson for one environment parameter including its name,
the condition that must be fullfiled for the lesson to be completed and a sampler
for the environment parameter. If the completion_criteria is None, then this is
the last lesson in the curriculum.
"""
value: ParameterRandomizationSettings
name: str
completion_criteria: Optional[CompletionCriteriaSettings] = attr.ib(default=None)
+ class ActiveLearnerSettings:
+ warmup_steps: int = 30
+ capacity: int = 600
+ num_mc: int = 50
+ beta: float = 1.96
+ raw_samples: int = 128
+ num_restarts: int = 1
- class AgentParameterSettings:
+ class TaskParameterSettings:
+ active_learning: Optional[ActiveLearnerSettings] = None
- def structure(d: Mapping, t: type):# -> Dict[str, AgentParameterSettings]:
+ def structure(d: Mapping, t: type) -> Dict[str, "TaskParameterSettings"]:
"""
Helper method to structure a Dict of EnvironmentParameterSettings class. Meant
to be registered with cattr.register_structure_hook() and called with

raise TrainerConfigError(
f"Unsupported agent environment parameter settings {d}."
)
- d_final: Dict[str, AgentParameterSettings] = {}
+ d_final: Dict[str, TaskParameterSettings] = {}
+ activelearner_settings = None
- sampler = ParameterRandomizationSettings.structure(
- agent_parameter_config, ParameterRandomizationSettings
- )
- tmp_settings[agent_parameter] = sampler
- d_final[behavior_name] = AgentParameterSettings(parameters=tmp_settings)
+ if agent_parameter == "active_learner":
+ activelearner_settings = ActiveLearnerSettings(**agent_parameter_config)
+ else:
+ sampler = ParameterRandomizationSettings.structure(
+ agent_parameter_config, ParameterRandomizationSettings
+ )
+ tmp_settings[agent_parameter] = sampler
+ d_final[behavior_name] = TaskParameterSettings(parameters=tmp_settings, active_learning=activelearner_settings)
# settings = AgentParameterSettings(parameters=d_final)
@attr.s(auto_attribs=True)
class Lesson:
"""
Gathers the data of one lesson for one environment parameter including its name,
the condition that must be fullfiled for the lesson to be completed and a sampler
for the environment parameter. If the completion_criteria is None, then this is
the last lesson in the curriculum.
"""
value: ParameterRandomizationSettings
name: str
completion_criteria: Optional[CompletionCriteriaSettings] = attr.ib(default=None)
@attr.s(auto_attribs=True)
class EnvironmentParameterSettings:

env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings)
engine_settings: EngineSettings = attr.ib(factory=EngineSettings)
environment_parameters: Optional[Dict[str, EnvironmentParameterSettings]] = None
- agent_parameters: Optional[Dict[str, AgentParameterSettings]] = None
+ agent_parameters: Optional[Dict[str, TaskParameterSettings]] = None
checkpoint_settings: CheckpointSettings = attr.ib(factory=CheckpointSettings)
# These are options that are relevant to the run itself, and not the engine or environment.

Dict[str, EnvironmentParameterSettings], EnvironmentParameterSettings.structure
)
cattr.register_structure_hook(
- Dict[str, AgentParameterSettings], AgentParameterSettings.structure
+ Dict[str, TaskParameterSettings], TaskParameterSettings.structure
)
cattr.register_structure_hook(Lesson, strict_to_cls)
cattr.register_structure_hook(
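Taken together, the settings changes mean an agent_parameters block is parsed into TaskParameterSettings objects, with the optional active_learner entry routed into ActiveLearnerSettings instead of being treated as a sampler. A rough sketch (not from the commit) of what that structuring produces, assuming the structure hook behaves as shown in the diff and that plain sampler entries resolve through the stock ParameterRandomizationSettings classes:

from typing import Dict
from mlagents.trainers.settings import TaskParameterSettings

raw_agent_parameters = {
    "WalkerStaticVariableSpeed": {
        "active_learner": {        # optional; omit it to get uniform task sampling
            "warmup_steps": 30,
            "capacity": 600,
            "num_mc": 500,
            "beta": 1.96,
            "raw_samples": 128,
            "num_restarts": 1,
        },
        "targetWalkingSpeed": {
            "sampler_type": "uniform",
            "sampler_parameters": {"min_value": 0.1, "max_value": 10},
        },
    }
}

settings: Dict[str, TaskParameterSettings] = TaskParameterSettings.structure(
    raw_agent_parameters, Dict[str, TaskParameterSettings]
)
walker = settings["WalkerStaticVariableSpeed"]
print(list(walker.parameters))              # ['targetWalkingSpeed']
print(walker.active_learning.warmup_steps)  # 30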

ml-agents/mlagents/trainers/trainer_controller.py (4 changes)


)
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
- from mlagents.trainers.active_learning_manager import ActiveLearningTaskManager
+ from mlagents.trainers.task_manager import TaskManager
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager

output_path: str,
run_id: str,
param_manager: EnvironmentParameterManager,
- task_manager: ActiveLearningTaskManager,
+ task_manager: TaskManager,
train: bool,
training_seed: int,
):

ml-agents/mlagents/trainers/trainer_util.py (1 change)


from mlagents_envs.logging_util import get_logger
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
- from mlagents.trainers.active_learning_manager import ActiveLearningTaskManager
from mlagents.trainers.exception import TrainerConfigError
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.exception import UnityTrainerException

config/ppo/WalkerStaticVariableSpeedActive.yaml (40 changes)


behaviors:
  WalkerStaticVariableSpeed:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 30000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: true

agent_parameters:
  WalkerStaticVariableSpeed:
    active_learner:
      warmup_steps: 30
      capacity: 600
      num_mc: 500
      beta: 1.96
      raw_samples: 128
      num_restarts: 1
    targetWalkingSpeed:
      sampler_type: uniform
      sampler_parameters:
        min_value: 0.1
        max_value: 10

ml-agents/mlagents/trainers/task_manager.py (121 changes)


from typing import Dict, List, Tuple, Optional
from mlagents.trainers.settings import (
TaskParameterSettings,
ParameterRandomizationSettings,
)
from collections import defaultdict
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.active_learning import ActiveLearningTaskSampler
logger = get_logger(__name__)
import torch
import numpy as np
class TaskManager:
def __init__(
self,
settings: Optional[Dict[str, TaskParameterSettings]] = None,
restore: bool = False,
):
"""
EnvironmentParameterManager manages all the environment parameters of a training
session. It determines when parameters should change and gives access to the
current sampler of each parameter.
:param settings: A dictionary from environment parameter to
EnvironmentParameterSettings.
:param restore: If true, the EnvironmentParameterManager will use the
GlobalTrainingStatus to try and reload the lesson status of each environment
parameter.
"""
if settings is None:
settings = {}
self._dict_settings = settings
self.behavior_names = list(self._dict_settings.keys())
self.param_names = {name: list(self._dict_settings[name].parameters.keys()) for name in self.behavior_names}
self._taskSamplers = {}
for behavior_name in self.behavior_names:
lows = []
highs = []
parameters = self._dict_settings[behavior_name].parameters
for parameter_name in self.param_names[behavior_name]:
low = parameters[parameter_name].min_value
high = parameters[parameter_name].max_value
lows.append(low)
highs.append(high)
task_ranges = torch.tensor([lows, highs]).float().T
active_hyps = self._dict_settings[behavior_name].active_learning
if active_hyps:
self._taskSamplers[behavior_name] = ActiveLearningTaskSampler(task_ranges,
warmup_steps=active_hyps.warmup_steps, capacity=active_hyps.capacity,
num_mc=active_hyps.num_mc, beta=active_hyps.beta,
raw_samples=active_hyps.raw_samples, num_restarts=active_hyps.num_restarts
)
else:
self._taskSamplers[behavior_name] = lambda n: uniform_sample(task_ranges, n)
self.t = {name: 0.0 for name in self.behavior_names}
def _make_task(self, behavior_name, tau):
task = {}
for i, name in enumerate(self.param_names[behavior_name]):
task[name] = tau[i]
return task
def _build_tau(self, behavior_name, task, time):
tau = []
for name in self.param_names[behavior_name]:
tau.append(task[name])
tau.append(time)
return torch.tensor(tau).float()
def get_tasks(self, behavior_name, num_samples) -> Dict[str, ParameterRandomizationSettings]:
"""
TODO
"""
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names
current_time = self.t[behavior_name] + 1
if isinstance(self._taskSamplers[behavior_name], ActiveLearningTaskSampler):
taus = self._taskSamplers[behavior_name].get_design_points(num_points=num_samples, time=current_time).data.numpy().tolist()
else:
taus = self._taskSamplers[behavior_name](num_samples).tolist()
tasks = [self._make_task(behavior_name, tau) for tau in taus]
return tasks
def update(self, behavior_name: str, task_perfs: List[Tuple[Dict, float]]
) -> Tuple[bool, bool]:
"""
TODO
"""
must_reset = False
updated = False
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names
if isinstance(self._taskSamplers[behavior_name], ActiveLearningTaskSampler):
updated = True
taus = []
perfs = []
for task, perf in task_perfs:
perfs.append(perf)
self.t[behavior_name] = self.t[behavior_name] + 1
tau = self._build_tau(behavior_name, task, self.t[behavior_name])
taus.append(tau)
X = torch.stack(taus, dim=0)
Y = torch.tensor(perfs).float()
self._taskSamplers[behavior_name].update_model(X, Y, refit=True)
return updated, must_reset
def uniform_sample(ranges, num_samples):
low = ranges[:, 0]
high = ranges[:, 1]
# draw num_samples points, one value per task-parameter dimension
points = np.random.uniform(low=low, high=high, size=(num_samples, ranges.shape[0]))
return points
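In this commit the manager is wired up through learn.py and trainer_controller.py, but it can also be exercised on its own. A rough, standalone sketch (not part of the commit; class and method names come from the diff, the concrete values are invented, and it assumes this branch of ml-agents with torch installed plus the stock UniformSettings sampler class for the parameter range):

from mlagents.trainers.settings import TaskParameterSettings, UniformSettings
from mlagents.trainers.task_manager import TaskManager

settings = {
    "WalkerStaticVariableSpeed": TaskParameterSettings(
        parameters={"targetWalkingSpeed": UniformSettings(min_value=0.1, max_value=10.0)},
        active_learning=None,  # no active_learner settings -> uniform task sampling
    )
}
manager = TaskManager(settings)

# each task is a dict keyed by parameter name, e.g. {"targetWalkingSpeed": 3.7}
tasks = manager.get_tasks("WalkerStaticVariableSpeed", num_samples=4)

# report (task, episodic return) pairs back after episodes finish; with the uniform
# fallback this is effectively a no-op, with active learning it refits the GP task sampler
updated, must_reset = manager.update("WalkerStaticVariableSpeed", [(tasks[0], 250.0)])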

ml-agents/mlagents/trainers/active_learning_manager.py (100 changes)


from typing import Dict, List, Tuple, Optional
from mlagents.trainers.settings import (
AgentParameterSettings,
ParameterRandomizationSettings,
)
from collections import defaultdict
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.active_learning import ActiveLearningTaskSampler
logger = get_logger(__name__)
import torch
class ActiveLearningTaskManager:
def __init__(
self,
settings: Optional[Dict[str,AgentParameterSettings]] = None,
restore: bool = False,
):
"""
EnvironmentParameterManager manages all the environment parameters of a training
session. It determines when parameters should change and gives access to the
current sampler of each parameter.
:param settings: A dictionary from environment parameter to
EnvironmentParameterSettings.
:param restore: If true, the EnvironmentParameterManager will use the
GlobalTrainingStatus to try and reload the lesson status of each environment
parameter.
"""
if settings is None:
settings = {}
self._dict_settings = settings
self.behavior_names = list(self._dict_settings.keys())
self.param_names = {name: list(self._dict_settings[name].parameters.keys()) for name in self.behavior_names}
self._taskSamplers = {}
for behavior_name in self.behavior_names:
lows = []
highs = []
parameters = self._dict_settings[behavior_name].parameters
for parameter_name in self.param_names[behavior_name]:
low = parameters[parameter_name].min_value
high = parameters[parameter_name].max_value
lows.append(low)
highs.append(high)
task_ranges = torch.tensor([lows, highs]).float().T
self._taskSamplers[behavior_name] = ActiveLearningTaskSampler(task_ranges)
self.t = {name: 0.0 for name in self.behavior_names}
def _make_task(self, behavior_name, tau):
task = {}
for i, name in enumerate(self.param_names[behavior_name]):
task[name] = tau[i]
return task
def _build_tau(self, behavior_name, task, time):
tau = []
for name in self.param_names[behavior_name]:
tau.append(task[name])
tau.append(time)
return torch.tensor(tau).float()
def get_tasks(self, behavior_name, num_samples) -> Dict[str, ParameterRandomizationSettings]:
"""
TODO
"""
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names
current_time = self.t[behavior_name] + 1
taus = self._taskSamplers[behavior_name].get_design_points(num_points=num_samples, time=current_time).data.numpy().tolist()
tasks = [self._make_task(behavior_name, tau) for tau in taus]
return tasks
def update(self, behavior_name: str, task_perfs: List[Tuple[Dict, float]]
) -> Tuple[bool, bool]:
"""
TODO
"""
must_reset = False
updated = True
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names
taus = []
perfs = []
for task, perf in task_perfs:
perfs.append(perf)
self.t[behavior_name] = self.t[behavior_name] + 1
tau = self._build_tau(behavior_name, task, self.t[behavior_name])
taus.append(tau)
X = torch.stack(taus, dim=0)
Y = torch.tensor(perfs).float()
self._taskSamplers[behavior_name].update_model(X, Y, refit=True)
return updated, must_reset