浏览代码

Added comments and new yaml files for variable speed walker

/active-variablespeed
Scott Jordan 4 年前
当前提交
e33168d6
共有 5 个文件被更改,包括 33 次插入20 次删除
  1. 2
      config/ppo/WalkerStaticVariableSpeed.yaml
  2. 8
      config/ppo/WalkerStaticVariableSpeedActive.yaml
  3. 5
      ml-agents/mlagents/trainers/active_learning.py
  4. 16
      ml-agents/mlagents/trainers/settings.py
  5. 22
      ml-agents/mlagents/trainers/task_manager.py

2
config/ppo/WalkerStaticVariableSpeed.yaml


threaded: true
agent_parameters:
WalkerStaticVariableSpeed:
num_repeat: 1
num_batch: 1
targetWalkingSpeed:
sampler_type: uniform
sampler_parameters:

8
config/ppo/WalkerStaticVariableSpeedActive.yaml


threaded: true
agent_parameters:
WalkerStaticVariableSpeed:
num_repeat: 8
num_batch: 16
warmup_steps: 30
warmup_steps: 600
num_mc: 500
num_mc: 100
raw_samples: 128
raw_samples: 100
num_restarts: 1
targetWalkingSpeed:
sampler_type: uniform

5
ml-agents/mlagents/trainers/active_learning.py


maximize: bool = True,
) -> None:
r"""q-Expected Improvement of Skill Performance.
Args:
model: A fitted model.
beta: value to trade off between upper confidence bound and mean of fantasized performance.

xdims = train_X.shape[-1]
self.Kspatial = ScaleKernel(RBFKernel(active_dims=torch.tensor(list(range(xdims-1)))))
self.Ktime = ScaleKernel(RBFKernel(active_dims=torch.tensor([xdims-1])))
# Kspatial = ScaleKernel(RBFKernel())
# Ktime = ScaleKernel(RBFKernel())
# self.covar_module = ScaleKernel(RBFKernel()) # AdditiveKernel(Kspatial, ProductKernel(Kspatial, Ktime))
self.covar_module = AdditiveKernel(self.Kspatial, ProductKernel(self.Kspatial, self.Ktime))
self.to(train_X) # make sure we're on the right device/dtype

16
ml-agents/mlagents/trainers/settings.py


@attr.s(auto_attribs=True)
class ActiveLearnerSettings:
warmup_steps:int=30
capacity:int=600
num_mc:int=50
beta:float=1.96
raw_samples:int=128
num_restarts:int=1
warmup_steps:int=30 # number of data points before active learning is used
capacity:int=600 # maximum number of data points to store
num_mc:int=50 # number of monte-carlo points to integrate over task distribution
beta:float=1.96 # upper confidence bound parameter ucb = mean + beta * std
raw_samples:int=128 # number of task samples to generate before selecting one to optimize
num_restarts:int=1 # how many different task parameters to try and optimize at once before choosing the best.
@attr.s(auto_attribs=True)
class TaskParameterSettings:

num_repeat:Optional[int]=1
num_batch:Optional[int]=1
num_repeat:Optional[int]=1 # number of times to repeat a sampled skill
num_batch:Optional[int]=1 # minimum number of skills to get at once
@staticmethod
def structure(d: Mapping, t: type) -> Dict[str, "TaskParameterSettings"]:

22
ml-agents/mlagents/trainers/task_manager.py


self.num_batch[behavior_name] = self._dict_settings[behavior_name].num_batch
active_hyps = self._dict_settings[behavior_name].active_learning
if active_hyps:
if active_hyps: # use active learning
else:
else: # use uniform random sampling
print("num batch", self.num_batch)
"""
converts array to dictionary so it can be passed to c# side through agent parameter channel
"""
task = {}
for i, name in enumerate(self.param_names[behavior_name]):
task[name] = tau[i]

"""
converts a dictionary description of the task to a vector representation and adds the time parameter.
"""
tau = []
for name in self.param_names[behavior_name]:
tau.append(task[name])

def get_tasks(self, behavior_name, num_samples) -> Dict[str, ParameterRandomizationSettings]:
"""
TODO
Samples task parameters to pass to agents
"""
behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names
current_time = self.t[behavior_name] + 1

return tasks_repeated
def add_run(self, behavior_name, tau, perf):
"""
adds a finished run to the buffer organized by tau
"""
"""
Compiles performances that have been completed
"""
taus = []
perfs = []
t = self.t[behavior_name]

def update(self, behavior_name: str, task_perfs: List[Tuple[Dict, float]]
) -> Tuple[bool, bool]:
"""
TODO
Updates the model of the task performance
"""
must_reset = False

正在加载...
取消
保存