Added comments and new yaml files for variable speed walker

4 年前 · e33168d6
--- a/config/ppo/WalkerStaticVariableSpeed.yaml
+++ b/config/ppo/WalkerStaticVariableSpeed.yaml
    threaded: true
 agent_parameters:
  WalkerStaticVariableSpeed:
+    num_repeat: 1
+    num_batch: 1
    targetWalkingSpeed:
      sampler_type: uniform
      sampler_parameters:
--- a/config/ppo/WalkerStaticVariableSpeedActive.yaml
+++ b/config/ppo/WalkerStaticVariableSpeedActive.yaml
    threaded: true
 agent_parameters:
  WalkerStaticVariableSpeed:
+    num_repeat: 8
+    num_batch: 16
-      warmup_steps: 30
+      warmup_steps: 600
-      num_mc: 500
+      num_mc: 100
-      raw_samples: 128
+      raw_samples: 100
      num_restarts: 1
    targetWalkingSpeed:
      sampler_type: uniform
--- a/ml-agents/mlagents/trainers/active_learning.py
+++ b/ml-agents/mlagents/trainers/active_learning.py
        maximize: bool = True,
    ) -> None:
        r"""q-Expected Improvement of Skill Performance.
-
+        
        Args:
            model: A fitted model.
            beta: value to trade off between upper confidence bound and mean of fantasized performance.
        xdims = train_X.shape[-1]
        self.Kspatial = ScaleKernel(RBFKernel(active_dims=torch.tensor(list(range(xdims-1)))))
        self.Ktime = ScaleKernel(RBFKernel(active_dims=torch.tensor([xdims-1])))
-        # Kspatial = ScaleKernel(RBFKernel())
-        # Ktime = ScaleKernel(RBFKernel())
-        # self.covar_module = ScaleKernel(RBFKernel()) # AdditiveKernel(Kspatial, ProductKernel(Kspatial, Ktime))
        self.covar_module = AdditiveKernel(self.Kspatial, ProductKernel(self.Kspatial, self.Ktime))
        self.to(train_X)  # make sure we're on the right device/dtype
        
--- a/ml-agents/mlagents/trainers/settings.py
+++ b/ml-agents/mlagents/trainers/settings.py

@attr.s(auto_attribs=True)
 class ActiveLearnerSettings:
-    warmup_steps:int=30
-    capacity:int=600
-    num_mc:int=50
-    beta:float=1.96
-    raw_samples:int=128
-    num_restarts:int=1
+    warmup_steps:int=30     # number of data points before active learning is used
+    capacity:int=600        # maximum number of data points to store
+    num_mc:int=50           # number of Monte Carlo points to integrate over task distribution
+    beta:float=1.96         # upper confidence bound parameter ucb = mean + beta * std
+    raw_samples:int=128     # number of task samples to generate before selecting one to optimize
+    num_restarts:int=1      # how many different task parameters to try and optimize at once before choosing the best. 

@attr.s(auto_attribs=True)
 class TaskParameterSettings:
-    num_repeat:Optional[int]=1
-    num_batch:Optional[int]=1
+    num_repeat:Optional[int]=1  # number of times to repeat a sampled skill
+    num_batch:Optional[int]=1   # minimum number of skills to get at once

    @staticmethod
    def structure(d: Mapping, t: type) -> Dict[str, "TaskParameterSettings"]:
--- a/ml-agents/mlagents/trainers/task_manager.py
+++ b/ml-agents/mlagents/trainers/task_manager.py
            self.num_batch[behavior_name] = self._dict_settings[behavior_name].num_batch
            
            active_hyps = self._dict_settings[behavior_name].active_learning
-            if active_hyps:
+            if active_hyps:  # use active learning
-            else:
+            else:  # use uniform random sampling
-        print("num batch", self.num_batch)
+
+        """
+        converts array to dictionary so it can be passed to c# side through agent parameter channel
+        """
        task = {}
        for i, name in enumerate(self.param_names[behavior_name]):
            task[name] = tau[i]
+        """
+        converts a dictionary description of the task to a vector representation and adds the time parameter. 
+        """
        tau = []
        for name in self.param_names[behavior_name]:
            tau.append(task[name])
    def get_tasks(self, behavior_name, num_samples) -> Dict[str, ParameterRandomizationSettings]:
        """
-        TODO
+        Samples task parameters to pass to agents
        """
        behavior_name = [bname for bname in self.behavior_names if bname in behavior_name][0] # TODO make work with actual behavior names
        current_time = self.t[behavior_name] + 1
        return tasks_repeated

    def add_run(self, behavior_name, tau, perf):
+        """
+        adds a finished run to the buffer organized by tau
+        """
+        """
+        Compiles performances that have been completed
+        """
        taus = []
        perfs = []
        t = self.t[behavior_name]
    def update(self, behavior_name: str, task_perfs: List[Tuple[Dict, float]]
    ) -> Tuple[bool, bool]:
        """
-        TODO
+        Updates the model of the task performance
        """

        must_reset = False