
Hotfix 0.3.0c (#618)

Fixes the following issues:

* Missing component reference in BananaRL environment.
* Neural Network for multiple visual observations was not properly generated.
* Episode time-out value estimate bootstrapping used incorrect observation as input.
/develop-generalizationTraining-TrainerController
GitHub · 6 years ago
Current commit: 237b41f9
9 files changed, 67 insertions(+), 41 deletions(-)
  1. docs/Getting-Started-with-Balance-Ball.md (2 changes)
  2. docs/Learning-Environment-Examples.md (10 changes)
  3. python/trainer_config.yaml (6 changes)
  4. python/unitytrainers/bc/trainer.py (9 changes)
  5. python/unitytrainers/models.py (7 changes)
  6. python/unitytrainers/ppo/trainer.py (29 changes)
  7. python/unitytrainers/trainer.py (5 changes)
  8. python/unitytrainers/trainer_controller.py (8 changes)
  9. unity-environment/Assets/ML-Agents/Examples/BananaCollectors/BananaRL.unity (32 changes)

docs/Getting-Started-with-Balance-Ball.md (2 changes)


To summarize, go to your command line, enter the `ml-agents` directory and type:
- ```python
+ ```
python3 python/learn.py <env_file_path> --run-id=<run-identifier> --train
```
**Note**: If you're using Anaconda, don't forget to activate the ml-agents environment first.

docs/Learning-Environment-Examples.md (10 changes)


![Banana](images/banana.png)
* Set-up: A multi-agent environment where agents compete to collect bananas.
- * Goal: The agents must learn to move to as many yellow bananas as possible while avoiding red bananas.
- * Agents: The environment contains 10 agents linked to a single brain.
+ * Goal: The agents must learn to move to as many yellow bananas as possible while avoiding blue bananas.
+ * Agents: The environment contains 5 agents linked to a single brain.
- * -1 for interaction with red banana.
+ * -1 for interaction with blue banana.
- * Vector Observation space: (Continuous) 51 corresponding to velocity of agent, plus ray-based perception of objects around agent's forward direction.
+ * Vector Observation space: (Continuous) 53 corresponding to velocity of agent (2), whether agent is frozen and/or shot its laser (2), plus ray-based perception of objects around agent's forward direction (49; 7 raycast angles with 7 measurements for each).
- * Visual Observations (Optional): First-person view for each agent.
+ * Visual Observations (Optional; None by default): First-person view for each agent.
* Reset Parameters: None
## Hallway
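The updated BananaCollectors entry above breaks the 53-dimensional vector observation into velocity (2), agent status flags (2), and ray-based perception (49). A small illustrative sketch of that layout; the names are hypothetical and only the arithmetic comes from the doc:

```python
# Illustrative breakdown of the 53-dimensional BananaCollectors observation
# described above. Names are hypothetical; only the arithmetic comes from the doc.
RAY_ANGLES = 7            # raycast directions fanned around the agent's forward axis
MEASUREMENTS_PER_RAY = 7  # values reported per ray

velocity_dims = 2         # agent velocity
status_dims = 2           # frozen flag, laser-fired flag
ray_dims = RAY_ANGLES * MEASUREMENTS_PER_RAY  # 49

observation_size = velocity_dims + status_dims + ray_dims
assert observation_size == 53
```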

python/trainer_config.yaml (6 changes)


Ball3DBrain:
    normalize: true
    batch_size: 1200
    buffer_size: 12000
    summary_freq: 1000
    time_horizon: 1000
    gamma: 0.995
    beta: 0.001

BouncerBrain:
    normalize: true
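trainer_config.yaml maps each brain name to its hyperparameter overrides, which the trainers combine with built-in defaults. A minimal sketch, assuming PyYAML, of reading the Ball3DBrain block shown above:

```python
# Minimal sketch of reading a per-brain override block from trainer_config.yaml.
# Assumes PyYAML is available; the actual trainers merge these values with defaults.
import yaml

with open("python/trainer_config.yaml") as f:
    config = yaml.safe_load(f)

ball3d = config["Ball3DBrain"]
print(ball3d["batch_size"], ball3d["buffer_size"], ball3d["gamma"])  # 1200 12000 0.995
```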

python/unitytrainers/bc/trainer.py (9 changes)


self.episode_steps[agent_id] = 0
self.episode_steps[agent_id] += 1
- def process_experiences(self, info: AllBrainInfo):
+ def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
- :param info: Current AllBrainInfo
+ :param current_info: Current AllBrainInfo
+ :param next_info: Next AllBrainInfo
- info_teacher = info[self.brain_to_imitate]
+ info_teacher = next_info[self.brain_to_imitate]
for l in range(len(info_teacher.agents)):
if ((info_teacher.local_done[l] or
len(self.training_buffer[info_teacher.agents[l]]['actions']) > self.trainer_parameters[

training_length=self.sequence_length)
self.training_buffer[agent_id].reset_agent()
- info_student = info[self.brain_name]
+ info_student = next_info[self.brain_name]
for l in range(len(info_student.agents)):
if info_student.local_done[l]:
agent_id = info_student.agents[l]
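With the new signature, the behavioral-cloning trainer reads episode termination from the post-step `next_info` rather than the pre-step state. A simplified, hypothetical stand-in for that pattern, not the project's exact method body:

```python
# Simplified, hypothetical stand-in for BCTrainer.process_experiences illustrating
# the two-argument pattern above: done flags are read from next_info (the post-step
# state) for both the teacher brain and the student brain.
def process_experiences(self, current_info, next_info):
    info_teacher = next_info[self.brain_to_imitate]
    for l in range(len(info_teacher.agents)):
        if info_teacher.local_done[l]:
            # a teacher episode finished: clear that agent's demonstration buffer
            self.training_buffer[info_teacher.agents[l]].reset_agent()

    info_student = next_info[self.brain_name]
    for l in range(len(info_student.agents)):
        if info_student.local_done[l]:
            # a student episode finished: reset its per-episode step counter
            self.episode_steps[info_student.agents[l]] = 0
```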

python/unitytrainers/models.py (7 changes)


kernel_initializer=c_layers.variance_scaling_initializer(1.0))
return hidden
- def create_visual_encoder(self, h_size, activation, num_layers):
+ def create_visual_encoder(self, image_input, h_size, activation, num_layers):
+ :param image_input: The placeholder for the image input to use.
- conv1 = tf.layers.conv2d(self.visual_in[-1], 16, kernel_size=[8, 8], strides=[4, 4],
+ conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8], strides=[4, 4],
activation=tf.nn.elu)
conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],
activation=tf.nn.elu)

hidden_state, hidden_visual = None, None
if brain.number_visual_observations > 0:
for j in range(brain.number_visual_observations):
- encoded_visual = self.create_visual_encoder(h_size, activation_fn, num_layers)
+ encoded_visual = self.create_visual_encoder(self.visual_in[j], h_size, activation_fn, num_layers)
visual_encoders.append(encoded_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
if brain.vector_observation_space_size > 0:
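The change above routes each visual observation placeholder through its own encoder (using `self.visual_in[j]` rather than always `self.visual_in[-1]`) and concatenates the results. A minimal, self-contained sketch of that pattern in the TF 1.x layers style used at the time, simplified relative to the real `models.py`:

```python
# Sketch of the "one encoder per visual observation" pattern restored by the change
# above. Simplified relative to unitytrainers/models.py; TF 1.x layers API.
import tensorflow as tf

def create_visual_encoder(image_input, h_size, activation):
    """Encode a single visual observation placeholder into a flat hidden vector."""
    conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8], strides=[4, 4],
                             activation=tf.nn.elu)
    conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],
                             activation=tf.nn.elu)
    return tf.layers.dense(tf.layers.flatten(conv2), h_size, activation=activation)

def encode_all_visual_observations(visual_in, h_size, activation):
    # one encoder per placeholder: visual_in[j], not visual_in[-1]
    encoders = [create_visual_encoder(v, h_size, activation) for v in visual_in]
    return tf.concat(encoders, axis=1)
```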

python/unitytrainers/ppo/trainer.py (29 changes)


self.episode_steps[agent_id] = 0
self.episode_steps[agent_id] += 1
- def process_experiences(self, all_info: AllBrainInfo):
+ def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo):
- :param all_info: Dictionary of all current brains and corresponding BrainInfo.
+ :param current_info: Dictionary of all current brains and corresponding BrainInfo.
+ :param new_info: Dictionary of all next brains and corresponding BrainInfo.
- info = all_info[self.brain_name]
+ info = new_info[self.brain_name]
+ last_info = current_info[self.brain_name]
for l in range(len(info.agents)):
agent_actions = self.training_buffer[info.agents[l]]['actions']
if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])

else:
- feed_dict = {self.model.batch_size: len(info.vector_observations), self.model.sequence_length: 1}
+ if info.max_reached[l]:
+ bootstrapping_info = last_info
+ else:
+ bootstrapping_info = info
+ feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1}
- for i in range(len(info.visual_observations)):
- feed_dict[self.model.visual_in[i]] = info.visual_observations[i]
+ for i in range(len(bootstrapping_info.visual_observations)):
+ feed_dict[self.model.visual_in[i]] = bootstrapping_info.visual_observations[i]
- feed_dict[self.model.vector_in] = info.vector_observations
+ feed_dict[self.model.vector_in] = bootstrapping_info.vector_observations
- if info.memories.shape[1] == 0:
- info.memories = np.zeros((len(info.vector_observations), self.m_size))
- feed_dict[self.model.memory_in] = info.memories
+ if bootstrapping_info.memories.shape[1] == 0:
+ bootstrapping_info.memories = np.zeros((len(bootstrapping_info.vector_observations), self.m_size))
+ feed_dict[self.model.memory_in] = bootstrapping_info.memories
- feed_dict[self.model.prev_action] = np.reshape(info.previous_vector_actions, [-1])
+ feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1])
value_next = self.sess.run(self.model.value, feed_dict)[l]
agent_id = info.agents[l]
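This is the heart of the third fix in the commit message: when an episode ends because the step limit was hit (`max_reached`), the post-step BrainInfo already holds the observation from the reset environment, so the value used to bootstrap returns must come from the last pre-reset observation in `current_info`. A condensed, hypothetical helper showing just that selection logic (memories and previous actions are handled the same way in the trainer):

```python
# Condensed, hypothetical helper showing the bootstrapping choice introduced above.
# When an episode is cut off by the step limit, new_info already contains the
# post-reset observation, so the value estimate is taken from current_info instead.
def select_bootstrap_value(sess, model, current_info, new_info, agent_index):
    info = new_info
    if info.local_done[agent_index] and not info.max_reached[agent_index]:
        return 0.0  # genuine terminal state: nothing to bootstrap
    bootstrapping_info = current_info if info.max_reached[agent_index] else info
    feed_dict = {
        model.batch_size: len(bootstrapping_info.vector_observations),
        model.sequence_length: 1,
        model.vector_in: bootstrapping_info.vector_observations,
    }
    for i, visual_obs in enumerate(bootstrapping_info.visual_observations):
        feed_dict[model.visual_in[i]] = visual_obs
    return sess.run(model.value, feed_dict)[agent_index]
```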

python/unitytrainers/trainer.py (5 changes)


"""
raise UnityTrainerException("The add_experiences method was not implemented.")
- def process_experiences(self, info: AllBrainInfo):
+ def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
- :param info: Dictionary of all current brains and corresponding BrainInfo.
+ :param current_info: Dictionary of all current-step brains and corresponding BrainInfo.
+ :param next_info: Dictionary of all next-step brains and corresponding BrainInfo.
"""
raise UnityTrainerException("The process_experiences method was not implemented.")

python/unitytrainers/trainer_controller.py (8 changes)


for brain_name, trainer in self.trainers.items():
trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
- curr_info = new_info
for brain_name, trainer in self.trainers.items():
- trainer.process_experiences(curr_info)
+ trainer.process_experiences(curr_info, new_info)
- # Write training statistics to tensorboard.
+ # Write training statistics to Tensorboard.
trainer.write_summary(self.env.curriculum.lesson_number)
if self.train_model and trainer.get_step <= trainer.get_max_steps:
trainer.increment_step()

if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
# Save Tensorflow model
self._save_model(sess, steps=global_step, saver=saver)
+ curr_info = new_info
# Final save Tensorflow model
if global_step != 0 and self.train_model:
self._save_model(sess, steps=global_step, saver=saver)
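With the new two-argument `process_experiences`, the controller must keep the pre-step `curr_info` alive until every trainer has seen both states, and only then roll it forward. A stripped-down, hypothetical sketch of that per-step ordering, not the controller's full loop:

```python
# Stripped-down, hypothetical sketch of the per-step ordering enforced above:
# both the pre-step state (curr_info) and the post-step state (new_info) reach
# every trainer before curr_info is advanced.
def run_step(env, trainers, curr_info, actions, take_action_outputs):
    new_info = env.step(actions)  # advance the environment by one step
    for brain_name, trainer in trainers.items():
        trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
        trainer.process_experiences(curr_info, new_info)
    return new_info  # caller then assigns: curr_info = new_info for the next step
```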

unity-environment/Assets/ML-Agents/Examples/BananaCollectors/BananaRL.unity (32 changes)


--- !u!104 &2
RenderSettings:
m_ObjectHideFlags: 0
- serializedVersion: 9
+ serializedVersion: 8
m_Fog: 0
m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
m_FogMode: 3

m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1}
m_UseRadianceAmbientProbe: 0
- m_GIWorkflowMode: 0
+ m_GIWorkflowMode: 1
m_GISettings:
serializedVersion: 2
m_BounceScale: 1

m_EnableBakedLightmaps: 1
m_EnableRealtimeLightmaps: 1
m_LightmapEditorSettings:
- serializedVersion: 10
+ serializedVersion: 9
m_AtlasSize: 1024
m_TextureWidth: 1024
m_TextureHeight: 1024
m_AO: 1
m_AOMaxDistance: 1
m_CompAOExponent: 1

objectReference: {fileID: 0}
- target: {fileID: 1819751139121548, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
propertyPath: m_IsActive
- value: 0
+ value: 1
- target: {fileID: 114508049814297234, guid: 38400a68c4ea54b52998e34ee238d1a7,
type: 2}
propertyPath: myAcademyObj
value:
objectReference: {fileID: 1574236047}
m_RemovedComponents: []
m_ParentPrefab: {fileID: 100100000, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
m_IsPrefabParent: 0

objectReference: {fileID: 0}
- target: {fileID: 1819751139121548, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
propertyPath: m_IsActive
- value: 0
+ value: 1
- target: {fileID: 114508049814297234, guid: 38400a68c4ea54b52998e34ee238d1a7,
type: 2}
propertyPath: myAcademyObj
value:
objectReference: {fileID: 1574236047}
m_RemovedComponents: []
m_ParentPrefab: {fileID: 100100000, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
m_IsPrefabParent: 0

m_TargetEye: 3
m_HDR: 1
m_AllowMSAA: 1
m_AllowDynamicResolution: 0
m_ForceIntoRT: 1
m_OcclusionCulling: 1
m_StereoConvergence: 10

objectReference: {fileID: 0}
- target: {fileID: 1819751139121548, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
propertyPath: m_IsActive
- value: 0
+ value: 1
- target: {fileID: 114508049814297234, guid: 38400a68c4ea54b52998e34ee238d1a7,
type: 2}
propertyPath: myAcademyObj
value:
objectReference: {fileID: 1574236047}
m_RemovedComponents: []
m_ParentPrefab: {fileID: 100100000, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
m_IsPrefabParent: 0