Working continuous updates

5 years ago · bc04f9dc
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
            num_layers = 1
        if brain.vector_action_space_type == "continuous":
            self.create_cc_actor(h_size, num_layers, vis_encode_type)
-            self.entropy = tf.ones_like(tf.reshape(self.entropy, [-1])) * self.entropy
        else:
            self.create_dc_actor_critic(h_size, num_layers, vis_encode_type)


        self.all_log_probs = tf.identity(all_probs, name="action_probs")

-        self.entropy = 0.5 * tf.reduce_mean(
+        single_dim_entropy = 0.5 * tf.reduce_mean(
+        # Make entropy the right shape
+        self.entropy = tf.ones_like(tf.reshape(mu[:, 0], [-1])) * single_dim_entropy

        # We keep these tensors the same name, but use new nodes to keep code parallelism with discrete control.
        self.log_probs = tf.reduce_sum(
--- a/ml-agents/mlagents/trainers/ppo/optimizer.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer.py
        :param out_dict: Output dictionary mapping names to nodes.
        :return: Dictionary mapping names to input data.
        """
-        print(feed_dict)
        network_out = self.sess.run(list(out_dict.values()), feed_dict=feed_dict)
        run_out = dict(zip(list(out_dict.keys()), network_out))
        return run_out