
Merge pull request #4385 from Unity-Technologies/release_2_verified-barracuda-1.0.2

Update verified branch with Barracuda 1.0.2
/r2v-yamato-linux
GitHub, 4 years ago
Current commit
f7373172
51 files changed: 152 insertions, 150 deletions
  1. .pre-commit-config.yaml (12)
  2. .yamato/com.unity.ml-agents-test.yml (2)
  3. .yamato/gym-interface-test.yml (24)
  4. .yamato/protobuf-generation-test.yml (5)
  5. .yamato/python-ll-api-test.yml (25)
  6. .yamato/standalone-build-test.yml (2)
  7. .yamato/training-int-tests.yml (2)
  8. com.unity.ml-agents/CHANGELOG.md (9)
  9. com.unity.ml-agents/Runtime/Academy.cs (2)
  10. com.unity.ml-agents/package.json (8)
  11. gym-unity/setup.py (4)
  12. ml-agents-envs/mlagents_envs/base_env.py (8)
  13. ml-agents-envs/mlagents_envs/communicator.py (2)
  14. ml-agents-envs/mlagents_envs/environment.py (22)
  15. ml-agents-envs/mlagents_envs/exception.py (2)
  16. ml-agents-envs/mlagents_envs/rpc_communicator.py (2)
  17. ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py (2)
  18. ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py (2)
  19. ml-agents-envs/mlagents_envs/tests/test_side_channel.py (4)
  20. ml-agents-envs/setup.py (2)
  21. ml-agents/mlagents/trainers/buffer.py (4)
  22. ml-agents/mlagents/trainers/components/bc/model.py (2)
  23. ml-agents/mlagents/trainers/components/bc/module.py (2)
  24. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (2)
  25. ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (6)
  26. ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (6)
  27. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (4)
  28. ml-agents/mlagents/trainers/curriculum.py (12)
  29. ml-agents/mlagents/trainers/ghost/controller.py (4)
  30. ml-agents/mlagents/trainers/ghost/trainer.py (2)
  31. ml-agents/mlagents/trainers/models.py (6)
  32. ml-agents/mlagents/trainers/policy/tf_policy.py (12)
  33. ml-agents/mlagents/trainers/ppo/optimizer.py (12)
  34. ml-agents/mlagents/trainers/ppo/trainer.py (16)
  35. ml-agents/mlagents/trainers/sac/network.py (6)
  36. ml-agents/mlagents/trainers/sac/optimizer.py (4)
  37. ml-agents/mlagents/trainers/sac/trainer.py (14)
  38. ml-agents/mlagents/trainers/sampler_class.py (2)
  39. ml-agents/mlagents/trainers/stats.py (12)
  40. ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)
  41. ml-agents/mlagents/trainers/trainer/rl_trainer.py (2)
  42. ml-agents/mlagents/trainers/trainer/trainer.py (4)
  43. ml-agents/mlagents/trainers/trainer_controller.py (2)
  44. ml-agents/mlagents/trainers/trainer_util.py (2)
  45. ml-agents/setup.py (5)
  46. ml-agents/tests/yamato/check_coverage_percent.py (3)
  47. ml-agents/tests/yamato/scripts/run_gym.py (4)
  48. ml-agents/tests/yamato/scripts/run_llapi.py (2)
  49. ml-agents/tests/yamato/yamato_utils.py (3)
  50. test_requirements.txt (2)
  51. utils/validate_versions.py (2)

.pre-commit-config.yaml (12)


# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.2.2, flake8-tidy-imports==4.1.0, flake8-bugbear==20.1.4]
- repo: https://github.com/asottile/pyupgrade
rev: v2.7.0
hooks:
- id: pyupgrade
args: [--py3-plus, --py36-plus]
exclude: >
(?x)^(
.*barracuda.py|
.*_pb2.py|
.*_pb2_grpc.py
)$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
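
Most of the Python churn in this diff comes from the pre-commit change above: pyupgrade now runs with --py3-plus and --py36-plus and mechanically rewrites legacy Python 2 idioms. A minimal, self-contained sketch of the recurring rewrite patterns seen in the files below (illustrative only, not code from this repository):

# 1. str.format -> f-string; format specs such as :0.3f carry over unchanged.
#    Where a template spans several concatenated literals, pyupgrade instead
#    just drops the explicit indices: "{0}".format(x) -> "{}".format(x).
name, elo = "behavior", 1234.5678
assert "{0} ELO: {1:0.3f}".format(name, elo) == f"{name} ELO: {elo:0.3f}"

# 2. Redundant `object` bases and super() arguments are dropped.
class Base:  # was: class Base(object):
    def __init__(self, message):
        self.message = message

class Child(Base):
    def __init__(self, message):
        super().__init__(message)  # was: super(Child, self).__init__(message)

# 3. Exception aliases collapse to OSError (IOError and socket.error have
#    been aliases of OSError since Python 3.3).
assert IOError is OSError

# 4. .encode() of an ASCII literal becomes a bytes literal.
assert "foo".encode("ascii") == b"foo"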

.yamato/com.unity.ml-agents-test.yml (2)


image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --extra-index-url https://artifactory.eu-cph-1.unityops.net/api/pypi/common-python/simple
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/unity-pypi-local/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test -u {{ editor.version }} --package-path com.unity.ml-agents {{ editor.coverageOptions }}

.yamato/gym-interface-test.yml (24)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:

changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/gym-interface-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match "gym-unity/**" OR
pull_request.changes.any match ".yamato/gym-interface-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

.yamato/protobuf-generation-test.yml (5)


nuget install Grpc.Tools -Version $GRPC_VERSION -OutputDirectory protobuf-definitions/
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install grpcio-tools==1.13.0 --progress-bar=off
pip install mypy-protobuf==1.16.0 --progress-bar=off
pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
cd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/grpc_csharp_plugin

.yamato/python-ll-api-test.yml (25)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer

cancel_old_ci: true
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/python-ll-api-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/python-ll-api-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

.yamato/standalone-build-test.yml (2)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity

.yamato/training-int-tests.yml (2)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need

com.unity.ml-agents/CHANGELOG.md (9)


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [1.0.4] - 2020-08-19
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.2. (#4385)
- Explicitly call out dependencies in package.json.
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.0.3] - 2020-07-07
### Minor Changes
#### com.unity.ml-agents (C#)

com.unity.ml-agents/Runtime/Academy.cs (2)


/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "1.0.3";
internal const string k_PackageVersion = "1.0.4";
const int k_EditorTrainingPort = 5004;
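
The comment above notes that k_PackageVersion must match the version string in package.json and is checked in a unit test; utils/validate_versions.py (the last file in this diff) enforces the same invariant from the Python side. A rough sketch of such a cross-file consistency check, where the paths and the regex are assumptions for illustration rather than the repository's actual code:

import json
import re

def versions_match(package_json_path: str, academy_cs_path: str) -> bool:
    # Read the package version declared in package.json.
    with open(package_json_path) as f:
        package_version = json.load(f)["version"]
    # Extract k_PackageVersion from Academy.cs with a simple pattern match.
    with open(academy_cs_path) as f:
        match = re.search(r'k_PackageVersion = "([^"]+)"', f.read())
    return match is not None and match.group(1) == package_version

assert versions_match(
    "com.unity.ml-agents/package.json", "com.unity.ml-agents/Runtime/Academy.cs"
)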

com.unity.ml-agents/package.json (8)


{
"name": "com.unity.ml-agents",
"displayName": "ML Agents",
"version": "1.0.3",
"version": "1.0.4",
"com.unity.barracuda": "1.0.1"
"com.unity.barracuda": "1.0.2",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",
"com.unity.modules.physics2d": "1.0.0"
}
}

gym-unity/setup.py (4)


tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
install_requires=["gym", "mlagents_envs=={}".format(VERSION)],
install_requires=["gym", f"mlagents_envs=={VERSION}"],
cmdclass={"verify": VerifyVersionCommand},
)

ml-agents-envs/mlagents_envs/base_env.py (8)


:returns: The DecisionStep
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the DecisionSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the DecisionSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

specific agent
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the TerminalSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the TerminalSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

ml-agents-envs/mlagents_envs/communicator.py (2)


from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
class Communicator(object):
class Communicator:
def __init__(self, worker_id=0, base_port=5005):
"""
Python side of the communication. Must be used in pair with the right Unity Communicator equivalent.

ml-agents-envs/mlagents_envs/environment.py (22)


for _sc in side_channels:
if _sc.channel_id in self.side_channels:
raise UnityEnvironmentException(
"There cannot be two side channels with the same channel id {0}.".format(
"There cannot be two side channels with the same channel id {}.".format(
_sc.channel_id
)
)

.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
logger.debug("The true file name is {}".format(true_filename))
logger.debug(f"The true file name is {true_filename}")
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None

f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
logger.debug("This is the launch string {}".format(launch_string))
logger.debug(f"This is the launch string {launch_string}")
# Launch Unity environment
subprocess_args = [launch_string]
if no_graphics:

def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:
raise UnityActionException(
"The group {0} does not correspond to an existing agent group "
"The group {} does not correspond to an existing agent group "
"in the environment".format(behavior_name)
)

expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} for "
"The behavior {} needs an input of dimension {} for "
"dimension {2}".format(behavior_name, expected_shape, action.shape)
"dimension {}".format(behavior_name, expected_shape, action.shape)
)
if action.dtype != expected_type:
action = action.astype(expected_type)

expected_shape = (spec.action_size,)
if action.shape != expected_shape:
raise UnityActionException(
f"The Agent {0} with BehaviorName {1} needs an input of dimension "
f"{2} but received input of dimension {3}".format(
agent_id, behavior_name, expected_shape, action.shape
)
f"The Agent {agent_id} with BehaviorName {behavior_name} needs an input of dimension "
f"{expected_shape} but received input of dimension {action.shape}"
)
expected_type = np.float32 if spec.is_action_continuous() else np.int32
if action.dtype != expected_type:
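
This hunk fixes a genuine bug, not just style: prefixing a .format()-style template with f makes {0} through {3} evaluate immediately as the literal expressions 0 to 3, and the implicitly concatenated literal is formatted before .format() ever runs, so the call's arguments were silently ignored. A small sketch of the pitfall, with made-up values:

agent_id, behavior_name = 7, "Walker"
buggy = f"The Agent {0} with BehaviorName {1}".format(agent_id, behavior_name)
assert buggy == "The Agent 0 with BehaviorName 1"  # arguments silently dropped
fixed = f"The Agent {agent_id} with BehaviorName {behavior_name}"
assert fixed == "The Agent 7 with BehaviorName Walker"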

)
if len(message_data) != message_len:
raise UnityEnvironmentException(
"The message received by the side channel {0} was "
"The message received by the side channel {} was "
"unexpectedly short. Make sure your Unity Environment "
"sending side channel data properly.".format(channel_id)
)

else:
logger.warning(
"Unknown side channel data received. Channel type "
": {0}.".format(channel_id)
": {}.".format(channel_id)
)
@staticmethod

ml-agents-envs/mlagents_envs/exception.py (2)


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super(UnityWorkerInUseException, self).__init__(message)
super().__init__(message)

ml-agents-envs/mlagents_envs/rpc_communicator.py (2)


s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
s.bind(("localhost", port))
except socket.error:
except OSError:
raise UnityWorkerInUseException(self.worker_id)
finally:
s.close()

ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py (2)


FLOAT = 0
def __init__(self) -> None:
channel_id = uuid.UUID(("534c891e-810f-11ea-a9d0-822485860400"))
channel_id = uuid.UUID("534c891e-810f-11ea-a9d0-822485860400")
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:

ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py (2)


def __init__(self, channel_id: uuid.UUID = None) -> None:
self._float_properties: Dict[str, float] = {}
if channel_id is None:
channel_id = uuid.UUID(("60ccf7d0-4f7e-11ea-b238-784f4387d1f7"))
channel_id = uuid.UUID("60ccf7d0-4f7e-11ea-b238-784f4387d1f7")
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:

ml-agents-envs/mlagents_envs/tests/test_side_channel.py (4)


sender = RawBytesChannel(guid)
receiver = RawBytesChannel(guid)
sender.send_raw_data("foo".encode("ascii"))
sender.send_raw_data("bar".encode("ascii"))
sender.send_raw_data(b"foo")
sender.send_raw_data(b"bar")
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)

ml-agents-envs/setup.py (2)


tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

ml-agents/mlagents/trainers/buffer.py (4)


super().__init__()
def __str__(self):
return ", ".join(["'{0}' : {1}".format(k, str(self[k])) for k in self.keys()])
return ", ".join(["'{}' : {}".format(k, str(self[k])) for k in self.keys()])
def reset_agent(self) -> None:
"""

key_list = list(self.keys())
if not self.check_length(key_list):
raise BufferException(
"The length of the fields {0} were not of same length".format(key_list)
f"The length of the fields {key_list} were not of same length"
)
for field_key in key_list:
target_buffer[field_key].extend(

ml-agents/mlagents/trainers/components/bc/model.py (2)


from mlagents.trainers.policy.tf_policy import TFPolicy
class BCModel(object):
class BCModel:
def __init__(
self, policy: TFPolicy, learning_rate: float = 3e-4, anneal_steps: int = 0
):

ml-agents/mlagents/trainers/components/bc/module.py (2)


for k in param_keys:
if k not in config_dict:
raise UnityTrainerException(
"The required pre-training hyper-parameter {0} was not defined. Please check your \
"The required pre-training hyper-parameter {} was not defined. Please check your \
trainer YAML file.".format(
k
)

ml-agents/mlagents/trainers/components/reward_signals/__init__.py (2)


for k in param_keys:
if k not in config_dict:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for {1}.".format(
"The hyper-parameter {} could not be found for {}.".format(
k, cls.__name__
)
)

ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (6)


from mlagents.trainers.policy.tf_policy import TFPolicy
class CuriosityModel(object):
class CuriosityModel:
def __init__(
self, policy: TFPolicy, encoding_size: int = 128, learning_rate: float = 3e-4
):

self.encoding_size,
ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
f"curiosity_stream_{i}_visual_obs_encoder",
False,
)

ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
f"curiosity_stream_{i}_visual_obs_encoder",
True,
)
visual_encoders.append(encoded_visual)

ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (6)


EPSILON = 1e-7
class GAILModel(object):
class GAILModel:
def __init__(
self,
policy: TFPolicy,

self.encoding_size,
ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
f"gail_stream_{i}_visual_obs_encoder",
False,
)

ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
f"gail_stream_{i}_visual_obs_encoder",
True,
)
visual_policy_encoders.append(encoded_policy_visual)

ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (4)


"""
rcls = NAME_TO_CLASS.get(name)
if not rcls:
raise UnityTrainerException("Unknown reward signal type {0}".format(name))
raise UnityTrainerException(f"Unknown reward signal type {name}")
"Unknown parameters given for reward signal {0}".format(name)
f"Unknown parameters given for reward signal {name}"
)
return class_inst

ml-agents/mlagents/trainers/curriculum.py (12)


for key in parameters:
config[key] = parameters[key][self.lesson_num]
logger.info(
"{0} lesson changed. Now in lesson {1}: {2}".format(
"{} lesson changed. Now in lesson {}: {}".format(
self.brain_name,
self.lesson_num,
", ".join([str(x) + " -> " + str(config[x]) for x in config]),

try:
with open(config_path) as data_file:
return Curriculum._load_curriculum(data_file)
except IOError:
raise CurriculumLoadingError(
"The file {0} could not be found.".format(config_path)
)
except OSError:
raise CurriculumLoadingError(f"The file {config_path} could not be found.")
raise CurriculumLoadingError(
"There was an error decoding {}".format(config_path)
)
raise CurriculumLoadingError(f"There was an error decoding {config_path}")
@staticmethod
def _load_curriculum(fp: TextIO) -> Dict:

ml-agents/mlagents/trainers/ghost/controller.py (4)


"""
self._queue.append(self._learning_team)
self._learning_team = self._queue.popleft()
logger.debug(
"Learning team {} swapped on step {}".format(self._learning_team, step)
)
logger.debug(f"Learning team {self._learning_team} swapped on step {step}")
self._changed_training_team = True
# Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and

ml-agents/mlagents/trainers/ghost/trainer.py (2)


:param run_id: The identifier of the current run
"""
super(GhostTrainer, self).__init__(
super().__init__(
brain_name, trainer_parameters, training, run_id, reward_buff_cap
)

ml-agents/mlagents/trainers/models.py (6)


)
else:
raise UnityTrainerException(
"The learning rate schedule {} is invalid.".format(lr_schedule)
f"The learning rate schedule {lr_schedule} is invalid."
)
return learning_rate

h_size,
activation=activation,
reuse=reuse,
name="hidden_{}".format(i),
name=f"hidden_{i}",
kernel_initializer=tf.initializers.variance_scaling(1.0),
)
return hidden

"""
value_heads = {}
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
value_heads[name] = value
value = tf.reduce_mean(list(value_heads.values()), 0)
return value_heads, value

ml-agents/mlagents/trainers/policy/tf_policy.py (12)


self.sequence_length = trainer_parameters["sequence_length"]
if self.m_size == 0:
raise UnityPolicyException(
"The memory size for brain {0} is 0 even "
"The memory size for brain {} is 0 even "
"The memory size for brain {0} is {1} "
"The memory size for brain {} is {} "
"but it must be divisible by 2.".format(
brain.brain_name, self.m_size
)

ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None:
raise UnityPolicyException(
"The model {0} could not be loaded. Make "
"The model {} could not be loaded. Make "
"sure you specified the right "
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)

except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {0} was found but could not be loaded. Make "
"The model {} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path

)
)
else:
logger.info(
"Resuming training from step {}.".format(self.get_current_step())
)
logger.info(f"Resuming training from step {self.get_current_step()}.")
def initialize_or_load(self):
# If there is an initialize path, load from that. Else, load from the set model path.

ml-agents/mlagents/trainers/ppo/optimizer.py (12)


self.old_values = {}
for name in value_heads.keys():
returns_holder = tf.placeholder(
shape=[None], dtype=tf.float32, name="{}_returns".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_returns"
shape=[None], dtype=tf.float32, name="{}_value_estimate".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_value_estimate"
)
self.returns_holders[name] = returns_holder
self.old_values[name] = old_value

self.all_old_log_probs: mini_batch["action_probs"],
}
for name in self.reward_signals:
feed_dict[self.returns_holders[name]] = mini_batch[
"{}_returns".format(name)
]
feed_dict[self.old_values[name]] = mini_batch[
"{}_value_estimates".format(name)
]
feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]
if self.policy.output_pre is not None and "actions_pre" in mini_batch:
feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]

ml-agents/mlagents/trainers/ppo/trainer.py (16)


:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(PPOTrainer, self).__init__(
super().__init__(
brain_name, trainer_parameters, training, run_id, reward_buff_cap
)
self.param_keys = [

trajectory.done_reached and not trajectory.max_step_reached,
)
for name, v in value_estimates.items():
agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)

evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
agent_buffer_trajectory["{}_rewards".format(name)].extend(evaluate_result)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

for name in self.optimizer.reward_signals:
bootstrap_value = value_next[name]
local_rewards = agent_buffer_trajectory[
"{}_rewards".format(name)
].get_batch()
local_rewards = agent_buffer_trajectory[f"{name}_rewards"].get_batch()
"{}_value_estimates".format(name)
f"{name}_value_estimates"
].get_batch()
local_advantage = get_gae(
rewards=local_rewards,

)
local_return = local_advantage + local_value_estimates
# This is later use as target for the different value estimates
agent_buffer_trajectory["{}_returns".format(name)].set(local_return)
agent_buffer_trajectory["{}_advantage".format(name)].set(local_advantage)
agent_buffer_trajectory[f"{name}_returns"].set(local_return)
agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
tmp_advantages.append(local_advantage)
tmp_returns.append(local_return)

ml-agents/mlagents/trainers/sac/network.py (6)


"""
self.value_heads = {}
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

q1_heads = {}
for name in stream_names:
_q1 = tf.layers.dense(q1_hidden, num_outputs, name="{}_q1".format(name))
_q1 = tf.layers.dense(q1_hidden, num_outputs, name=f"{name}_q1")
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)

q2_heads = {}
for name in stream_names:
_q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)

ml-agents/mlagents/trainers/sac/optimizer.py (4)


)
rewards_holder = tf.placeholder(
shape=[None], dtype=tf.float32, name="{}_rewards".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_rewards"
)
self.rewards_holders[name] = rewards_holder

self.policy.mask_input: batch["masks"] * burn_in_mask,
}
for name in self.reward_signals:
feed_dict[self.rewards_holders[name]] = batch["{}_rewards".format(name)]
feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]
if self.policy.use_continuous_act:
feed_dict[self.policy_network.external_action_in] = batch["actions"]

ml-agents/mlagents/trainers/sac/trainer.py (14)


filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
)
logger.info("Saving Experience Replay Buffer to {}".format(filename))
logger.info(f"Saving Experience Replay Buffer to {filename}")
with open(filename, "wb") as file_object:
self.update_buffer.save_to_file(file_object)

filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
)
logger.info("Loading Experience Replay Buffer from {}".format(filename))
logger.info(f"Loading Experience Replay Buffer from {filename}")
with open(filename, "rb+") as file_object:
self.update_buffer.load_from_file(file_object)
logger.info(

batch_update_stats: Dict[str, list] = defaultdict(list)
while self.step / self.update_steps > self.steps_per_update:
logger.debug("Updating SAC policy at step {}".format(self.step))
logger.debug(f"Updating SAC policy at step {self.step}")
buffer = self.update_buffer
if (
self.update_buffer.num_experiences

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[
"{}_rewards".format(name)
] = signal.evaluate_batch(sampled_minibatch).scaled_reward
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():

# Get minibatches for reward signal update if needed
reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug("Updating {} at step {}".format(name, self.step))
logger.debug(f"Updating {name} at step {self.step}")
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(

ml-agents/mlagents/trainers/sampler_class.py (2)


for param_name, cur_param_dict in self.reset_param_dict.items():
if "sampler-type" not in cur_param_dict:
raise SamplerException(
"'sampler_type' argument hasn't been supplied for the {0} parameter".format(
"'sampler_type' argument hasn't been supplied for the {} parameter".format(
param_name
)
)

ml-agents/mlagents/trainers/stats.py (12)


)
if self.self_play and "Self-play/ELO" in values:
elo_stats = values["Self-play/ELO"]
logger.info("{} ELO: {:0.3f}. ".format(category, elo_stats.mean))
logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
else:
logger.info(
"{}: Step: {}. No episode was completed since last summary. {}".format(

) -> None:
if property_type == StatsPropertyType.HYPERPARAMETERS:
logger.info(
"""Hyperparameters for behavior name {0}: \n{1}""".format(
"""Hyperparameters for behavior name {}: \n{}""".format(
category, self._dict_to_str(value, 0)
)
)

[
"\t"
+ " " * num_tabs
+ "{0}:\t{1}".format(
+ "{}:\t{}".format(
x, self._dict_to_str(param_dict[x], num_tabs + 1)
)
for x in param_dict

self._maybe_create_summary_writer(category)
for key, value in values.items():
summary = tf.Summary()
summary.value.add(tag="{}".format(key), simple_value=value.mean)
summary.value.add(tag=f"{key}", simple_value=value.mean)
self.summary_writers[category].add_summary(summary, step)
self.summary_writers[category].flush()

for file_name in os.listdir(directory_name):
if file_name.startswith("events.out"):
logger.warning(
"{} was left over from a previous run. Deleting.".format(file_name)
f"{file_name} was left over from a previous run. Deleting."
)
full_fname = os.path.join(directory_name, file_name)
try:

s_op = tf.summary.text(
name,
tf.convert_to_tensor(
([[str(x), str(input_dict[x])] for x in input_dict])
[[str(x), str(input_dict[x])] for x in input_dict]
),
)
s = sess.run(s_op)

ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)


def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
print(f"Last {last_n_rewards} rewards:", rewards_to_use)
return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()

ml-agents/mlagents/trainers/trainer/rl_trainer.py (2)


"""
def __init__(self, *args, **kwargs):
super(RLTrainer, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
# Make sure we have at least one reward_signal
if not self.trainer_parameters["reward_signals"]:
raise UnityTrainerException(

ml-agents/mlagents/trainers/trainer/trainer.py (4)


for k in self.param_keys:
if k not in self.trainer_parameters:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for the {1} trainer of "
"brain {2}.".format(k, self.__class__, self.brain_name)
"The hyper-parameter {} could not be found for the {} trainer of "
"brain {}.".format(k, self.__class__, self.brain_name)
)
@property

ml-agents/mlagents/trainers/trainer_controller.py (2)


from mlagents.trainers.agent_processor import AgentManager
class TrainerController(object):
class TrainerController:
def __init__(
self,
trainer_factory: TrainerFactory,

ml-agents/mlagents/trainers/trainer_util.py (2)


try:
with open(config_path) as data_file:
return _load_config(data_file)
except IOError:
except OSError:
abs_path = os.path.abspath(config_path)
raise TrainerConfigError(f"Config file could not be found at {abs_path}.")
except UnicodeDecodeError:

ml-agents/setup.py (5)


from io import open
import os
import sys

tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

# Test-only dependencies should go in test_requirements.txt, not here.
"grpcio>=1.11.0",
"h5py>=2.9.0",
"mlagents_envs=={}".format(VERSION),
f"mlagents_envs=={VERSION}",
"numpy>=1.13.3,<2.0",
"Pillow>=4.2.1",
"protobuf>=3.6",

ml-agents/tests/yamato/check_coverage_percent.py (3)


from __future__ import print_function
import sys
import os

summary_xml = os.path.join(dirpath, SUMMARY_XML_FILENAME)
break
if not summary_xml:
print("Couldn't find {} in root directory".format(SUMMARY_XML_FILENAME))
print(f"Couldn't find {SUMMARY_XML_FILENAME} in root directory")
sys.exit(1)
with open(summary_xml) as f:

ml-agents/tests/yamato/scripts/run_gym.py (4)


if len(env.observation_space.shape) == 1:
# Examine the initial vector observation
print("Agent observations look like: \n{}".format(initial_observations))
print(f"Agent observations look like: \n{initial_observations}")
for _episode in range(10):
env.reset()

actions = env.action_space.sample()
obs, reward, done, _ = env.step(actions)
episode_rewards += reward
print("Total reward this episode: {}".format(episode_rewards))
print(f"Total reward this episode: {episode_rewards}")
finally:
env.close()

ml-agents/tests/yamato/scripts/run_llapi.py (2)


if tracked_agent in terminal_steps:
episode_rewards += terminal_steps[tracked_agent].reward
done = True
print("Total reward this episode: {}".format(episode_rewards))
print(f"Total reward this episode: {episode_rewards}")
finally:
env.close()

ml-agents/tests/yamato/yamato_utils.py (3)


if extra_packages:
pip_commands += extra_packages
for cmd in pip_commands:
pip_index_url = "--index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple"
f"source {venv_path}/bin/activate; python -m pip install -q {cmd}",
f"source {venv_path}/bin/activate; python -m pip install -q {cmd} {pip_index_url}",
shell=True,
)
return venv_path

test_requirements.txt (2)


# Test-only dependencies should go here, not in setup.py
pytest>4.0.0,<6.0.0
pytest-cov==2.6.1
pytest-xdist
pytest-xdist==1.34.0
# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'

utils/validate_versions.py (2)


def set_package_version(new_version: str) -> None:
with open(UNITY_PACKAGE_JSON_PATH, "r") as f:
with open(UNITY_PACKAGE_JSON_PATH) as f:
package_json = json.load(f)
if "version" in package_json:
package_json["version"] = new_version
