
Merge branch 'master' into hh/develop/crawler-ragdoll-updates

/hh-develop-ragdoll-testing
HH, 5 years ago
Current commit
0fdac847
97 files changed, with 5,883 insertions and 10,870 deletions
  1. 8     DevProject/Packages/manifest.json
  2. 4     DevProject/ProjectSettings/ProjectVersion.txt
  3. 495   Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBall.nn
  4. 586   Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHard.nn
  5. 13    Project/Assets/ML-Agents/Examples/Basic/TFModels/Basic.nn
  6. 149   Project/Assets/ML-Agents/Examples/Bouncer/TFModels/Bouncer.nn
  7. 682   Project/Assets/ML-Agents/Examples/FoodCollector/TFModels/FoodCollector.nn
  8. 1001  Project/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorld.nn
  9. 999   Project/Assets/ML-Agents/Examples/Hallway/TFModels/Hallway.nn
  10. 1001 Project/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlock.nn
  11. 1001 Project/Assets/ML-Agents/Examples/Pyramids/TFModels/Pyramids.nn
  12. 564  Project/Assets/ML-Agents/Examples/Reacher/TFModels/Reacher.nn
  13. 1001 Project/Assets/ML-Agents/Examples/Soccer/TFModels/Goalie.nn
  14. 1001 Project/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.nn
  15. 1001 Project/Assets/ML-Agents/Examples/Soccer/TFModels/Striker.nn
  16. 1001 Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn
  17. 1001 Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn
  18. 1001 Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn
  19. 1001 Project/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJump.nn
  20. 1001 Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump.nn
  21. 1001 Project/Assets/ML-Agents/Examples/Worm/TFModels/WormDynamic.nn
  22. 1001 Project/Assets/ML-Agents/Examples/Worm/TFModels/WormStatic.nn
  23. 5    README.md
  24. 10   com.unity.ml-agents/CHANGELOG.md
  25. 7    com.unity.ml-agents/Runtime/Academy.cs
  26. 2    com.unity.ml-agents/Runtime/Agent.cs
  27. 1    config/imitation/CrawlerStatic.yaml
  28. 1    config/imitation/FoodCollector.yaml
  29. 1    config/imitation/Hallway.yaml
  30. 1    config/imitation/PushBlock.yaml
  31. 1    config/ppo/3DBall.yaml
  32. 1    config/ppo/3DBallHard.yaml
  33. 1    config/ppo/3DBall_randomize.yaml
  34. 1    config/ppo/Basic.yaml
  35. 1    config/ppo/Bouncer.yaml
  36. 1    config/ppo/CrawlerDynamic.yaml
  37. 1    config/ppo/CrawlerStatic.yaml
  38. 1    config/ppo/FoodCollector.yaml
  39. 1    config/ppo/GridWorld.yaml
  40. 1    config/ppo/Hallway.yaml
  41. 1    config/ppo/PushBlock.yaml
  42. 1    config/ppo/Pyramids.yaml
  43. 1    config/ppo/Reacher.yaml
  44. 1    config/ppo/SoccerTwos.yaml
  45. 2    config/ppo/StrikersVsGoalie.yaml
  46. 1    config/ppo/Tennis.yaml
  47. 1    config/ppo/VisualHallway.yaml
  48. 1    config/ppo/VisualPushBlock.yaml
  49. 1    config/ppo/VisualPyramids.yaml
  50. 1    config/ppo/WalkerDynamic.yaml
  51. 1    config/ppo/WalkerStatic.yaml
  52. 2    config/ppo/WallJump.yaml
  53. 2    config/ppo/WallJump_curriculum.yaml
  54. 1    config/ppo/WormDynamic.yaml
  55. 1    config/ppo/WormStatic.yaml
  56. 1    config/sac/3DBall.yaml
  57. 1    config/sac/3DBallHard.yaml
  58. 1    config/sac/Basic.yaml
  59. 1    config/sac/Bouncer.yaml
  60. 1    config/sac/CrawlerDynamic.yaml
  61. 1    config/sac/CrawlerStatic.yaml
  62. 1    config/sac/FoodCollector.yaml
  63. 1    config/sac/GridWorld.yaml
  64. 1    config/sac/Hallway.yaml
  65. 1    config/sac/PushBlock.yaml
  66. 1    config/sac/Pyramids.yaml
  67. 1    config/sac/Reacher.yaml
  68. 1    config/sac/Tennis.yaml
  69. 1    config/sac/VisualHallway.yaml
  70. 1    config/sac/VisualPushBlock.yaml
  71. 1    config/sac/VisualPyramids.yaml
  72. 1    config/sac/WalkerDynamic.yaml
  73. 1    config/sac/WalkerStatic.yaml
  74. 2    config/sac/WallJump.yaml
  75. 1    config/sac/WormDynamic.yaml
  76. 1    config/sac/WormStatic.yaml
  77. 4    docs/Learning-Environment-Design-Agents.md
  78. 6    ml-agents/mlagents/trainers/ghost/trainer.py
  79. 1    ml-agents/mlagents/trainers/learn.py
  80. 4    ml-agents/mlagents/trainers/policy/nn_policy.py
  81. 5    ml-agents/mlagents/trainers/policy/tf_policy.py
  82. 7    ml-agents/mlagents/trainers/ppo/trainer.py
  83. 15   ml-agents/mlagents/trainers/sac/trainer.py
  84. 3    ml-agents/mlagents/trainers/settings.py
  85. 8    ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
  86. 2    ml-agents/mlagents/trainers/tests/test_bcmodule.py
  87. 23   ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
  88. 16   ml-agents/mlagents/trainers/tests/test_nn_policy.py
  89. 6    ml-agents/mlagents/trainers/tests/test_policy.py
  90. 2    ml-agents/mlagents/trainers/tests/test_ppo.py
  91. 2    ml-agents/mlagents/trainers/tests/test_reward_signals.py
  92. 10   ml-agents/mlagents/trainers/tests/test_sac.py
  93. 1    ml-agents/mlagents/trainers/tests/test_simple_rl.py
  94. 14   ml-agents/mlagents/trainers/tests/test_trainer_util.py
  95. 12   ml-agents/mlagents/trainers/trainer/trainer.py
  96. 13   ml-agents/mlagents/trainers/trainer_util.py
  97. 8    utils/make_readme_table.py

8
DevProject/Packages/manifest.json


"dependencies": {
"com.unity.2d.sprite": "1.0.0",
"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "2.0.8",
"com.unity.ads": "3.4.4",
"com.unity.analytics": "3.3.5",
"com.unity.coding": "0.1.0-preview.13",
"com.unity.collab-proxy": "1.2.16",

"com.unity.multiplayer-hlapi": "1.0.4",
"com.unity.package-manager-doctools": "1.1.1-preview.3",
"com.unity.package-validation-suite": "0.7.15-preview",
"com.unity.package-validation-suite": "0.11.0-preview",
"com.unity.test-framework": "1.1.11",
"com.unity.test-framework": "1.1.13",
"com.unity.xr.legacyinputhelpers": "1.3.8",
"com.unity.xr.legacyinputhelpers": "1.3.11",
"com.unity.modules.ai": "1.0.0",
"com.unity.modules.androidjni": "1.0.0",
"com.unity.modules.animation": "1.0.0",

4
DevProject/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2019.3.3f1
m_EditorVersionWithRevision: 2019.3.3f1 (7ceaae5f7503)
m_EditorVersion: 2019.3.11f1
m_EditorVersionWithRevision: 2019.3.11f1 (ceef2d848e70)

495
Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBall.nn
Diff content too large to display.

586
Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHard.nn
Diff content too large to display.

13
Project/Assets/ML-Agents/Examples/Basic/TFModels/Basic.nn


(Binary Barracuda model data for Basic.nn; the serialized weights before and after this change are not human-readable and are omitted. The updated model adds trainer_major_version, trainer_minor_version, and trainer_patch_version metadata fields.)

149
Project/Assets/ML-Agents/Examples/Bouncer/TFModels/Bouncer.nn
Diff content too large to display.

682
Project/Assets/ML-Agents/Examples/FoodCollector/TFModels/FoodCollector.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorld.nn
Diff content too large to display.

999
Project/Assets/ML-Agents/Examples/Hallway/TFModels/Hallway.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlock.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Pyramids/TFModels/Pyramids.nn
Diff content too large to display.

564
Project/Assets/ML-Agents/Examples/Reacher/TFModels/Reacher.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Goalie.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Striker.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJump.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Worm/TFModels/WormDynamic.nn
Diff content too large to display.

1001
Project/Assets/ML-Agents/Examples/Worm/TFModels/WormStatic.nn
Diff content too large to display.

5
README.md


| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **Release 2** | **May 20, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_2)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_2/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_2.zip)** |
| **Release 1** | April 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_1.zip) |
| **Release 3** | **June 10, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_3)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_3_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_3.zip)** |
| **Release 2** | May 20, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_2) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_2_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_2.zip) |
| **Release 1** | April 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_1_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_1.zip) |
| **0.15.1** | March 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.1.zip) |
| **0.15.0** | March 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip) |
| **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |

10
com.unity.ml-agents/CHANGELOG.md


- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- `beta` and `epsilon` in `PPO` are no longer decayed by default but follow the same schedule as learning rate. (#3940)
- `get_behavior_names()` and `get_behavior_spec()` on UnityEnvironment were replaced by the `behavior_specs` property. (#3946)
- The first version of the Unity Environment Registry (Experimental) has been released. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Unity-Environment-Registry.md)(#3967)
- The first version of the Unity Environment Registry (Experimental) has been released. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/release_3_docs/docs/Unity-Environment-Registry.md)(#3967)
- `use_visual` and `allow_multiple_visual_obs` in the `UnityToGymWrapper` constructor
were replaced by `allow_multiple_obs` which allows one or more visual observations and
vector observations to be used simultaneously. (#3981) Thank you @shakenes !
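To make the two Python-facing entries above concrete (the new `behavior_specs` property and the gym wrapper's `allow_multiple_obs` flag), here is a minimal, hedged sketch. It assumes the release_3-era `mlagents_envs` and `gym_unity` packages; the `./UnityEnv` build path is a placeholder and not part of this change set.

```python
# Hedged sketch only; "./UnityEnv" is a hypothetical built environment.
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

unity_env = UnityEnvironment(file_name="./UnityEnv")
unity_env.reset()

# The behavior_specs property replaces get_behavior_names()/get_behavior_spec() (#3946).
for behavior_name, spec in unity_env.behavior_specs.items():
    print(behavior_name, spec.observation_shapes)

# allow_multiple_obs replaces use_visual / allow_multiple_visual_obs (#3981);
# observations come back as a list of arrays when it is enabled.
gym_env = UnityToGymWrapper(unity_env, allow_multiple_obs=True)
obs = gym_env.reset()
gym_env.close()
```

With `allow_multiple_obs` left at its default, the wrapper keeps returning a single observation as before.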

- The format for trainer configuration has changed, and the "default" behavior has been deprecated.
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Migrating.md) for more details. (#3936)
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/release_3_docs/docs/Migrating.md) for more details. (#3936)
- Training artifacts (trained models, summaries) are now found in the `results/`
directory. (#3829)
- When using Curriculum, the current lesson will resume if training is quit and resumed. As such,

- Introduced the `SideChannelManager` to register, unregister and access side
channels. (#3807)
- `Academy.FloatProperties` was replaced by `Academy.EnvironmentParameters`.
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Migrating.md)
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Migrating.md)
for more details on upgrading. (#3807)
- `SideChannel.OnMessageReceived` is now a protected method (was public)
- SideChannel IncomingMessages methods now take an optional default argument,

`--load`. (#3705)
- The Jupyter notebooks have been removed from the repository. (#3704)
- The multi-agent gym option was removed from the gym wrapper. For multi-agent
scenarios, use the [Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md). (#3681)
scenarios, use the [Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Python-API.md). (#3681)
[Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md)
[Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Python-API.md)
documentation for more information. If you use `mlagents-learn` for training, this should be a
transparent change. (#3681)
- Added ability to start training (initialize model weights) from a previous run
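The Low Level Python API entry above is what multi-agent scenarios fall back on now that the gym wrapper is single-agent only. A rough sketch of that loop follows; it assumes the release_3-era `mlagents_envs` package and continuous actions, `./UnityEnv` is a placeholder build path, and the `action_size` attribute is my assumption about the `BehaviorSpec` of that era rather than something shown in this diff.

```python
# Rough sketch, not part of this change set.
import numpy as np
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name="./UnityEnv")  # hypothetical build
env.reset()
for _ in range(100):
    for behavior_name, spec in env.behavior_specs.items():
        decision_steps, terminal_steps = env.get_steps(behavior_name)
        # One placeholder (zero) action per agent that requested a decision;
        # spec.action_size as the continuous action width is an assumption.
        actions = np.zeros((len(decision_steps), spec.action_size), dtype=np.float32)
        env.set_actions(behavior_name, actions)
    env.step()
env.close()
```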

7
com.unity.ml-agents/Runtime/Academy.cs


port = port
}
);
Communicator.QuitCommandReceived += OnQuitCommandReceived;
Communicator.ResetCommandReceived += OnResetCommand;
}
if (Communicator != null)

);
Communicator = null;
}
}
if (Communicator != null)
{
Communicator.QuitCommandReceived += OnQuitCommandReceived;
Communicator.ResetCommandReceived += OnResetCommand;
}
// If a communicator is enabled/provided, then we assume we are in

2
com.unity.ml-agents/Runtime/Agent.cs


/// <seealso cref="OnActionReceived(float[])"/> function, which receives this array and
/// implements the corresponding agent behavior. See [Actions] for more information
/// about agent actions.
/// Note : Do not create a new float array of action in the `Heuristic()` method,
/// as this will prevent writing floats to the original action array.
///
/// An agent calls this `Heuristic()` function to make a decision when you set its behavior
/// type to <see cref="BehaviorType.HeuristicOnly"/>. The agent also calls this function if

1
config/imitation/CrawlerStatic.yaml


use_actions: false
use_vail: false
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 1000

1
config/imitation/FoodCollector.yaml


use_actions: false
use_vail: false
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
output_path: default
keep_checkpoints: 5
max_steps: 2000000
time_horizon: 64

1
config/imitation/Hallway.yaml


use_actions: false
use_vail: false
demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 64

1
config/imitation/PushBlock.yaml


use_actions: false
use_vail: false
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
output_path: default
keep_checkpoints: 5
max_steps: 15000000
time_horizon: 64

1
config/ppo/3DBall.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 1000

1
config/ppo/3DBallHard.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 5000000
time_horizon: 1000

1
config/ppo/3DBall_randomize.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 1000

1
config/ppo/Basic.yaml


extrinsic:
gamma: 0.9
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 3

1
config/ppo/Bouncer.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 4000000
time_horizon: 64

1
config/ppo/CrawlerDynamic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 1000

1
config/ppo/CrawlerStatic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 1000

1
config/ppo/FoodCollector.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 2000000
time_horizon: 64

1
config/ppo/GridWorld.yaml


extrinsic:
gamma: 0.9
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 5

1
config/ppo/Hallway.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 64

1
config/ppo/PushBlock.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 2000000
time_horizon: 64

1
config/ppo/Pyramids.yaml


strength: 0.02
encoding_size: 256
learning_rate: 0.0003
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 128

1
config/ppo/Reacher.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 20000000
time_horizon: 1000

1
config/ppo/SoccerTwos.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 50000000
time_horizon: 1000

2
config/ppo/StrikersVsGoalie.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 50000000
time_horizon: 1000

extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 50000000
time_horizon: 1000

1
config/ppo/Tennis.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 50000000
time_horizon: 1000

1
config/ppo/VisualHallway.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 64

1
config/ppo/VisualPushBlock.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 3000000
time_horizon: 64

1
config/ppo/VisualPyramids.yaml


strength: 0.01
encoding_size: 256
learning_rate: 0.0003
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 128

1
config/ppo/WalkerDynamic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 25000000
time_horizon: 1000

1
config/ppo/WalkerStatic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 20000000
time_horizon: 1000

2
config/ppo/WallJump.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 20000000
time_horizon: 128

extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 5000000
time_horizon: 128

2
config/ppo/WallJump_curriculum.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 20000000
time_horizon: 128

extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 5000000
time_horizon: 128

1
config/ppo/WormDynamic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 3500000
time_horizon: 1000

1
config/ppo/WormStatic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 3500000
time_horizon: 1000

1
config/sac/3DBall.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 1000

1
config/sac/3DBallHard.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 1000

1
config/sac/Basic.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 10

1
config/sac/Bouncer.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 1000000
time_horizon: 64

1
config/sac/CrawlerDynamic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 5000000
time_horizon: 1000

1
config/sac/CrawlerStatic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 3000000
time_horizon: 1000

1
config/sac/FoodCollector.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 2000000
time_horizon: 64

1
config/sac/GridWorld.yaml


extrinsic:
gamma: 0.9
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 500000
time_horizon: 5

1
config/sac/Hallway.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 5000000
time_horizon: 64

1
config/sac/PushBlock.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 2000000
time_horizon: 64

1
config/sac/Pyramids.yaml


use_actions: true
use_vail: false
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 128

1
config/sac/Reacher.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 20000000
time_horizon: 1000

1
config/sac/Tennis.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 20000000
time_horizon: 64

1
config/sac/VisualHallway.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 64

1
config/sac/VisualPushBlock.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 3000000
time_horizon: 64

1
config/sac/VisualPyramids.yaml


use_actions: true
use_vail: false
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
output_path: default
keep_checkpoints: 5
max_steps: 10000000
time_horizon: 128

1
config/sac/WalkerDynamic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 15000000
time_horizon: 1000

1
config/sac/WalkerStatic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 20000000
time_horizon: 1000

2
config/sac/WallJump.yaml


extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 15000000
time_horizon: 128

extrinsic:
gamma: 0.99
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 5000000
time_horizon: 128

1
config/sac/WormDynamic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 5000000
time_horizon: 1000

1
config/sac/WormStatic.yaml


extrinsic:
gamma: 0.995
strength: 1.0
output_path: default
keep_checkpoints: 5
max_steps: 3000000
time_horizon: 1000

4
docs/Learning-Environment-Design-Agents.md


- `Agent.Heuristic()` - When the `Behavior Type` is set to `Heuristic Only` in
the Behavior Parameters of the Agent, the Agent will use the `Heuristic()`
method to generate the actions of the Agent. As such, the `Heuristic()` method
writes to a provided array of floats.
writes to the array of floats provided to the Heuristic method as argument.
__Note__: Do not create a new float array of action in the `Heuristic()` method,
as this will prevent writing floats to the original action array.
As a concrete example, here is how the Ball3DAgent class implements these methods:

6
ml-agents/mlagents/trainers/ghost/trainer.py


reward_buff_cap,
trainer_settings,
training,
run_id,
artifact_path,
):
"""
Creates a GhostTrainer.

:param reward_buff_cap: Max reward history to track in the reward buffer
:param trainer_settings: The parameters for the trainer.
:param training: Whether the trainer is set for training.
:param run_id: The identifier of the current run
:param artifact_path: Path to store artifacts from this trainer.
brain_name, trainer_settings, training, run_id, reward_buff_cap
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
)
self.trainer = trainer

1
ml-agents/mlagents/trainers/learn.py


)
trainer_factory = TrainerFactory(
options.behaviors,
checkpoint_settings.run_id,
write_path,
not checkpoint_settings.inference,
checkpoint_settings.resume,

4
ml-agents/mlagents/trainers/policy/nn_policy.py


brain: BrainParameters,
trainer_params: TrainerSettings,
is_training: bool,
model_path: str,
load: bool,
tanh_squash: bool = False,
reparameterize: bool = False,

:param trainer_params: Defined training parameters.
:param is_training: Whether the model should be trained.
:param load: Whether a pre-trained model will be loaded or a new one created.
:param model_path: Path where the model should be saved and loaded.
super().__init__(seed, brain, trainer_params, load)
super().__init__(seed, brain, trainer_params, model_path, load)
self.grads = None
self.update_batch: Optional[tf.Operation] = None
num_layers = self.network_settings.num_layers

5
ml-agents/mlagents/trainers/policy/tf_policy.py


seed: int,
brain: BrainParameters,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
):
"""

:param trainer_settings: The trainer parameters.
:param model_path: Where to load/save the model.
:param load: If True, load model from model_path. Otherwise, create new model.
"""
self.m_size = 0

self.use_continuous_act = brain.vector_action_space_type == "continuous"
if self.use_continuous_act:
self.num_branches = self.brain.vector_action_space_size[0]
self.model_path = self.trainer_settings.output_path
self.model_path = model_path
self.initialize_path = self.trainer_settings.init_path
self.keep_checkpoints = self.trainer_settings.keep_checkpoints
self.graph = tf.Graph()

7
ml-agents/mlagents/trainers/ppo/trainer.py


training: bool,
load: bool,
seed: int,
run_id: str,
artifact_path: str,
):
"""
Responsible for collecting experiences and training PPO model.

:param training: Whether the trainer is set for training.
:param load: Whether the model should be loaded.
:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
:param artifact_path: The directory within which to store artifacts from this trainer.
brain_name, trainer_settings, training, run_id, reward_buff_cap
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
)
self.hyperparameters: PPOSettings = cast(
PPOSettings, self.trainer_settings.hyperparameters

brain_parameters,
self.trainer_settings,
self.is_training,
self.artifact_path,
self.load,
condition_sigma_on_obs=False, # Faster training for PPO
create_tf_graph=False, # We will create the TF graph in the Optimizer

15
ml-agents/mlagents/trainers/sac/trainer.py


training: bool,
load: bool,
seed: int,
run_id: str,
artifact_path: str,
):
"""
Responsible for collecting experiences and training SAC model.

:param training: Whether the trainer is set for training.
:param load: Whether the model should be loaded.
:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
:param artifact_path: The directory within which to store artifacts from this trainer.
brain_name, trainer_settings, training, run_id, reward_buff_cap
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
)
self.load = load

"""
Save the training buffer's update buffer to a pickle file.
"""
filename = os.path.join(
self.trainer_settings.output_path, "last_replay_buffer.hdf5"
)
filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
logger.info("Saving Experience Replay Buffer to {}".format(filename))
with open(filename, "wb") as file_object:
self.update_buffer.save_to_file(file_object)

Loads the last saved replay buffer from a file.
"""
filename = os.path.join(
self.trainer_settings.output_path, "last_replay_buffer.hdf5"
)
filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
logger.info("Loading Experience Replay Buffer from {}".format(filename))
with open(filename, "rb+") as file_object:
self.update_buffer.load_from_file(file_object)

brain_parameters,
self.trainer_settings,
self.is_training,
self.artifact_path,
self.load,
tanh_squash=True,
reparameterize=True,

3
ml-agents/mlagents/trainers/settings.py


factory=lambda: {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
)
init_path: Optional[str] = None
output_path: str = "default"
keep_checkpoints: int = 5
checkpoint_interval: int = 500000
max_steps: int = 500000

REWARD: str = "reward"
measure: str = attr.ib(default=MeasureType.REWARD)
thresholds: List[int] = attr.ib(factory=list)
thresholds: List[float] = attr.ib(factory=list)
min_lesson_length: int = 0
signal_smoothing: bool = True
parameters: Dict[str, List[float]] = attr.ib(kw_only=True)

8
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config = TrainerSettings(output_path=os.path.join(tmpdir, "test"))
dummy_config = TrainerSettings()
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
dummy_config,
use_rnn=rnn,
model_path=os.path.join(tmpdir, "test"),
use_discrete=discrete,
use_visual=visual,
)
policy.save_model(1000)
settings = SerializationSettings(

2
ml-agents/mlagents/trainers/tests/test_bcmodule.py


NetworkSettings.MemorySettings() if use_rnn else None
)
policy = NNPolicy(
0, mock_brain, trainer_config, False, False, tanhresample, tanhresample
0, mock_brain, trainer_config, False, "test", False, tanhresample, tanhresample
)
with policy.graph.as_default():
bc_module = BCModule(

23
ml-agents/mlagents/trainers/tests/test_meta_curriculum.py


import pytest
from unittest.mock import patch, Mock, call
import yaml
import cattr
from mlagents.trainers.meta_curriculum import MetaCurriculum

@pytest.fixture
def reward_buff_sizes():
return {"Brain1": 7, "Brain2": 8}
def test_convert_from_dict():
config = yaml.safe_load(
"""
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
param1: [0.0, 4.0, 6.0, 8.0]
"""
)
should_be_config = CurriculumSettings(
thresholds=[0.1, 0.3, 0.5],
min_lesson_length=100,
signal_smoothing=True,
measure=CurriculumSettings.MeasureType.PROGRESS,
parameters={"param1": [0.0, 4.0, 6.0, 8.0]},
)
assert cattr.structure(config, CurriculumSettings) == should_be_config
def test_curriculum_config(param_name="test_param1", min_lesson_length=100):

16
ml-agents/mlagents/trainers/tests/test_nn_policy.py


use_rnn: bool = False,
use_discrete: bool = True,
use_visual: bool = False,
model_path: str = "",
load: bool = False,
seed: int = 0,
) -> NNPolicy:

trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = NNPolicy(seed, mock_brain, trainer_settings, False, load)
policy = NNPolicy(seed, mock_brain, trainer_settings, False, model_path, load)
return policy

trainer_params = TrainerSettings(output_path=path1)
policy = create_policy_mock(trainer_params)
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._set_step(2000)
policy.save_model(2000)

# Try load from this path
policy2 = create_policy_mock(trainer_params, load=True, seed=1)
policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000

trainer_params.init_path = path1
policy3 = create_policy_mock(trainer_params, load=False, seed=2)
policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
policy3.initialize_or_load()
_compare_two_policies(policy2, policy3)

# Test write_stats
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
path1 = tempfile.mkdtemp()
trainer_params = TrainerSettings(output_path=path1)
policy = create_policy_mock(trainer_params)
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._check_model_version(
"0.0.0"

brain_params,
TrainerSettings(network_settings=NetworkSettings(normalize=True)),
False,
"testdir",
False,
)

6
ml-agents/mlagents/trainers/tests/test_policy.py


def test_take_action_returns_empty_with_no_agents():
test_seed = 3
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
# Doesn't really matter what this is
dummy_groupspec = BehaviorSpec([(1,)], "continuous", 1)
no_agent_step = DecisionSteps.empty(dummy_groupspec)

def test_take_action_returns_nones_on_missing_values():
test_seed = 3
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
policy.evaluate = MagicMock(return_value={})
policy.save_memories = MagicMock()
step_with_agents = DecisionSteps(

def test_take_action_returns_action_info_when_available():
test_seed = 3
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings())
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
policy_eval_out = {
"action": np.array([1.0], dtype=np.float32),
"memory_out": np.array([[2.5]], dtype=np.float32),

2
ml-agents/mlagents/trainers/tests/test_ppo.py


else None
)
policy = NNPolicy(
0, mock_brain, trainer_settings, False, False, create_tf_graph=False
0, mock_brain, trainer_settings, False, "test", False, create_tf_graph=False
)
optimizer = PPOOptimizer(policy, trainer_settings)
return optimizer

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


else None
)
policy = NNPolicy(
0, mock_brain, trainer_settings, False, False, create_tf_graph=False
0, mock_brain, trainer_settings, False, "test", False, create_tf_graph=False
)
if trainer_settings.trainer_type == TrainerType.SAC:
optimizer = SACOptimizer(policy, trainer_settings)

10
ml-agents/mlagents/trainers/tests/test_sac.py


else None
)
policy = NNPolicy(
0, mock_brain, trainer_settings, False, False, create_tf_graph=False
0, mock_brain, trainer_settings, False, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
return optimizer

)
trainer_params = dummy_config
trainer_params.hyperparameters.save_replay_buffer = True
trainer = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, False, 0, 0)
trainer = SACTrainer(
mock_brain.brain_name, 1, trainer_params, True, False, 0, "testdir"
)
policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
trainer.add_policy(mock_brain.brain_name, policy)

# Wipe Trainer and try to load
trainer2 = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, True, 0, 0)
trainer2 = SACTrainer(
mock_brain.brain_name, 1, trainer_params, True, True, 0, "testdir"
)
policy = trainer2.create_policy(mock_brain.brain_name, mock_brain)
trainer2.add_policy(mock_brain.brain_name, policy)

1
ml-agents/mlagents/trainers/tests/test_simple_rl.py


env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
trainer_factory = TrainerFactory(
trainer_config=trainer_config,
run_id=run_id,
output_path=dir,
train_model=True,
load_model=False,

14
ml-agents/mlagents/trainers/tests/test_trainer_util.py


brain_params_mock = BrainParametersMock()
BrainParametersMock.return_value.brain_name = "testbrain"
external_brains = {"testbrain": BrainParametersMock()}
run_id = "testrun"
output_path = "results_dir"
train_model = True
load_model = False

expected_config = PPO_CONFIG
def mock_constructor(
self, brain, reward_buff_cap, trainer_settings, training, load, seed, run_id
self,
brain,
reward_buff_cap,
trainer_settings,
training,
load,
seed,
artifact_path,
):
assert brain == brain_params_mock.brain_name
assert trainer_settings == expected_config

assert seed == seed
assert run_id == run_id
assert artifact_path == os.path.join(output_path, brain_name)
run_id=run_id,
output_path=output_path,
train_model=train_model,
load_model=load_model,

trainer_factory = trainer_util.TrainerFactory(
trainer_config=no_default_config,
run_id="testrun",
output_path="output_path",
train_model=True,
load_model=False,

12
ml-agents/mlagents/trainers/trainer/trainer.py


brain_name: str,
trainer_settings: TrainerSettings,
training: bool,
run_id: str,
artifact_path: str,
:dict trainer_settings: The parameters for the trainer (dictionary).
:bool training: Whether the trainer is set for training.
:str run_id: The identifier of the current run
:int reward_buff_cap:
:param trainer_settings: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param artifact_path: The directory within which to store artifacts from this trainer
:param reward_buff_cap:
self.run_id = run_id
self.trainer_settings = trainer_settings
self._threaded = trainer_settings.threaded
self._stats_reporter = StatsReporter(brain_name)

self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
self.step: int = 0
self.artifact_path = artifact_path
self.summary_freq = self.trainer_settings.summary_freq
@property

13
ml-agents/mlagents/trainers/trainer_util.py


def __init__(
self,
trainer_config: Dict[str, TrainerSettings],
run_id: str,
output_path: str,
train_model: bool,
load_model: bool,

multi_gpu: bool = False,
):
self.trainer_config = trainer_config
self.run_id = run_id
self.output_path = output_path
self.init_path = init_path
self.train_model = train_model

return initialize_trainer(
self.trainer_config[brain_name],
brain_name,
self.run_id,
self.output_path,
self.train_model,
self.load_model,

def initialize_trainer(