浏览代码

Merge pull request #2157 from Unity-Technologies/release-v0.8.2

Release v0.8.2
/hotfix-v0.9.2a
GitHub 6 年前
当前提交
d5f6b7f8
共有 181 个文件被更改,包括 11863 次插入13407 次删除
  1. 23
      .circleci/config.yml
  2. 5
      .gitignore
  3. 5
      CONTRIBUTING.md
  4. 2
      UnitySDK/Assets/ML-Agents/Editor/HeuristicBrainEditor.cs
  5. 4
      UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs
  6. 1001
      UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHardLearning.nn
  7. 2
      UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHardLearning.nn.meta
  8. 985
      UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallLearning.nn
  9. 2
      UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallLearning.nn.meta
  10. 646
      UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/TFModels/BananaLearning.nn
  11. 2
      UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/TFModels/BananaLearning.nn.meta
  12. 21
      UnitySDK/Assets/ML-Agents/Examples/Basic/TFModels/BasicLearning.nn
  13. 2
      UnitySDK/Assets/ML-Agents/Examples/Basic/TFModels/BasicLearning.nn.meta
  14. 286
      UnitySDK/Assets/ML-Agents/Examples/Bouncer/TFModels/BouncerLearning.nn
  15. 2
      UnitySDK/Assets/ML-Agents/Examples/Bouncer/TFModels/BouncerLearning.nn.meta
  16. 1001
      UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamicLearning.nn
  17. 2
      UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamicLearning.nn.meta
  18. 1001
      UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStaticLearning.nn
  19. 2
      UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStaticLearning.nn.meta
  20. 1001
      UnitySDK/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorldLearning.nn
  21. 2
      UnitySDK/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorldLearning.nn.meta
  22. 1001
      UnitySDK/Assets/ML-Agents/Examples/Hallway/TFModels/HallwayLearning.nn
  23. 2
      UnitySDK/Assets/ML-Agents/Examples/Hallway/TFModels/HallwayLearning.nn.meta
  24. 1001
      UnitySDK/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlockLearning.nn
  25. 2
      UnitySDK/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlockLearning.nn.meta
  26. 1001
      UnitySDK/Assets/ML-Agents/Examples/Pyramids/TFModels/PyramidsLearning.nn
  27. 2
      UnitySDK/Assets/ML-Agents/Examples/Pyramids/TFModels/PyramidsLearning.nn.meta
  28. 1001
      UnitySDK/Assets/ML-Agents/Examples/Reacher/TFModels/ReacherLearning.nn
  29. 2
      UnitySDK/Assets/ML-Agents/Examples/Reacher/TFModels/ReacherLearning.nn.meta
  30. 5
      UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Materials/BlueAgent.mat
  31. 5
      UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Materials/Wall.mat
  32. 1001
      UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/GoalieLearning.nn
  33. 2
      UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/GoalieLearning.nn.meta
  34. 1001
      UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/StrikerLearning.nn
  35. 2
      UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/StrikerLearning.nn.meta
  36. 1001
      UnitySDK/Assets/ML-Agents/Examples/Tennis/TFModels/TennisLearning.nn
  37. 2
      UnitySDK/Assets/ML-Agents/Examples/Tennis/TFModels/TennisLearning.nn.meta
  38. 2
      UnitySDK/Assets/ML-Agents/Examples/Walker/TFModels/WalkerLearning.nn.meta
  39. 1001
      UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJumpLearning.nn
  40. 2
      UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJumpLearning.nn.meta
  41. 1001
      UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJumpLearning.nn
  42. 2
      UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJumpLearning.nn.meta
  43. 111
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda.md
  44. 1000
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Barracuda.dll
  45. 918
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Activation.compute
  46. 944
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/BarracudaReferenceImpl.compute
  47. 68
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Broadcast.compute
  48. 596
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Conv.compute
  49. 632
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/ConvOld.compute
  50. 438
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Dense.compute
  51. 30
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/DenseFP16.compute
  52. 944
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Experimental.compute
  53. 214
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/FastNV.compute
  54. 484
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Generic.compute
  55. 44
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Random.cginc
  56. 480
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Tensor.cginc
  57. 112
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/TexConv.compute
  58. 57
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/ReleaseNotes.md
  59. 2
      UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/package.json
  60. 27
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  61. 48
      UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs
  62. 31
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs
  63. 23
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/BarracudaModelParamLoader.cs
  64. 21
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs
  65. 15
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs
  66. 15
      UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs
  67. 11
      UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs
  68. 1
      config/trainer_config.yaml
  69. 5
      docs/Basic-Guide.md
  70. 2
      docs/Glossary.md
  71. 11
      docs/Learning-Environment-Create-New.md
  72. 10
      docs/Learning-Environment-Design-Agents.md
  73. 3
      docs/Learning-Environment-Design-Heuristic-Brains.md
  74. 19
      docs/Learning-Environment-Examples.md
  75. 4
      docs/Learning-Environment-Executable.md
  76. 1
      docs/Readme.md
  77. 2
      docs/Training-on-Microsoft-Azure.md
  78. 12
      gym-unity/README.md
  79. 157
      gym-unity/gym_unity/envs/unity_env.py
  80. 67
      gym-unity/gym_unity/tests/test_gym.py
  81. 21
      gym-unity/setup.py
  82. 4
      ml-agents-envs/mlagents/envs/base_unity_environment.py
  83. 141
      ml-agents-envs/mlagents/envs/brain.py
  84. 1
      ml-agents-envs/mlagents/envs/communicator.py
  85. 212
      ml-agents-envs/mlagents/envs/communicator_objects/agent_action_proto_pb2.py
  86. 387
      ml-agents-envs/mlagents/envs/communicator_objects/agent_info_proto_pb2.py
  87. 298
      ml-agents-envs/mlagents/envs/communicator_objects/brain_parameters_proto_pb2.py
  88. 61
      ml-agents-envs/mlagents/envs/communicator_objects/command_proto_pb2.py
  89. 72
      ml-agents-envs/mlagents/envs/communicator_objects/custom_action_pb2.py
  90. 72
      ml-agents-envs/mlagents/envs/communicator_objects/custom_observation_pb2.py
  91. 72
      ml-agents-envs/mlagents/envs/communicator_objects/custom_reset_parameters_pb2.py
  92. 198
      ml-agents-envs/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py
  93. 223
      ml-agents-envs/mlagents/envs/communicator_objects/engine_configuration_proto_pb2.py
  94. 250
      ml-agents-envs/mlagents/envs/communicator_objects/environment_parameters_proto_pb2.py
  95. 123
      ml-agents-envs/mlagents/envs/communicator_objects/header_pb2.py
  96. 148
      ml-agents-envs/mlagents/envs/communicator_objects/resolution_proto_pb2.py
  97. 62
      ml-agents-envs/mlagents/envs/communicator_objects/space_type_proto_pb2.py
  98. 148
      ml-agents-envs/mlagents/envs/communicator_objects/unity_input_pb2.py
  99. 182
      ml-agents-envs/mlagents/envs/communicator_objects/unity_message_pb2.py

23
.circleci/config.yml


version: 2.1
version: 2.0
jobs:
build:

command: |
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install --upgrade setuptools
pip install pytest-cov==2.6.1 codacy-coverage==1.3.11
pip install black pytest-cov==2.6.1 codacy-coverage==1.3.11
cd ../gym-unity && pip install -e .
- save_cache:

- run:
name: Run Tests for ml-agents and gym_unity
command: |

python-codacy-coverage -r coverage.xml
- run:
name: Check Code Style for ml-agents and gym_unity using black
command: |
. venv/bin/activate
black --check ml-agents
black --check ml-agents-envs
black --check gym-unity
- run:
name: Verify there are no hidden/missing metafiles.
# Renaming or deleting files can leave metafiles behind, which makes Unity very unhappy.
command: |
. venv/bin/activate
python utils/validate_meta_files.py
- store_test_results:
path: test-reports

5
.gitignore


/UnitySDK/[Uu]nity[Pp]ackage[Mm]anager/
/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Gizmos*
/UnitySDK/Assets/Demonstrations*
# Tensorflow Model Info

*.pyc
*.idea/misc.xml
*.idea/modules.xml
*.idea/
*.iml
*.cache
*/build/

# Ignore PyPi build files.
dist/
build/
# Python virtual environment
venv/

5
CONTRIBUTING.md


## Style Guide
When performing changes to the codebase, ensure that you follow the style guide
of the file you're modifying. For Python, we follow
[PEP 8](https://www.python.org/dev/peps/pep-0008/).
For C#, we will soon be adding a formal style guide for our repository.
When making changes to the codebase, please ensure that all Python code is reformatted using the [black](https://github.com/ambv/black) formatter. For C#, we will soon be adding requirements for style and formatting.

2
UnitySDK/Assets/ML-Agents/Editor/HeuristicBrainEditor.cs


{
Debug.LogError(
"Instance of " + brain.decisionScript.name + " couldn't be created. " +
"The the script class needs to derive from Decision.");
"The script class needs to derive from Decision.");
brain.decisionScript = null;
}
}

4
UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs


[Test]
public void TestSanitization()
{
const string dirtyString = "abc123&!@";
const string dirtyString = "abc1234567&!@";
var cleanString = DemonstrationRecorder.SanitizeName(dirtyString);
var cleanString = DemonstrationRecorder.SanitizeName(dirtyString, 6);
Assert.AreNotEqual(dirtyString, cleanString);
Assert.AreEqual(cleanString, knownCleanString);
}

1001
UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHardLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHardLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

985
UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

646
UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/TFModels/BananaLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/BananaCollectors/TFModels/BananaLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

21
UnitySDK/Assets/ML-Agents/Examples/Basic/TFModels/BasicLearning.nn


vector_observation���� action_masks���� action_probsconcat_1actionvalue_estimatestrided_slice_1/stack_2������?strided_slice_1/stack_2strided_slice_1/stack_1������?strided_slice_1/stack_1strided_slice_1/stack������?strided_slice_1/stackstrided_slice/stack_2������?strided_slice/stack_2strided_slice/stack_1������?strided_slice/stack_1strided_slice/stack������?strided_slice/stack
action_output_shape������?action_output_shape  memory_size������? memory_size version_number������?version_numberis_continuous_control������?is_continuous_controladd_1/y������?add_1/ySum/reduction_indices������?Sum/reduction_indicesadd/y������?add/ymain_graph_0/hidden_0/BiasAdd�����?vector_observationmain_graph_0/hidden_0/kernel�main_graph_0/hidden_0/bias�main_graph_0/hidden_0/Mul2 �����?main_graph_0/hidden_0/BiasAdd dense/MatMul�����?main_graph_0/hidden_0/Mul dense/kernel�<dense/MatMul/patch:0� action_probs2�����? dense/MatMulSoftmax2�����? action_probsaddd�����?Softmaxadd/yMulf�����?add action_masksSum������?MulSum/reduction_indicestruedivg�����?MulSumadd_1d�����?truedivadd_1/yLog_12r�����?add_1concat_12�����?Log_1action2�����?Log_1dense_1/BiasAdd�����?main_graph_0/hidden_0/Muldense_1/kernel� dense_1/bias
value_estimate2�����?dense_1/BiasAdd�?�?@@�?�?@@@@@���.�?���.�)= �=��X����>��>���d?)>PXx>.���Ӿ�Q��d?>�8�>Y�&=:[:>ۙa>��>��f>A
=95����>��>d�����D���=���>^��sN����,=�=ܴ�>B��Q�>�Kξԙ��۹[>ۺ�����>0��K�>�0�қ�=���;T���>���>�>ʚ`>f�>>bS�>ɴJ>��>�Ƣ�$�j���@���6�P���>L�+Y$��Ԫ�p��>��Ǽ!�ž�jf>k����G�����>��=�T���ث���iܾQy�>��k>[�>lN8�<'=0�R��#��1�
�J>��پ���>x��;�NN>�I^>��8=Ԣ��7�~= ʋ>�B�;+��>:�&>�CR��v�����>�0c��h>w��>2Ѿ�F����>�1��@ȶ>y�\=]�"=���Tn�=�)�>K�>3��>ѩ��� ��u|M>�B��xj��>�c�>�k����>NO�>8�����j<�>����W���<�>�Zþח�>��>�>L�e�Y��>��=�J�=�x+��[!�-1�>3�q=�z���?(����mί>x�5����>�ߴ���ؼo���Z7��8O>�m����=F�5>������C�zcݾ@��JqL>DP�B�o������D?�h=r�? ����������쩾"���������?������q"���Ɇ�͂>>�:eH���i�ΰ�>;���˕>ZG >��R<o����->X������Z��> nA<�v����> �H������F=SR0>У�=�hC���Ⱦ�~s��}>pj潦/����4��5#���>��O=C8�>�Y��[�>8@,>!�>��=>�Y����>����"�>S�m>�z�>l�<��>H��^0+�-�V�~ؾ�j�=� ���>}�=��?r=i��;Z?c�>��%���I>M ��k>U��>�樺%����p�>�����s���S�Kp)�5�?!��)��>�!?^��>4��T�=�Q�>��L�"u�>�Ͼ-��>buo>0�?����q��<�_��z���h����cD? � ��C?���>3z�>�|~���5?���>ۗ��|�&��G�s��=��$?�4?o��5�'?�D�@h��,�������C&?������>���>��?� ���c
?�q?j�_=�膾y���>�H>���>� ˾��>��f����?0���\���'?�br���>p�7?8?�?��<�>�d�>}���H������9?Ϛ�>�1�>�짾�x?����]���b��1��VA�>�ޞ��c�>��7?�;)?Sϟ�j=c>gǸ>�)��z���Hʾ����>�M ?p� ?�hT��"Ͼ�S=��f>�%=� O���Q���=��>�!Ѽ [�>�x��{>O�j>q嶾̇>i�=�i¼��>E�a���4>��4>�ۅ��W<>U����_,>��>�Ѧ;
J���4>C��>i�>����.���|�=�݃��H:>H�����˼�ܽ��=fG2� �w� +�=y9�=�F>^��=�� �&>�����l>�޲�[�<[��>A�>vf���P��8�s�4�;1F1>m�<ƨ7?������C& ���ݾh�&?ˎ���4?33?��!?��J5.?�b6?s��5���J ���"?^�&?��-?
�� ����
��$?a��>H��>(� ����>�s�>Yb�� �>e��>�2 ��5�>�7�>eF��7ھ�Q
���?W ?T.�>g���$���m�;?��ᾚ0߾H�?G��_���љ?C�>-ɽ>�� ������Hm/?�������a=?�A�>*w�>x) ��^z>�a >%���3��>h?M����kv����?=���
��xV?�/쾷����?
��>�S?�����$>��r����>JI��F�5?r6e�� 0�3�->D�G�u�f�܂�9S��0�=�C�>�T����o�6�>y �>׹2>2����^=
vector_observation���� action_masks���� action_probsconcat_1actionvalue_estimateaction_output_shape������?action_output_shape memory_sizeversion_numberis_continuous_controlSum/reduction_indicesadd/yadd_1/ymain_graph_0/hidden_0/BiasAdd�����?vector_observationmain_graph_0/hidden_0/kernel�main_graph_0/hidden_0/bias�main_graph_0/hidden_0/Mul2 �����?main_graph_0/hidden_0/BiasAdd dense/MatMul�����?main_graph_0/hidden_0/Mul dense/kernel�<dense/MatMul/patch:0� action_probs2�����? dense/MatMul strided_slice������? action_probsstrided_slice_1������? action_masksSoftmax2�����? strided_sliceaddd�����?Softmaxadd/yMulf�����?addstrided_slice_1Sum������?MulSum/reduction_indicestruedivg�����?MulSumadd_1d�����?truedivadd_1/yLog_12r�����?add_1concat_12�����?Log_1action2�����?Log_1dense_1/BiasAdd�����?main_graph_0/hidden_0/Muldense_1/kernel� dense_1/bias�value_estimate2�����?dense_1/BiasAdd@@@�?���.���.��|���g=�>ׯ;v�F����>z�<����9���P��]�����:��q�=[���o�IZ����a�\�͈�=�����|�w�B��T[>Y))���>�[����>�ɾ1@�����K\�=
��>)3ν�>�ڱ�,���ݳ>�م>��=��=P8>G@�=rF�>6��� ]>��?�����-����=3�w�p�>�U�<&1V>^/p>=���7X�>(`D>.�B=/ž�s���ڑ�nc/>��y>� �>=�@���YY>�L�>�B��=[>�,Ѿ8Se�y�������X�>���>|�=tҽ�믾�����>�>�<˾���>��>�Sþ�\�>w��>���>�Z>���`
=Ȋ >F?㾒���c�=w���oy���������A�<I��=S�-=���=�C[�ƕ�=�����c#�<�=[,�>��>1G:�1Z���TN=k��=@.ؾ'��>�y̽�G�=�\>P׾*n}>��S>�ñ�^�=�����)>ImB=b�O�{��=�`4<�rv�S�n>�>����U���#>5C���:�>��]M�>Y U��=i#�>¯德���"Ɵ>�ȽD�� P�>�m>0:�����>�
>�S���Ɇ>* /�1�Ѿ(6>�ˠ�c����L���$�>~Ő�h���L>�P�<�ث���L>�">iʊ>vL,�"�=*(���5�?�?-,��l'(=4��=���.�־���Z���"R�=h[��Nh����u>��->'%N>&K>g'�=�� �Z@=��R����='����ns>���=�/>��>��=�}�>������>�J���>ɽ$����\tu=���=�l�=���� [�=��B>�B�>�?���>��<x|�=�׵>�wʾ�y�E��>��>�����о,�R?D-ʾ����¾s�R>����1���y�>���=��P<�� ?\�4�1��>���>��ʾ\��/5>ğ>w�7�;G��f�R?~Cx=����'���?V��-���
?J.���q�>�r ?R�C��?%�<ʑ
�y���" ?���>��H�X�$D)?;����v2�Jk&����>����e��=�M;?���pL�>ʹW>0��P�>�X?'�&��\۾N�>�.?A⾖��e��>�,���Q��?��� �>�Ѿ���?8�>����>r�?�뾙��<6?熽��$��b�?wɁ>�־ۮ�|??W�0J���ľ���>�U�����x,I>*䈾0*�>��>�p־zQ#>Z��> ⦾h���{�?c5�>��.��|��H?�~'���۾t.��(�'?�Wξ �۾���>��վ[�>QӖ>�"�SK>�n>Ą����>C�
���w>�l6��/��O�E>M�,��Ế��H��Ȃ�!m��|վYD��d�>����`I>�&��p$�':�>��=b�2<�h�<�3�=�X�=y�� �羖2�=_xf�@X;Z��>F��=Y����8����$�xe����Ͻ����>�� >᣽z���_�>��>?;��>w�ͽ�N ��Ǥ>����`)��"-��w>i��>$D�+�>�(���3�����h#J?�� ���)� +?�()?n���:���B?C���� %��)�%�7?� �4� �D?� �!N?�B?cG ���(?k�
��X�cp2?�?J��>� &�ɺ�>���>>�M���:oؾ��?Q� �g��#?�>���>������>��>|r�u��ݾ��l-?�{�>���>���u��>���>W���)?�1�>h]6�V,��뾜]&?%n ?�[�>s��s��>5��>U��������_�1?Yp�>�"�>rh�9���.ܾ8b'?e������A?"6�>�|�>E��j
��C6����?�>g>����x���]�@�;i��� �EH���=>�}�=)i�>�/�><���k�澯�.>����x��=>�\�E��1�7'�=

2
UnitySDK/Assets/ML-Agents/Examples/Basic/TFModels/BasicLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

286
UnitySDK/Assets/ML-Agents/Examples/Bouncer/TFModels/BouncerLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Bouncer/TFModels/BouncerLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamicLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamicLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStaticLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStaticLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorldLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorldLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/Hallway/TFModels/HallwayLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Hallway/TFModels/HallwayLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlockLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlockLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/Pyramids/TFModels/PyramidsLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Pyramids/TFModels/PyramidsLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/Reacher/TFModels/ReacherLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Reacher/TFModels/ReacherLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

5
UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Materials/BlueAgent.mat


m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _SpecGlossMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
m_Floats:
- _BumpScale: 1
- _Cutoff: 0.5

m_Colors:
- _Color: {r: 0.10980392, g: 0.6039216, b: 1, a: 1}
- _EmissionColor: {r: 0, g: 0, b: 0, a: 1}
- _SpecColor: {r: 0.2, g: 0.2, b: 0.2, a: 1}

5
UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Materials/Wall.mat


m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
- _SpecGlossMap:
m_Texture: {fileID: 0}
m_Scale: {x: 1, y: 1}
m_Offset: {x: 0, y: 0}
m_Floats:
- _BumpScale: 1
- _Cutoff: 0.5

m_Colors:
- _Color: {r: 0.5, g: 0.5, b: 0.5, a: 1}
- _EmissionColor: {r: 0, g: 0, b: 0, a: 1}
- _SpecColor: {r: 0.2, g: 0.2, b: 0.2, a: 1}

1001
UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/GoalieLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/GoalieLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/StrikerLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/StrikerLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/Tennis/TFModels/TennisLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/Tennis/TFModels/TennisLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

2
UnitySDK/Assets/ML-Agents/Examples/Walker/TFModels/WalkerLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJumpLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJumpLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

1001
UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJumpLearning.nn
文件差异内容过多而无法显示
查看文件

2
UnitySDK/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJumpLearning.nn.meta


userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 83221ad3db87f4b3b91b041047cb2bc5, type: 3}
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

111
UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda.md


### Load Model into Barracuda
Once you have your TensorFlow (or ONNX) model converted, you can load resulting Barracuda file via `ModelLoader`:
```C#
var model = ModelLoader.LoadFromStreamingAssets(modelName + ".bytes");
var model = ModelLoader.LoadFromStreamingAssets(modelName + ".nn");
```
Another option is to use the editor model importer. Just add a public `NNModel` field to your C# script and assign the ``.nn`` model file via the editor UI:
```C#
public NNModel modelSource;
<..>
var model = ModelLoader.Load(modelSource);
var worker = BarracudaWorkerFactory.CreateWorker(BarracudaWorkerFactory.Type.ComputeFast, model)
var worker = BarracudaWorkerFactory.CreateWorker(BarracudaWorkerFactory.Type.ComputePrecompiled, model)
```
### Execute the model

Execution is asynchronous for GPU backends. Currently, the implementation is synchronous for CPU backends; however, it is best to assume that execution will be asynchronous for all backends in the future.
### Fetch outputs
If model has only single output, then simple `worker.Fetch()` can be used, otherwise output names should be provided.
If model has only single output, then simple `worker.Peek()` can be used, otherwise output names should be provided.
var O = worker.Fetch(outputName);
var O = worker.Peek(outputName);
_Note:_ ``Peek()`` does not take ownership of the tensor. If you expect to keep the tensor for a longer time, use ``Fetch()``.
### Cleanup
As a Barracuda client, you are responsible for calling `Dispose` on the _worker_, _inputs_ and _outputs_ you fetched. This is necessary to properly free GPU resources.

### Texture as output
If you want to use Barracuda execution results further in the graphics pipeline, you can copy data from `Tensor` into `RenderTexture` without stalling CPU or GPU:
```C#
var tensor = worker.Fetch();
var tensor = worker.Peek();
var texture = BarracudaTextureUtils.TensorToRenderTexture(tensor);
```
If you wish, you can reuse the same `RenderTexture` multiple times:

var tensor = worker.Fetch();
var tensor = worker.Peek();
BarracudaTextureUtils.TensorToRenderTexture(tensor, texture);
```

Convert from TensorFlow:
```bash
python tensorflow_to_barracuda.py Models/3DBall-tf-model.pb Destination/3DBall-bc.bytes
python tensorflow_to_barracuda.py Models/3DBall-tf-model.pb Destination/3DBall-bc.nn
python onnx_to_barracuda.py Models/mnist/model.onnx Destination/mnist-bc.bytes
python onnx_to_barracuda.py Models/mnist/model.onnx Destination/mnist-bc.nn
```
If network has multiple outputs, but you need only particular ones during the inference, there is an optional `-trim` flag to remove unused outputs and calculations.

Trim will first remove outputs that do not match regular expression from the graph. In this case only output that ends with `action` will be left.
Next trim will strip all nodes that do not participate in the evaluation of the output.
You could pass `--print-supported-ops` to get approximate list of supported operations/activations for specific converter.
P.S. Python 3.5 or 3.6 is recommended
## Approximate list of supported layers/operations for TensorFlow converter
```
Activation
Add
AvgPool
BatchNormalization
BatchNormalizationRuntime
BiasAdd
Concat
Conv2D
Conv2DBackpropInput
Dense
DepthwiseConv2dNative
Flatten
FusedBatchNorm
GlobalAveragePool
GlobalAvgPool
InstanceNormalization
LRN
MatMul
Max
MaxPool
Maximum
Mean
Min
Minimum
Mul
Multinomial
Nop
OneHot
Pad
Pow
Prod
RandomStandardNormal
RandomUniform
RealDiv
Reshape
ResizeBicubic
ResizeBilinear
ResizeNearestNeighbor
StridedSlice
Sub
Sum
```
## Approximate list of supported activations for TensorFlow converter
```
Abs
Acos
Acosh
Asin
Asinh
Atan
Atanh
Ceil
Cos
Cosh
Elu
Exp
Floor
LeakyRelu
Linear
Log
LogSoftmax
Neg
Relu
Relu6
Selu
Sigmoid
Sin
Sinh
Softmax
Softplus
Softsign
Sqrt
Swish
Tan
Tanh
```
P.S. some of these operations are under limited support and not all configurations are properly supported
P.P.S. We plan to migrate Tensorflow and ONNX converters from Python to C# in the future.
P.P.S. Python 3.5 or 3.6 is recommended
P.P.P.S. We plan to migrate Tensorflow and ONNX converters from Python to C# in the future.

1000
UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Barracuda.dll
文件差异内容过多而无法显示
查看文件

918
UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Activation.compute


#pragma kernel Relu_Flat
#pragma kernel Relu_Loop
#pragma kernel Relu6_Flat
#pragma kernel Relu6_Loop
#pragma kernel Tanh_Flat
#pragma kernel Tanh_Loop
#pragma kernel Swish_Flat
#pragma kernel Swish_Loop
#pragma kernel Sigmoid_Flat
#pragma kernel Sigmoid_Loop
#pragma kernel Elu_Flat
#pragma kernel Elu_Loop
#pragma kernel LeakyRelu_Flat
#pragma kernel LeakyRelu_Loop
#pragma kernel Exp_Flat
#pragma kernel Exp_Loop
#pragma kernel Log_Flat
#pragma kernel Log_Loop
#pragma kernel Pow_Flat
#pragma kernel Pow_Loop
/*
Relu_Flat (NEW) vs Relu_Nyxc+Relu_CNyx+Relu
Compute Precompiled
VGG@1
<<<Exec #128: 59.6 ms, cpu: .9 ms, avg: 62.4 ms, result:OK <--- NEW!
<<<Exec #128: 63.6 ms, cpu: .9 ms, avg: 64.0 ms, result:OK
VGG@4
<<<Exec #16: 276.7 ms, cpu: .9 ms, avg: 272.8 ms, result:OK <--- NEW!
<<<Exec #16: 297.5 ms, cpu: .9 ms, avg: 274.4 ms, result:OK
RES@1
<<<Exec #100: 82.2 ms, cpu: 22.2 ms, avg: 81.0 ms, result:OK <--- NEW!
<<<Exec #100: 82.1 ms, cpu: 22.5 ms, avg: 85.4 ms, result:OK
PPO_2@256
<<<Exec #200: 10.3 ms, cpu: 7.6 ms, avg: 11.9 ms, result:OK <--- NEW!
<<<Exec #200: 10.9 ms, cpu: 8.3 ms, avg: 12.3 ms, result:OK
PPO_CNN@256
<<<Exec #100: 60.6 ms, cpu: 62.3 ms, avg: 65.6 ms, result:OK <--- NEW!
<<<Exec #100: 72.6 ms, cpu: 62.7 ms, avg: 66.0 ms, result:OK
*/
#pragma kernel Relu
#pragma kernel Relu_CNyx
#pragma kernel Relu_Nyxc

#pragma kernel Exp
#pragma kernel Exp_CNyx
#pragma kernel Exp_Nyxc
#pragma kernel Log
#pragma kernel Log_CNyx
#pragma kernel Log_Nyxc
#pragma kernel Pow
#pragma kernel Pow_CNyx
#pragma kernel Pow_Nyxc

TENSOR_DECL_RW(O)
float _Alpha;
uint _LoopStride;
// Defines the kernel `name##_Flat`, which applies `op_name` to one tensor
// element per thread: thread x reads X at flat index x and writes the result
// to O at the same flat index.
// Threads whose index is at or beyond O.GetLength() return without touching
// memory. The guard uses `>=` so that index == length (one past the last
// element) is rejected too, matching the `i < len` bound of the _Loop variant.
// NOTE(review): DISPATCH_ARGS and TENSOR_ARGS2 are defined outside this view;
// presumably they declare dispatch dimensions and bind the X/O tensors - confirm.
#define FLAT_ACTIVATION(name, op_name) \
void name##_Flat (uint3 dispatchThreadID : SV_DispatchThreadID)\
{\
    DISPATCH_ARGS(O.length, 1, 1)\
    TENSOR_ARGS2(X, O);\
\
    uint i = dispatchThreadID.x;\
    if (i >= O.GetLength()) return;\
\
    float v = X.Get(i);\
    v = op_name (v);\
    O.Set(i, v);\
}
// Defines the kernel `name##_Loop`, which applies `op_name` to tensor elements
// starting at the thread's x index and advancing by _LoopStride until the index
// reaches O.GetLength(). Each element is read from X and the result is written
// to O at the same flat index.
#define LOOP_ACTIVATION(name, op_name) \
void name##_Loop (uint3 dispatchThreadID : SV_DispatchThreadID)\
{\
    DISPATCH_ARGS(O.length, 1, 1)\
    TENSOR_ARGS2(X, O);\
\
    uint count = O.GetLength();\
    for (uint idx = dispatchThreadID.x; idx < count; idx += _LoopStride)\
    {\
        O.Set(idx, op_name (X.Get(idx)));\
    }\
}
// Emits both kernel variants for one activation: `name##_Flat` (via
// FLAT_ACTIVATION) and `name##_Loop` (via LOOP_ACTIVATION), each preceded by
// its own NUMTHREADS declaration.
// NOTE(review): NUMTHREADS is defined outside this view; presumably it selects
// one of the three listed thread-group sizes per target platform - confirm.
#define ACTIVATION(name, op_name) \
NUMTHREADS((512,1,1), (128,1,1), (64,1,1))\
FLAT_ACTIVATION(name, op_name)\
NUMTHREADS((512,1,1), (128,1,1), (64,1,1))\
LOOP_ACTIVATION(name, op_name)
return 0.5f * (v + abs(v));
return 0.5f * (v + abs(v));
return min(max(0, v), 6);
return min(max(0, v), 6);
return v / (1.f + exp(-v));
return v / (1.f + exp(-v));
return 1.f / (1.f + exp(-v));
return 1.f / (1.f + exp(-v));
if (v <= 0)
v = _Alpha * (exp(v) - 1);
return v;
if (v <= 0)
v = _Alpha * (exp(v) - 1);
return v;
return max(v, _Alpha * v);
return max(v, _Alpha * v);
float signed_pow(float f, float e)
float signed_pow(float f)
// handle negative f
float v = pow(abs(f), e);
float s = (e % 2 == 1) ?
sign(f): // exponent is odd => sign(f) * pow(abs(f), e)
1; // exponent is even => pow(abs(f), e)
return v * s;
float e = _Alpha;
// handle negative f
float v = pow(abs(f), e);
float s = (e % 2 == 1) ?
sign(f): // exponent is odd => sign(f) * pow(abs(f), e)
1; // exponent is even => pow(abs(f), e)
return v * s;
ACTIVATION(Relu, relu)
ACTIVATION(Relu6, relu6)
ACTIVATION(Tanh, tanh)
ACTIVATION(Sigmoid, sigmoid)
ACTIVATION(Swish, swish)
ACTIVATION(Elu, elu)
ACTIVATION(LeakyRelu, lrelu)
ACTIVATION(Exp, exp)
ACTIVATION(Log, log)
ACTIVATION(Pow, signed_pow)
// -------------------
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = relu(v);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = relu(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = relu6(v);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = relu6(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = tanh(v);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = tanh(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = sigmoid(v);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = sigmoid(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
uint c = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
if (c >= O.channels) return;
if (x >= O.width) return;
if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = swish(v);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = swish(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = elu(v);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = elu(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = lrelu(v);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = lrelu(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = exp(v);
O.Set(n, y, x, c, v);
}
}
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
NUMTHREADS((4,8,8), (4,8,4), (4,4,4))
void Log(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = exp(v);
O.Set(n, y, x, c, v);
}
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = log(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = signed_pow(v, _Alpha);
O.Set(n, y, x, c, v);
}
for (uint n = 0; n < X.batch; ++n)
{
float v = X.Get(n, y, x, c);
v = signed_pow(v);
O.Set(n, y, x, c, v);
}
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = relu(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = relu(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = relu(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = relu(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = relu6(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = relu6(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = relu6(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = relu6(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = tanh(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = tanh(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = tanh(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = tanh(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = sigmoid(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = sigmoid(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = sigmoid(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = sigmoid(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = swish(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = swish(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = swish(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = swish(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = elu(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = elu(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = elu(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = elu(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = lrelu(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = lrelu(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = lrelu(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = lrelu(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = exp(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = exp(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = exp(v);
O.Set(n, y, x, c, v);
}
// Element-wise log kernel, "CNyx" dispatch layout:
//   dispatchThreadID.x -> channel index c
//   dispatchThreadID.y -> flattened (n, y, x) position, unpacked below using X.width / X.height
// Reads X(n, y, x, c), applies log(), and writes the result to the same coordinates in O.
//
// The previous body also carried declarations belonging to the 1-D "Nyxc" variant
// (`uint nyxc = ...`, `uint c = nyxc % X.channels`, `uint nyx = nyxc / X.channels`) and a
// second copy of the x/ny/y/n unpacking. Those redeclared c, nyx, x, ny, y and n and
// contradicted the 2-D dispatch declared in DISPATCH_ARGS, so they are removed here.
NUMTHREADS((16,16,1), (16,8,1), (16,4,1))
void Log_CNyx(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
    TENSOR_ARGS2(X, O);

    uint c = dispatchThreadID.x;
    uint nyx = dispatchThreadID.y;

    // Unpack the flattened index: x varies fastest, then y, then batch n.
    uint x = nyx % X.width;
    uint ny = nyx / X.width;
    uint y = ny % X.height;
    uint n = ny / X.height;

    // Threads past the end of the tensor do nothing. x and y are always in range
    // because of the modulo above, so only c and n need checking.
    if (c >= X.channels) return;
    if (n >= X.batch) return;

    float v = X.Get(n, y, x, c);
    v = log(v);
    O.Set(n, y, x, c, v);
}
NUMTHREADS((512,1,1), (128,1,1), (64,1,1))
void Log_Nyxc(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = exp(v);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = log(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.channels, O.batch * O.height * O.width, 1);
TENSOR_ARGS2(X, O);
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint c = dispatchThreadID.x;
uint nyx = dispatchThreadID.y;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (c >= X.channels) return;
if (n >= X.batch) return;
if (c >= X.channels) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = signed_pow(v, _Alpha);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = signed_pow(v);
O.Set(n, y, x, c, v);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.batch * O.height * O.width * O.channels, 1, 1)
TENSOR_ARGS2(X, O);
uint nyxc = dispatchThreadID.x;
uint nyxc = dispatchThreadID.x;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
uint c = nyxc % X.channels;
uint nyx = nyxc / X.channels;
uint x = nyx % X.width;
uint ny = nyx / X.width;
uint y = ny % X.height;
uint n = ny / X.height;
if (n >= X.batch) return;
if (n >= X.batch) return;
float v = X.Get(n, y, x, c);
v = signed_pow(v, _Alpha);
O.Set(n, y, x, c, v);
float v = X.Get(n, y, x, c);
v = signed_pow(v);
O.Set(n, y, x, c, v);
}

DISPATCH_ARGS(O.flatWidth, O.flatHeight, 1);
TENSOR_ARGS2(X, O);
DISPATCH_ARGS(O.flatWidth, O.flatHeight, 1);
TENSOR_ARGS2(X, O);
uint x = dispatchThreadID.x;
uint y = dispatchThreadID.y;
uint x = dispatchThreadID.x;
uint y = dispatchThreadID.y;
if (x >= O.GetFlatWidth()) return;
if (y >= O.GetFlatHeight()) return;
if (x >= O.GetFlatWidth()) return;
if (y >= O.GetFlatHeight()) return;
float maxV = -FLT_MAX;
for (uint i = 0; i < X.GetFlatWidth(); ++i)
{
float v = X.Get(y, i);
if (v > maxV)
maxV = v;
}
float maxV = -FLT_MAX;
for (uint i = 0; i < X.GetFlatWidth(); ++i)
{
float v = X.Get(y, i);
if (v > maxV)
maxV = v;
}
float acc = 0.0f;
for (i = 0; i < X.GetFlatWidth(); ++i)
{
float v = X.Get(y, i);
acc += exp(v - maxV);
}
float acc = 0.0f;
for (i = 0; i < X.GetFlatWidth(); ++i)
{
float v = X.Get(y, i);
acc += exp(v - maxV);
}
float v = X.Get(y, x);
v = exp(v - maxV) / acc;
O.Set(y, x, v);
float v = X.Get(y, x);
v = exp(v - maxV) / acc;
O.Set(y, x, v);
}

944
UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/BarracudaReferenceImpl.compute
文件差异内容过多而无法显示
查看文件

68
UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Broadcast.compute


NUMTHREADS((4,8,8), (4,8,4), (4,4,4))
void BroadcastAdd(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{

NUMTHREADS((4,8,8), (4,8,4), (4,4,4))
void BroadcastSub(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{

NUMTHREADS((4,8,8), (4,8,4), (4,4,4))
void BroadcastMul(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < O.batch; ++n)
{

NUMTHREADS((4,8,8), (4,8,4), (4,4,4))
void BroadcastDiv(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{

float signed_pow(float f, float e)
{
// handle negative f
float v = pow(abs(f), e);
float s = (e % 2 == 1) ?
sign(f): // exponent is odd => sign(f) * pow(abs(f), e)
1; // exponent is even => pow(abs(f), e)
return v * s;
// handle negative f
float v = pow(abs(f), e);
float s = (e % 2 == 1) ?
sign(f): // exponent is odd => sign(f) * pow(abs(f), e)
1; // exponent is even => pow(abs(f), e)
return v * s;
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{

NUMTHREADS((4,8,8), (4,8,4), (4,4,4))
void BroadcastMin(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{

NUMTHREADS((4,8,8), (4,8,4), (4,4,4))
void BroadcastMax(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
DISPATCH_ARGS(O.channels, O.width, O.height);
TENSOR_ARGS3(X, B, O);
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
uint c = dispatchThreadID.x; uint x = dispatchThreadID.y; uint y = dispatchThreadID.z;
if (c >= O.channels) return; if (x >= O.width) return; if (y >= O.height) return;
for (uint n = 0; n < X.batch; ++n)
{

596
UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Resources/Conv.compute


#pragma kernel Conv2D
#pragma kernel Conv2D_RegisterBlock4x2
//#pragma kernel Conv2D_L1Cached64_RegisterBlock4x4
#pragma kernel Conv2D_L1Cached64_RegisterBlock4x4
#pragma kernel Conv2D_L1Cached32_RegisterBlock4x4
#pragma kernel DepthwiseConv2D

// Reference 2-D convolution kernel.
//   dispatchThreadID.x -> output kernel/feature index k
//   dispatchThreadID.y -> output column x
//   dispatchThreadID.z -> output row y
// For every batch item n, the accumulator starts at the bias B.Get(k) and adds
// X * K over the kernel window (dy, dx) and all input channels c, then stores the
// result in O(n, y, x, k).
//
// Padding is handled by working in padded coordinates: `pos` is the tap position
// in the padded input, taps outside [leftCorner, rightCorner) are skipped, and the
// real input is read at `pos - leftCorner`.
//
// The previous body contained two interleaved copies of the prologue, the loop nest
// and the final O.Set. That redeclared k/x/y/leftCorner/rightCorner and nested a second
// batch loop inside the kernel-tap loop. This is the single intended loop nest.
NUMTHREADS((16,4,4), (8,4,4), (4,4,4))
void Conv2D(uint3 dispatchThreadID : SV_DispatchThreadID)
{
    DISPATCH_ARGS(K.kernelCount, O.width, O.height);
    TENSOR_SHARED2_ARGS4(X, K, B, WBK, O);

    uint k = dispatchThreadID.x;
    uint x = dispatchThreadID.y;
    uint y = dispatchThreadID.z;

    // NOTE(review): the dispatch is sized by K.kernelCount but the guard uses K.channels;
    // presumably both name the same dimension of the kernel tensor — confirm.
    if (k >= K.channels) return;
    if (x >= O.width) return;
    if (y >= O.height) return;

    uint2 leftCorner = _Pad.xy;
    uint2 rightCorner = uint2(X.width, X.height) + _Pad.xy;
    for (uint n = 0; n < O.batch; ++n)
    {
        float acc = B.Get(k);
        for (uint dy = 0; dy < K.GetKernelHeight(); ++dy)
        {
            for (uint dx = 0; dx < K.GetKernelWidth(); ++dx)
            {
                uint2 pos = uint2(x, y) * _Stride.xy + uint2(dx, dy);
                // @TODO: investigate
                // WARNING: had to move both y check into the loop (as opposed to checking y in parent loop) - due to potential bug in Metal compiler
                if (any(pos < leftCorner)) continue;
                if (any(pos >= rightCorner)) continue;

                // fastfma is defined elsewhere — presumably a multiply-add (a * b + c); confirm.
                for (uint c = 0; c < X.channels; ++c)
                    acc = fastfma(X.Get(n, pos.y - leftCorner.y, pos.x - leftCorner.x, c), K.Get(dy, dx, c, k), acc);
            }
        }

        O.Set(n, y, x, k, acc);
    }
}

void Conv2D_RegisterBlock4x2(uint3 dispatchThreadID : SV_DispatchThreadID)
{
DISPATCH_ARGS(K.kernelCount, O.width, O.height);
TENSOR_SHARED2_ARGS4(X, K, B, WBK, O);
DISPATCH_ARGS(K.kernelCount, O.width, O.height);
TENSOR_SHARED2_ARGS4(X, K, B, WBK, O);
uint k = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
uint k = dispatchThreadID.x;
uint x = dispatchThreadID.y;
uint y = dispatchThreadID.z;
if (k >= K.channels) return;
if (x*SIZE_W >= O.width) return;
if (y*SIZE_H >= O.height) return;
if (k >= K.channels) return;
if (x*SIZE_W >= O.width) return;
if (y*SIZE_H >= O.height) return;
uint2 leftCorner = _Pad.xy;
uint2 rightCorner = uint2(X.width, X.height) + _Pad.xy;
for (uint n = 0; n < O.batch; ++n)
{
float acc[SIZE_H*SIZE_W];
[unroll]
for (uint q = 0; q < SIZE_H*SIZE_W; ++q)
acc[q] = B.Get(k);
for (uint dy = 0; dy < K.GetKernelHeight(); ++dy)
{
for (uint dx = 0; dx < K.GetKernelWidth(); ++dx)
{
uint2 pos[SIZE_H*SIZE_W];
[unroll]
for (uint q = 0; q < SIZE_H*SIZE_W; ++q)
pos[q] = uint2(x*SIZE_W+(q%SIZE_W), y*SIZE_H+(q/SIZE_W)) * _Stride.xy + uint2(dx, dy);
uint2 leftCorner = _Pad.xy;
uint2 rightCorner = uint2(X.width, X.height) + _Pad.xy;
for (uint n = 0; n < O.batch; ++n)
{
float acc[SIZE_H*SIZE_W];
[unroll]
for (uint q = 0; q < SIZE_H*SIZE_W; ++q)
acc[q] = B.Get(k);
for (uint dy = 0; dy < K.GetKernelHeight(); ++dy)
{
for (uint dx = 0; dx < K.GetKernelWidth(); ++dx)
{
uint2 pos[SIZE_H*SIZE_W];