Compare commits
Merge into: unity-tech-cn:main
Pull from: unity-tech-cn:ai-hw-2021/tensor-applier
This merge request has changes that conflict with the target branch:
/Project/Packages/manifest.json
/Project/ProjectSettings/ProjectVersion.txt
/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
/com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
/com.unity.ml-agents/Runtime/Academy.cs
/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
1 commit
Author | SHA1 | Message | Date |
---|---|---|---|
vincentpierre | a9ca4a7d | Moving the tensor applier arround | 4 years ago |
23 files changed, with 1,106 insertions and 35 deletions.
Changed files (changed lines per file):

   9  Project/Packages/manifest.json
   4  Project/ProjectSettings/ProjectVersion.txt
  41  Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab
  14  com.unity.ml-agents/Runtime/Academy.cs
  18  com.unity.ml-agents/Runtime/Inference/TensorProxy.cs
   3  com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
   1  com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
  21  com.unity.ml-agents/Runtime/Inference/TensorNames.cs
   1  com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
  59  com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
   9  com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
  11  com.unity.ml-agents/Runtime/ReplayBuffer.cs.meta
  11  com.unity.ml-agents/Runtime/Trainer.cs.meta
  87  com.unity.ml-agents/Runtime/ReplayBuffer.cs
  79  com.unity.ml-agents/Runtime/Trainer.cs
 267  com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs
  11  com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs.meta
 130  com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs
  11  com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs.meta
  11  com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs.meta
  84  com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs
  11  com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs.meta
 248  com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs
Project/ProjectSettings/ProjectVersion.txt:

- m_EditorVersion: 2019.4.20f1
- m_EditorVersionWithRevision: 2019.4.20f1 (6dd1c08eedfa)
+ m_EditorVersion: 2020.3.0f1
+ m_EditorVersionWithRevision: 2020.3.0f1 (c7b5465681fb)
(.meta import settings for one of the newly added scripts)

fileFormatVersion: 2
guid: be3c5834a200742ed983cd073dd69f9a
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
(.meta import settings for one of the newly added scripts)

fileFormatVersion: 2
guid: 8dd9e7f1621bd487998fd883b2518733
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
com.unity.ml-agents/Runtime/ReplayBuffer.cs (new file):

// Buffer for C# training

using System;
using System.Linq;
using Unity.Barracuda;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;

namespace Unity.MLAgents
{
    internal struct Transition
    {
        public IReadOnlyList<TensorProxy> state;
        public ActionBuffers action;
        public float reward;
        public bool done;
        public IReadOnlyList<TensorProxy> nextState;
    }

    internal class ReplayBuffer
    {
        List<Transition> m_Buffer;
        int m_CurrentIndex;
        int m_MaxSize;

        public ReplayBuffer(int maxSize)
        {
            m_Buffer = new List<Transition>();
            m_Buffer.Capacity = maxSize;
            m_MaxSize = maxSize;
        }

        public int Count
        {
            get => m_Buffer.Count;
        }

        public void Push(AgentInfo info, IReadOnlyList<TensorProxy> state, IReadOnlyList<TensorProxy> nextState)
        {
            if (m_Buffer.Count < m_MaxSize)
            {
                m_Buffer.Add(new Transition() { state = state, action = info.storedActions, reward = info.reward, done = info.done, nextState = nextState });
            }
            else
            {
                m_Buffer[m_CurrentIndex] = new Transition() { state = state, action = info.storedActions, reward = info.reward, done = info.done, nextState = nextState };
            }
            m_CurrentIndex += 1;
            m_CurrentIndex = m_CurrentIndex % m_MaxSize;
        }

        public List<Transition> SampleBatch(int batchSize)
        {
            var indexList = SampleIndex(batchSize);
            var samples = new List<Transition>(batchSize);
            for (var i = 0; i < batchSize; i++)
            {
                samples.Add(m_Buffer[indexList[i]]);
            }
            return samples;
        }

        public List<Transition> SampleDummyBatch(int batchSize)
        {
            var indexList = SampleIndex(batchSize);
            var samples = new List<Transition>(batchSize);
            for (var i = 0; i < batchSize; i++)
            {
                samples.Add(m_Buffer[m_CurrentIndex - 1]);
            }
            return samples;
        }

        private List<int> SampleIndex(int batchSize)
        {
            Random random = new Random();
            HashSet<int> index = new HashSet<int>();

            while (index.Count < batchSize)
            {
                index.Add(random.Next(m_Buffer.Count));
            }
            return index.ToList();
        }
    }
}
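For reference, a standalone sketch (not part of this diff) of the pattern ReplayBuffer implements: append until maxSize, then overwrite the oldest slot through a wrapping index, and sample uniformly without replacement. The RingBuffer<T> name is illustrative; the clamp in Sample also avoids the endless loop SampleIndex can hit when batchSize exceeds the number of stored transitions.

using System;
using System.Collections.Generic;
using System.Linq;

// Standalone sketch of the ring-buffer + uniform-sampling pattern used by ReplayBuffer above.
internal class RingBuffer<T>
{
    readonly List<T> m_Items;
    readonly int m_MaxSize;
    int m_NextIndex;

    public RingBuffer(int maxSize)
    {
        m_MaxSize = maxSize;
        m_Items = new List<T>(maxSize);
    }

    public int Count => m_Items.Count;

    public void Push(T item)
    {
        if (m_Items.Count < m_MaxSize)
        {
            m_Items.Add(item);           // still filling up
        }
        else
        {
            m_Items[m_NextIndex] = item; // overwrite the oldest slot once full
        }
        m_NextIndex = (m_NextIndex + 1) % m_MaxSize;
    }

    // Uniform sampling without replacement, same idea as ReplayBuffer.SampleIndex,
    // but clamped so it cannot loop forever when batchSize > Count.
    public List<T> Sample(int batchSize, Random rng)
    {
        var picked = new HashSet<int>();
        var target = Math.Min(batchSize, m_Items.Count);
        while (picked.Count < target)
        {
            picked.Add(rng.Next(m_Items.Count));
        }
        return picked.Select(i => m_Items[i]).ToList();
    }
}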
com.unity.ml-agents/Runtime/Trainer.cs (new file):

// Trainer for C# training. One trainer per behavior.

using System;
using Unity.MLAgents.Actuators;
using Unity.Barracuda;
using UnityEngine;

namespace Unity.MLAgents
{
    internal class TrainerConfig
    {
        public int bufferSize = 100;
        public int batchSize = 4;
        public float gamma = 0.99f;
        public float learningRate = 0.0005f;
        public int updateTargetFreq = 200;
    }

    internal class Trainer : IDisposable
    {
        ReplayBuffer m_Buffer;
        TrainingModelRunner m_ModelRunner;
        TrainingModelRunner m_TargetModelRunner;
        string m_behaviorName;
        TrainerConfig m_Config;
        int m_TrainingStep;

        public Trainer(string behaviorName, ActionSpec actionSpec, NNModel model, int seed = 0, TrainerConfig config = null)
        {
            m_Config = config ?? new TrainerConfig();
            m_behaviorName = behaviorName;
            m_Buffer = new ReplayBuffer(m_Config.bufferSize);
            m_ModelRunner = new TrainingModelRunner(actionSpec, model, m_Buffer, m_Config, seed);
            m_TargetModelRunner = new TrainingModelRunner(actionSpec, model, m_Buffer, m_Config, seed);
            // copy weights from model to target model
            // m_TargetModelRunner.model.weights = m_ModelRunner.model.weights
            Academy.Instance.TrainerUpdate += Update;
        }

        public string BehaviorName
        {
            get => m_behaviorName;
        }

        public ReplayBuffer Buffer
        {
            get => m_Buffer;
        }

        public TrainingModelRunner TrainerModelRunner
        {
            get => m_ModelRunner;
        }

        public void Dispose()
        {
            Academy.Instance.TrainerUpdate -= Update;
        }

        public void Update()
        {
            if (m_Buffer.Count < m_Config.batchSize * 2)
            {
                return;
            }

            var samples = m_Buffer.SampleBatch(m_Config.batchSize);
            m_ModelRunner.UpdateModel(samples);

            // Update target network
            if (m_TrainingStep % m_Config.updateTargetFreq == 0)
            {
                // copy weights
            }

            m_TrainingStep += 1;
        }
    }
}
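Trainer.Update follows the usual DQN-style schedule: learn on a sampled minibatch every callback, and only refresh the target network every updateTargetFreq steps (the weight copy itself is still a TODO in this commit). A standalone sketch of that scheduling, with placeholder delegates standing in for the real model update:

using System;

// Illustrative scheduling sketch: train every step, sync the target network periodically.
internal class PeriodicTargetSync
{
    readonly int m_SyncEvery;
    int m_Step;

    public PeriodicTargetSync(int syncEvery)
    {
        m_SyncEvery = syncEvery;
    }

    public void Update(Action learnStep, Action copyWeightsToTarget)
    {
        learnStep();                  // e.g. one minibatch update on the online network
        if (m_Step % m_SyncEvery == 0)
        {
            copyWeightsToTarget();    // e.g. target <- online weights
        }
        m_Step += 1;
    }
}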
com.unity.ml-agents/Runtime/Inference/TrainingTensorGenerator.cs (new file):

using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Sensors;
using Unity.MLAgents;
using UnityEngine;

namespace Unity.MLAgents.Inference
{
    internal class TrainingTensorGenerator
    {
        public interface ITrainingGenerator
        {
            void Generate(
                TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState);
        }

        readonly Dictionary<string, ITrainingGenerator> m_Dict = new Dictionary<string, ITrainingGenerator>();

        public TrainingTensorGenerator(
            int seed,
            ITensorAllocator allocator,
            float learning_rate,
            float gamma,
            object barracudaModel = null
        )
        {
            // If model is null, no inference to run and exception is thrown before reaching here.
            if (barracudaModel == null)
            {
                return;
            }
            var model = (Model)barracudaModel;

            // Generator for Inputs
            var obsGen = new CopyObservationTensorsGenerator(allocator);
            obsGen.SetSensorIndex(0);
            m_Dict[TensorNames.Observations] = obsGen;
            var nextObsGen = new CopyNextObservationTensorsGenerator(allocator);
            nextObsGen.SetSensorIndex(0);
            m_Dict[TensorNames.NextObservations] = nextObsGen;
            m_Dict[TensorNames.ActionInput] = new ActionInputGenerator(allocator);
            m_Dict[TensorNames.RewardInput] = new RewardInputGenerator(allocator);
            m_Dict[TensorNames.DoneInput] = new DoneInputGenerator(allocator);
            m_Dict[TensorNames.LearningRate] = new ConstantGenerator(allocator, learning_rate);
            m_Dict[TensorNames.Gamma] = new ConstantGenerator(allocator, gamma);
            m_Dict[TensorNames.BatchSizePlaceholder] = new TrainingBatchSizeGenerator(allocator);
            m_Dict[TensorNames.TrainingStateIn] = new TrainingStateGenerator(allocator);
        }

        /// <summary>
        /// Populates the data of the tensor inputs given the data contained in the current batch
        /// of agents.
        /// </summary>
        /// <param name="tensors"> Enumerable of tensors that will be modified.</param>
        /// <param name="currentBatchSize"> The number of agents present in the current batch
        /// </param>
        /// <param name="infos"> List of AgentsInfos and Sensors that contains the
        /// data that will be used to modify the tensors</param>
        /// <exception cref="UnityAgentsException"> One of the tensor does not have an
        /// associated generator.</exception>
        public void GenerateTensors(
            IReadOnlyList<TensorProxy> tensors, int currentBatchSize, IList<Transition> transitions, TensorProxy trainingState, bool training = false)
        {
            for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
            {
                var tensor = tensors[tensorIndex];
                if (!m_Dict.ContainsKey(tensor.name))
                {
                    throw new UnityAgentsException(
                        $"Unknown tensorProxy expected as input : {tensor.name}");
                }
                if ((tensor.name == TensorNames.Observations || tensor.name == TensorNames.BatchSizePlaceholder) && training == false)
                {
                    continue;
                }
                m_Dict[tensor.name].Generate(tensor, currentBatchSize, transitions, trainingState);
            }
        }

        public static void CopyTensorToBatch(TensorProxy source, TensorProxy target, int batchIndex)
        {
            for (var i = 0; i < source.Height; i++)
            {
                for (var j = 0; j < source.Width; j++)
                {
                    for (var k = 0; k < source.Channels; k++)
                    {
                        target.data[batchIndex, i, j, k] = source.data[0, i, j, k];
                    }
                }
            }
        }
    }

    internal class ActionInputGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;

        public ActionInputGenerator(ITensorAllocator allocator)
        {
            m_Allocator = allocator;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
            for (var index = 0; index < batchSize; index++)
            {
                var actions = transitions[index].action.DiscreteActions;
                for (var j = 0; j < actions.Length; j++)
                {
                    tensorProxy.data[index, j] = actions[j];
                }
            }
        }
    }

    internal class RewardInputGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;

        public RewardInputGenerator(ITensorAllocator allocator)
        {
            m_Allocator = allocator;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
            for (var index = 0; index < batchSize; index++)
            {
                tensorProxy.data[index, 0] = transitions[index].reward;
            }
        }
    }

    internal class DoneInputGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;

        public DoneInputGenerator(ITensorAllocator allocator)
        {
            m_Allocator = allocator;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
            for (var index = 0; index < batchSize; index++)
            {
                tensorProxy.data[index, 0] = transitions[index].done == true ? 1f : 0f;
            }
        }
    }

    internal class CopyObservationTensorsGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;

        int m_SensorIndex;

        public CopyObservationTensorsGenerator(ITensorAllocator allocator)
        {
            m_Allocator = allocator;
        }

        public void SetSensorIndex(int index)
        {
            m_SensorIndex = index;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
            for (var index = 0; index < batchSize; index++)
            {
                TrainingTensorGenerator.CopyTensorToBatch(transitions[index].state[m_SensorIndex], tensorProxy, index);
            }
        }
    }

    internal class CopyNextObservationTensorsGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;

        int m_SensorIndex;

        public CopyNextObservationTensorsGenerator(ITensorAllocator allocator)
        {
            m_Allocator = allocator;
        }

        public void SetSensorIndex(int index)
        {
            m_SensorIndex = index;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
            for (var index = 0; index < batchSize; index++)
            {
                TrainingTensorGenerator.CopyTensorToBatch(transitions[index].nextState[m_SensorIndex], tensorProxy, index);
            }
        }
    }

    internal class ConstantGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;
        float m_Const;

        public ConstantGenerator(ITensorAllocator allocator, float c)
        {
            m_Allocator = allocator;
            m_Const = c;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            TensorUtils.ResizeTensor(tensorProxy, 1, m_Allocator);
            for (var index = 0; index < batchSize; index++)
            {
                tensorProxy.data?.Dispose();
                tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
                tensorProxy.data[0] = m_Const;
            }
        }
    }

    internal class TrainingBatchSizeGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;

        public TrainingBatchSizeGenerator(ITensorAllocator allocator)
        {
            m_Allocator = allocator;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            tensorProxy.data?.Dispose();
            tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
            tensorProxy.data[0] = batchSize;
        }
    }

    internal class TrainingStateGenerator : TrainingTensorGenerator.ITrainingGenerator
    {
        readonly ITensorAllocator m_Allocator;

        public TrainingStateGenerator(ITensorAllocator allocator)
        {
            m_Allocator = allocator;
        }

        public void Generate(TensorProxy tensorProxy, int batchSize, IList<Transition> transitions, TensorProxy trainingState)
        {
            TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
            for (var index = 0; index < batchSize; index++)
            {
                TensorUtils.CopyTensor(trainingState, tensorProxy);
            }
        }
    }
}
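CopyTensorToBatch above stacks single-sample tensors into one batched tensor by copying each sample into its own batch row. A plain-array analogue of the same idea, with illustrative names and shapes:

// Plain-array analogue of CopyTensorToBatch: copy one sample's values into
// row `batchIndex` of a batched buffer.
internal static class BatchCopySketch
{
    public static void CopyRowToBatch(float[] source, float[,] target, int batchIndex)
    {
        for (var i = 0; i < source.Length; i++)
        {
            target[batchIndex, i] = source[i];
        }
    }

    // Usage sketch: stack observations from a sampled minibatch.
    // var batch = new float[transitions.Count, obsSize];
    // CopyRowToBatch(observation, batch, k);
}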
(.meta import settings for one of the newly added scripts)

fileFormatVersion: 2
guid: cca690e21a2fe49b49f636cd4e76e0b4
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
com.unity.ml-agents/Runtime/Inference/TrainingForwardTensorApplier.cs (new file):

using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using UnityEngine;

namespace Unity.MLAgents.Inference
{
    /// <summary>
    /// Mapping between the output tensor names and the method that will use the
    /// output tensors and the Agents present in the batch to update their action, memories and
    /// value estimates.
    /// A TensorApplier implements a Dictionary of strings (node names) to an Action.
    /// This action takes as input the tensor and the Dictionary of Agent to AgentInfo for
    /// the current batch.
    /// </summary>
    internal class TrainingForwardTensorApplier
    {
        readonly Dictionary<string, TensorApplier.IApplier> m_Dict = new Dictionary<string, TensorApplier.IApplier>();

        /// <summary>
        /// Returns a new TensorAppliers object.
        /// </summary>
        /// <param name="actionSpec"> Description of the actions for the Agent.</param>
        /// <param name="seed"> The seed the Appliers will be initialized with.</param>
        /// <param name="allocator"> Tensor allocator</param>
        /// <param name="memories">Dictionary of AgentInfo.id to memory used to pass to the inference model.</param>
        /// <param name="barracudaModel"></param>
        public TrainingForwardTensorApplier(
            ActionSpec actionSpec,
            int seed,
            ITensorAllocator allocator,
            object barracudaModel = null)
        {
            // If model is null, no inference to run and exception is thrown before reaching here.
            if (barracudaModel == null)
            {
                return;
            }
            if (actionSpec.NumContinuousActions > 0)
            {
                throw new System.Exception("Cannot do continuous actions");
            }
            if (actionSpec.NumDiscreteActions != 1)
            {
                throw new System.Exception("Cannot do multi discrete actions, only single discrete");
            }

            var model = (Model)barracudaModel;

            m_Dict[TensorNames.TrainingOutput] = new MaxActionOutputApplier(actionSpec, seed, allocator);
        }

        /// <summary>
        /// Updates the state of the agents based on the data present in the tensor.
        /// </summary>
        /// <param name="tensors"> Enumerable of tensors containing the data.</param>
        /// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
        /// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
        /// <exception cref="UnityAgentsException"> One of the tensor does not have an
        /// associated applier.</exception>
        public void ApplyTensors(
            IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
        {
            for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
            {
                var tensor = tensors[tensorIndex];
                if (!m_Dict.ContainsKey(tensor.name))
                {
                    throw new UnityAgentsException(
                        $"Unknown tensorProxy expected as output : {tensor.name}");
                }
                m_Dict[tensor.name].Apply(tensor, actionIds, lastActions);
            }
        }
    }

    internal class MaxActionOutputApplier : TensorApplier.IApplier
    {
        readonly ActionSpec m_ActionSpec;

        public MaxActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
        {
            m_ActionSpec = actionSpec;
        }

        public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
        {
            var agentIndex = 0;
            var actionSpaceSize = tensorProxy.shape[tensorProxy.shape.Length - 1];

            for (var i = 0; i < actionIds.Count; i++)
            {
                var agentId = actionIds[i];
                if (lastActions.ContainsKey(agentId))
                {
                    var actionBuffer = lastActions[agentId];
                    if (actionBuffer.IsEmpty())
                    {
                        actionBuffer = new ActionBuffers(m_ActionSpec);
                        lastActions[agentId] = actionBuffer;
                    }
                    var discreteBuffer = actionBuffer.DiscreteActions;
                    var maxIndex = 0;
                    var maxValue = 0;
                    for (var j = 0; j < actionSpaceSize; j++)
                    {
                        var value = (int)tensorProxy.data[agentIndex, j];
                        if (value > maxValue)
                        {
                            maxIndex = j;
                            maxValue = value; // track the running maximum so maxIndex is a true argmax
                        }
                    }
                    var actionSize = discreteBuffer.Length;
                    discreteBuffer[0] = maxIndex;
                }
                agentIndex++;
            }
        }
    }
}
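MaxActionOutputApplier reads one row of the network output per agent and writes the index of the largest value into the single discrete action branch. A standalone sketch of that greedy (argmax) selection:

// Illustrative sketch of greedy discrete-action selection over one row of network outputs.
internal static class ArgMaxSketch
{
    public static int ArgMax(float[] qValues)
    {
        var best = 0;
        for (var i = 1; i < qValues.Length; i++)
        {
            if (qValues[i] > qValues[best])
            {
                best = i; // keep the index of the largest value seen so far
            }
        }
        return best;
    }
}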
(.meta import settings for one of the newly added scripts)

fileFormatVersion: 2
guid: eaafcce9c7c794667bc726e40e420824
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
(.meta import settings for one of the newly added scripts)

fileFormatVersion: 2
guid: 03ace8815cd804ee994a5068f618b845
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
com.unity.ml-agents/Runtime/Policies/TrainingPolicy.cs (new file):

// Policy for C# training

using Unity.Barracuda;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;

namespace Unity.MLAgents.Policies
{
    internal class TrainingPolicy : IPolicy
    {
        protected TrainingModelRunner m_ModelRunner;
        ActionBuffers m_LastActionBuffer;

        int m_AgentId;

        ActionSpec m_ActionSpec;

        string m_BehaviorName;

        AgentInfo m_LastInfo;

        IReadOnlyList<TensorProxy> m_LastObservations;

        ReplayBuffer m_buffer;

        IReadOnlyList<TensorProxy> m_CurrentObservations;

        /// <inheritdoc />
        public TrainingPolicy(
            ActionSpec actionSpec,
            string behaviorName,
            NNModel model
        )
        {
            var trainer = Academy.Instance.GetOrCreateTrainer(behaviorName, actionSpec, model);
            m_ModelRunner = trainer.TrainerModelRunner;
            m_buffer = trainer.Buffer;
            m_CurrentObservations = m_ModelRunner.GetInputTensors();
            m_BehaviorName = behaviorName;
            m_ActionSpec = actionSpec;
        }

        /// <inheritdoc />
        public void RequestDecision(AgentInfo info, List<ISensor> sensors)
        {
            m_AgentId = info.episodeId;
            m_ModelRunner.PutObservations(info, sensors);
            m_ModelRunner.GetObservationTensors(m_CurrentObservations, info, sensors);

            if (m_LastObservations != null)
            {
                m_buffer.Push(m_LastInfo, m_LastObservations, m_CurrentObservations);
            }
            else if (m_buffer.Count == 0)
            {
                // hack
                m_buffer.Push(info, m_CurrentObservations, m_CurrentObservations);
            }

            m_LastInfo = info;
            m_LastObservations = m_CurrentObservations;

            if (info.done == true)
            {
                m_buffer.Push(info, m_CurrentObservations, m_CurrentObservations); // dummy next_state
                m_LastObservations = null;
            }
        }

        /// <inheritdoc />
        public ref readonly ActionBuffers DecideAction()
        {
            m_ModelRunner.DecideBatch();
            m_LastActionBuffer = m_ModelRunner.GetAction(m_AgentId);
            return ref m_LastActionBuffer;
        }

        public void Dispose()
        {
        }
    }
}
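RequestDecision only completes a transition once the next observation arrives, and terminal steps are stored with the current observation as a dummy next state. A standalone sketch of that bookkeeping with simplified types (float[] observations, no action buffers), not part of this diff:

using System.Collections.Generic;

// Illustrative sketch of the transition bookkeeping in TrainingPolicy.RequestDecision.
internal class TransitionRecorderSketch
{
    public struct Step
    {
        public float[] State;
        public float Reward;
        public bool Done;
        public float[] NextState;
    }

    public readonly List<Step> Buffer = new List<Step>();

    float[] m_LastObs;
    float m_LastReward;
    bool m_LastDone;

    public void OnDecisionRequested(float[] currentObs, float reward, bool done)
    {
        if (m_LastObs != null)
        {
            // The previous step becomes a complete transition now that its next state is known.
            Buffer.Add(new Step { State = m_LastObs, Reward = m_LastReward, Done = m_LastDone, NextState = currentObs });
        }

        m_LastObs = currentObs;
        m_LastReward = reward;
        m_LastDone = done;

        if (done)
        {
            // Terminal step: store with a dummy next state and reset for the next episode.
            Buffer.Add(new Step { State = currentObs, Reward = reward, Done = true, NextState = currentObs });
            m_LastObs = null;
        }
    }
}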
(.meta import settings for one of the newly added scripts)

fileFormatVersion: 2
guid: 30a25b3276c294e5eb07b57fc1af4bdb
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
com.unity.ml-agents/Runtime/Policies/TrainingModelRunner.cs (new file):

// ModelRunner for C# training.

using System.Collections.Generic;
using Unity.Barracuda;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
using UnityEngine;
using Unity.MLAgents.Inference.Utils;

namespace Unity.MLAgents
{
    internal class TrainingModelRunner
    {
        List<AgentInfoSensorsPair> m_Infos = new List<AgentInfoSensorsPair>();
        Dictionary<int, ActionBuffers> m_LastActionsReceived = new Dictionary<int, ActionBuffers>();
        List<int> m_OrderedAgentsRequestingDecisions = new List<int>();
        TensorProxy m_TrainingState;

        ITensorAllocator m_TensorAllocator;
        TensorGenerator m_TensorGenerator;
        TrainingTensorGenerator m_TrainingTensorGenerator;
        TrainingForwardTensorApplier m_TensorApplier;

        Model m_Model;
        IWorker m_Engine;
        bool m_Verbose = false;
        string[] m_OutputNames;
        IReadOnlyList<TensorProxy> m_TrainingInputs;
        List<TensorProxy> m_TrainingOutputs;
        Dictionary<string, Tensor> m_InputsByName;
        Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();

        bool m_ObservationsInitialized;
        bool m_TrainingObservationsInitialized;

        ReplayBuffer m_Buffer;

        /// <summary>
        /// Initializes the Brain with the Model that it will use when selecting actions for
        /// the agents
        /// </summary>
        /// <param name="model"> The Barracuda model to load </param>
        /// <param name="actionSpec"> Description of the actions for the Agent.</param>
        /// <param name="inferenceDevice"> Inference execution device. CPU is the fastest
        /// option for most of ML Agents models. </param>
        /// <param name="seed"> The seed that will be used to initialize the RandomNormal
        /// and Multinomial objects used when running inference.</param>
        /// <exception cref="UnityAgentsException">Throws an error when the model is null
        /// </exception>
        public TrainingModelRunner(
            ActionSpec actionSpec,
            NNModel model,
            ReplayBuffer buffer,
            TrainerConfig config,
            int seed = 0)
        {
            Model barracudaModel;
            m_TensorAllocator = new TensorCachingAllocator();

            // barracudaModel = Barracuda.SomeModelBuilder.CreateModel();
            barracudaModel = ModelLoader.Load(model);
            m_Model = barracudaModel;
            WorkerFactory.Type executionDevice = WorkerFactory.Type.CSharpBurst;
            m_Engine = WorkerFactory.CreateWorker(executionDevice, barracudaModel, m_Verbose);

            m_TrainingInputs = barracudaModel.GetTrainingInputTensors();
            m_OutputNames = barracudaModel.GetOutputNames();
            InitializeTrainingState(barracudaModel);
            m_TensorGenerator = new TensorGenerator(
                seed, m_TensorAllocator, m_Memories, barracudaModel);
            m_TrainingTensorGenerator = new TrainingTensorGenerator(
                seed, m_TensorAllocator, config.learningRate, config.gamma, barracudaModel);
            m_TensorApplier = new TrainingForwardTensorApplier(
                actionSpec, seed, m_TensorAllocator, barracudaModel);
            m_InputsByName = new Dictionary<string, Tensor>();
            m_TrainingOutputs = new List<TensorProxy>();
            m_Buffer = buffer;
        }

        void InitializeTrainingState(Model barracudaModel)
        {
            m_TrainingState = new TensorProxy
            {
                data = barracudaModel.GetTensorByName(TensorNames.InitialTrainingState)
            };
        }

        void PrepareBarracudaInputs(IReadOnlyList<TensorProxy> infInputs)
        {
            m_InputsByName.Clear();
            for (var i = 0; i < infInputs.Count; i++)
            {
                var inp = infInputs[i];
                m_InputsByName[inp.name] = inp.data;
            }
        }

        public void Dispose()
        {
            if (m_Engine != null)
                m_Engine.Dispose();
            m_TensorAllocator?.Reset(false);
        }

        void FetchBarracudaOutputs(string[] names)
        {
            m_TrainingOutputs.Clear();
            foreach (var n in names)
            {
                var output = m_Engine.PeekOutput(n);
                m_TrainingOutputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
            }
        }

        public void PutObservations(AgentInfo info, List<ISensor> sensors)
        {
            m_Infos.Add(new AgentInfoSensorsPair
            {
                agentInfo = info,
                sensors = sensors
            });

            // We add the episodeId to this list to maintain the order in which the decisions were requested
            m_OrderedAgentsRequestingDecisions.Add(info.episodeId);

            if (!m_LastActionsReceived.ContainsKey(info.episodeId))
            {
                m_LastActionsReceived[info.episodeId] = ActionBuffers.Empty;
            }
            if (info.done)
            {
                // If the agent is done, we remove the key from the last action dictionary since no action
                // should be taken.
                m_LastActionsReceived.Remove(info.episodeId);
            }
        }

        public void GetObservationTensors(IReadOnlyList<TensorProxy> tensors, AgentInfo info, List<ISensor> sensors)
        {
            if (!m_ObservationsInitialized)
            {
                // Just grab the first agent in the collection (any will suffice, really).
                // We check for an empty Collection above, so this will always return successfully.
                m_TensorGenerator.InitializeObservations(sensors, m_TensorAllocator);
                m_ObservationsInitialized = true;
            }
            var infoSensorPair = new AgentInfoSensorsPair
            {
                agentInfo = info,
                sensors = sensors
            };
            m_TensorGenerator.GenerateTensors(tensors, 1, new List<AgentInfoSensorsPair> { infoSensorPair });
        }

        public IReadOnlyList<TensorProxy> GetInputTensors()
        {
            return m_Model.GetInputTensors();
        }

        public void DecideBatch()
        {
            var currentBatchSize = m_Infos.Count;
            if (currentBatchSize == 0)
            {
                return;
            }
            if (!m_ObservationsInitialized)
            {
                // Just grab the first agent in the collection (any will suffice, really).
                // We check for an empty Collection above, so this will always return successfully.
                var firstInfo = m_Infos[0];
                m_TensorGenerator.InitializeObservations(firstInfo.sensors, m_TensorAllocator);
                m_ObservationsInitialized = true;
            }

            // Prepare the input tensors to be feed into the engine
            m_TensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, m_Infos);
            m_TrainingTensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, m_Buffer.SampleDummyBatch(currentBatchSize), m_TrainingState);

            PrepareBarracudaInputs(m_TrainingInputs);

            // Execute the Model
            m_Engine.Execute(m_InputsByName);

            FetchBarracudaOutputs(m_OutputNames);

            // Update the outputs
            m_TensorApplier.ApplyTensors(m_TrainingOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);

            m_Infos.Clear();

            m_OrderedAgentsRequestingDecisions.Clear();
        }

        public void UpdateModel(List<Transition> transitions)
        {
            var currentBatchSize = transitions.Count;
            if (currentBatchSize == 0)
            {
                return;
            }

            m_TrainingTensorGenerator.GenerateTensors(m_TrainingInputs, currentBatchSize, transitions, m_TrainingState, true);

            PrepareBarracudaInputs(m_TrainingInputs);

            // Execute the Model
            m_Engine.Execute(m_InputsByName);

            // Update the model
            FetchBarracudaOutputs(new string[] { TensorNames.TrainingStateOut });
            m_TrainingState = m_TrainingOutputs[0];
        }

        public ActionBuffers GetAction(int agentId)
        {
            if (m_LastActionsReceived.ContainsKey(agentId))
            {
                return m_LastActionsReceived[agentId];
            }
            return ActionBuffers.Empty;
        }

        // void PrintTensor(TensorProxy tensor)
        // {
        //     Debug.Log($"Print tensor {tensor.name}");
        //     for (var b = 0; b < tensor.data.batch; b++)
        //     {
        //         var message = new List<float>();
        //         for (var i = 0; i < tensor.data.height; i++)
        //         {
        //             for (var j = 0; j < tensor.data.width; j++)
        //             {
        //                 for (var k = 0; k < tensor.data.channels; k++)
        //                 {
        //                     message.Add(tensor.data[b, i, j, k]);
        //                 }
        //             }
        //         }
        //         Debug.Log(string.Join(", ", message));
        //     }
        // }
    }
}
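Both DecideBatch and UpdateModel above follow the same Barracuda cycle: fill a name-to-Tensor dictionary, Execute, then PeekOutput the tensors of interest. A minimal standalone sketch of that cycle, assuming illustrative input/output names ("obs_0", "action") and a flat observation vector rather than the actual graph used by this PR:

using System.Collections.Generic;
using Unity.Barracuda;

// Minimal sketch of the load -> feed -> execute -> peek cycle used by TrainingModelRunner.
internal static class BarracudaCycleSketch
{
    public static float[] Run(NNModel asset, float[] observation)
    {
        var model = ModelLoader.Load(asset);
        using (var worker = WorkerFactory.CreateWorker(WorkerFactory.Type.CSharpBurst, model))
        using (var input = new Tensor(1, observation.Length, observation))
        {
            var feeds = new Dictionary<string, Tensor> { { "obs_0", input } };
            worker.Execute(feeds);                    // forward pass
            var output = worker.PeekOutput("action"); // owned by the worker; not disposed here
            return output.ToReadOnlyArray();          // copy the results out
        }
    }
}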