Compare commits
Merge into: unity-tech-cn:main
Pull from: unity-tech-cn:develop-newnormalization
This merge request has changes that conflict with the target branch. Conflicting files:
/ml-agents/mlagents/trainers/learn.py
/ml-agents/mlagents/trainers/trainer_controller.py
/ml-agents/mlagents/trainers/demo_loader.py
/ml-agents/mlagents/trainers/action_info.py
/ml-agents/mlagents/trainers/ppo/trainer.py
/ml-agents/mlagents/trainers/sac/trainer.py
/ml-agents/mlagents/trainers/tests/test_trainer_util.py
/ml-agents/mlagents/trainers/tests/test_trainer_controller.py
/ml-agents/mlagents/trainers/tests/mock_brain.py
/ml-agents/mlagents/trainers/tests/test_buffer.py
/ml-agents/mlagents/trainers/tests/test_rl_trainer.py
/ml-agents/mlagents/trainers/buffer.py
/ml-agents/mlagents/trainers/rl_trainer.py
/ml-agents/mlagents/trainers/trainer.py
/ml-agents/mlagents/trainers/trajectory.py
/ml-agents/mlagents/trainers/stats.py
/ml-agents/mlagents/trainers/tests/test_agent_processor.py
/ml-agents/mlagents/trainers/tests/test_trajectory.py
/ml-agents/mlagents/trainers/tests/test_stats.py
/ml-agents/mlagents/trainers/curriculum.py
/ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py
/ml-agents/mlagents/trainers/components/bc/module.py
/ml-agents/mlagents/trainers/ppo/policy.py
/ml-agents/mlagents/trainers/sac/policy.py
/ml-agents/mlagents/trainers/tests/test_ppo.py
/ml-agents/mlagents/trainers/tests/test_sac.py
/ml-agents/mlagents/trainers/tests/test_simple_rl.py
/ml-agents/mlagents/trainers/tf_policy.py
/ml-agents/mlagents/trainers/trainer_util.py
/ml-agents/mlagents/trainers/models.py
/ml-agents/mlagents/trainers/agent_processor.py
3 commits

Author | SHA1 | Message | Commit date |
---|---|---|---|
Ervin Teng | f80b1d12 | Use running norm and std | 5 years ago |
Ervin Teng | 0040dc7f | New way to update mean and var | 5 years ago |
Ervin Teng | 3d25f9d2 | Merge branch 'master' into develop-agentprocessor | 5 years ago |

32 files changed, with 1218 insertions and 859 deletions.
- 13 ml-agents/mlagents/trainers/curriculum.py
- 2 ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py
- 1 ml-agents/mlagents/trainers/components/bc/module.py
- 242 ml-agents/mlagents/trainers/rl_trainer.py
- 3 ml-agents/mlagents/trainers/action_info.py
- 29 ml-agents/mlagents/trainers/buffer.py
- 32 ml-agents/mlagents/trainers/demo_loader.py
- 6 ml-agents/mlagents/trainers/learn.py
- 212 ml-agents/mlagents/trainers/ppo/trainer.py
- 45 ml-agents/mlagents/trainers/ppo/policy.py
- 2 ml-agents/mlagents/trainers/sac/policy.py
- 140 ml-agents/mlagents/trainers/sac/trainer.py
- 22 ml-agents/mlagents/trainers/tests/test_trainer_controller.py
- 45 ml-agents/mlagents/trainers/tests/mock_brain.py
- 95 ml-agents/mlagents/trainers/tests/test_buffer.py
- 47 ml-agents/mlagents/trainers/tests/test_rl_trainer.py
- 108 ml-agents/mlagents/trainers/tests/test_ppo.py
- 38 ml-agents/mlagents/trainers/tests/test_sac.py
- 3 ml-agents/mlagents/trainers/tests/test_simple_rl.py
- 4 ml-agents/mlagents/trainers/tests/test_trainer_util.py
- 62 ml-agents/mlagents/trainers/tf_policy.py
- 112 ml-agents/mlagents/trainers/trainer.py
- 45 ml-agents/mlagents/trainers/trainer_controller.py
- 4 ml-agents/mlagents/trainers/trainer_util.py
- 32 ml-agents/mlagents/trainers/models.py
- 162 ml-agents/mlagents/trainers/agent_processor.py
- 128 ml-agents/mlagents/trainers/trajectory.py
- 119 ml-agents/mlagents/trainers/stats.py
- 63 ml-agents/mlagents/trainers/tests/test_agent_processor.py
- 110 ml-agents/mlagents/trainers/tests/test_trajectory.py
- 76 ml-agents/mlagents/trainers/tests/test_stats.py
- 75 ml-agents/mlagents/trainers/agent_processor.py
ml-agents/mlagents/trainers/agent_processor.py:

```python
from typing import List, Dict
from collections import defaultdict, Counter
import numpy as np

from mlagents.trainers.trainer import Trainer
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents.trainers.brain import BrainInfo
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.action_info import ActionInfoOutputs
from mlagents.trainers.stats import StatsReporter


class AgentProcessor:
    """
    AgentProcessor contains a dictionary per-agent trajectory buffers. The buffers are indexed by agent_id.
    Buffer also contains an update_buffer that corresponds to the buffer used when updating the model.
    One AgentProcessor should be created per agent group.
    """

    def __init__(
        self,
        trainer: Trainer,
        policy: TFPolicy,
        max_trajectory_length: int,
        stats_reporter: StatsReporter,
    ):
        """
        Create an AgentProcessor.
        :param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory
        when it is finished.
        :param policy: Policy instance associated with this AgentProcessor.
        :param max_trajectory_length: Maximum length of a trajectory before it is added to the trainer.
        :param stats_category: The category under which to write the stats. Usually, this comes from the Trainer.
        """
        self.experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
        self.last_brain_info: Dict[str, BrainInfo] = {}
        self.last_take_action_outputs: Dict[str, ActionInfoOutputs] = {}
        # Note: this is needed until we switch to AgentExperiences as the data input type.
        # We still need some info from the policy (memories, previous actions)
        # that really should be gathered by the env-manager.
        self.policy = policy
        self.episode_steps: Counter = Counter()
        self.episode_rewards: Dict[str, float] = defaultdict(float)
        self.stats_reporter = stats_reporter
        if max_trajectory_length:
            self.max_trajectory_length = max_trajectory_length
            self.ignore_max_length = False
        else:
            self.max_trajectory_length = 0
            self.ignore_max_length = True
        self.trainer = trainer

    def add_experiences(
        self,
        curr_info: BrainInfo,
        next_info: BrainInfo,
        take_action_outputs: ActionInfoOutputs,
    ) -> None:
        """
        Adds experiences to each agent's experience history.
        :param curr_info: current BrainInfo.
        :param next_info: next BrainInfo.
        :param take_action_outputs: The outputs of the Policy's get_action method.
        """
        if take_action_outputs:
            self.stats_reporter.add_stat(
                "Policy/Entropy", take_action_outputs["entropy"].mean()
            )
            self.stats_reporter.add_stat(
                "Policy/Learning Rate", take_action_outputs["learning_rate"]
            )

        for agent_id in curr_info.agents:
            self.last_brain_info[agent_id] = curr_info
            self.last_take_action_outputs[agent_id] = take_action_outputs

        # Store the environment reward
        tmp_environment_reward = np.array(next_info.rewards, dtype=np.float32)

        for agent_id in next_info.agents:
            stored_info = self.last_brain_info.get(agent_id, None)
            if stored_info is not None:
                stored_take_action_outputs = self.last_take_action_outputs[agent_id]
                idx = stored_info.agents.index(agent_id)
                next_idx = next_info.agents.index(agent_id)
                obs = []
                if not stored_info.local_done[idx]:
                    for i, _ in enumerate(stored_info.visual_observations):
                        obs.append(stored_info.visual_observations[i][idx])
                    if self.policy.use_vec_obs:
                        obs.append(stored_info.vector_observations[idx])
                    if self.policy.use_recurrent:
                        memory = self.policy.retrieve_memories([agent_id])[0, :]
                    else:
                        memory = None

                    done = next_info.local_done[next_idx]
                    max_step = next_info.max_reached[next_idx]

                    # Add the outputs of the last eval
                    action = stored_take_action_outputs["action"][idx]
                    if self.policy.use_continuous_act:
                        action_pre = stored_take_action_outputs["pre_action"][idx]
                    else:
                        action_pre = None
                    action_probs = stored_take_action_outputs["log_probs"][idx]
                    action_masks = stored_info.action_masks[idx]
                    prev_action = self.policy.retrieve_previous_action([agent_id])[0, :]

                    experience = AgentExperience(
                        obs=obs,
                        reward=tmp_environment_reward[next_idx],
                        done=done,
                        action=action,
                        action_probs=action_probs,
                        action_pre=action_pre,
                        action_mask=action_masks,
                        prev_action=prev_action,
                        max_step=max_step,
                        memory=memory,
                    )
                    # Add the value outputs if needed
                    self.experience_buffers[agent_id].append(experience)
                    self.episode_rewards[agent_id] += tmp_environment_reward[next_idx]
                if (
                    next_info.local_done[next_idx]
                    or (
                        not self.ignore_max_length
                        and len(self.experience_buffers[agent_id])
                        >= self.max_trajectory_length
                    )
                ) and len(self.experience_buffers[agent_id]) > 0:
                    # Make next AgentExperience
                    next_obs = []
                    for i, _ in enumerate(next_info.visual_observations):
                        next_obs.append(next_info.visual_observations[i][next_idx])
                    if self.policy.use_vec_obs:
                        next_obs.append(next_info.vector_observations[next_idx])
                    trajectory = Trajectory(
                        steps=self.experience_buffers[agent_id],
                        agent_id=agent_id,
                        next_obs=next_obs,
                    )
                    # This will eventually be replaced with a queue
                    self.trainer.process_trajectory(trajectory)
                    self.experience_buffers[agent_id] = []
                    if next_info.local_done[next_idx]:
                        self.stats_reporter.add_stat(
                            "Environment/Cumulative Reward",
                            self.episode_rewards.get(agent_id, 0),
                        )
                        self.stats_reporter.add_stat(
                            "Environment/Episode Length",
                            self.episode_steps.get(agent_id, 0),
                        )
                        del self.episode_steps[agent_id]
                        del self.episode_rewards[agent_id]
                elif not next_info.local_done[next_idx]:
                    self.episode_steps[agent_id] += 1
        self.policy.save_previous_action(
            curr_info.agents, take_action_outputs["action"]
        )
```
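For orientation: the processor is constructed once per behavior and driven once per environment step; when an agent reports done, or its buffer reaches `max_trajectory_length`, the processor hands a `Trajectory` to `trainer.process_trajectory` and clears that agent's buffer. Below is a minimal sketch of that wiring using mock stand-ins for the trainer and policy; the mocks and the commented step loop are illustrative only and not part of the diff.

```python
# Illustrative sketch only: drive an AgentProcessor with mock collaborators.
import unittest.mock as mock

from mlagents.trainers.agent_processor import AgentProcessor
from mlagents.trainers.stats import StatsReporter

trainer = mock.Mock()   # receives finished trajectories via process_trajectory()
policy = mock.Mock()    # supplies memories and previous actions to the processor

processor = AgentProcessor(
    trainer=trainer,
    policy=policy,
    max_trajectory_length=64,   # a falsy value would disable the length-based cutoff
    stats_reporter=StatsReporter("MyBehavior"),
)

# Each environment step the surrounding loop would call, with real BrainInfo objects:
#   processor.add_experiences(curr_info, next_info, take_action_outputs)
# On done (or once 64 experiences accumulate for an agent), the processor calls
#   trainer.process_trajectory(trajectory)
# and resets that agent's experience buffer.
```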
ml-agents/mlagents/trainers/trajectory.py:

```python
from typing import List, NamedTuple
import numpy as np

from mlagents.trainers.buffer import AgentBuffer


class AgentExperience(NamedTuple):
    obs: List[np.ndarray]
    reward: float
    done: bool
    action: np.ndarray
    action_probs: np.ndarray
    action_pre: np.ndarray  # TODO: Remove this
    action_mask: np.ndarray
    prev_action: np.ndarray
    max_step: bool
    memory: np.ndarray


class SplitObservations(NamedTuple):
    vector_observations: np.ndarray
    visual_observations: List[np.ndarray]

    @staticmethod
    def from_observations(obs: List[np.ndarray]) -> "SplitObservations":
        """
        Divides a List of numpy arrays into a SplitObservations NamedTuple.
        This allows you to access the vector and visual observations directly,
        without enumerating the list over and over.
        :param obs: List of numpy arrays (observation)
        :returns: A SplitObservations object.
        """
        vis_obs_indices = []
        vec_obs_indices = []
        for index, observation in enumerate(obs):
            if len(observation.shape) == 1:
                vec_obs_indices.append(index)
            if len(observation.shape) == 3:
                vis_obs_indices.append(index)
        vec_obs = (
            np.concatenate([obs[i] for i in vec_obs_indices], axis=0)
            if len(vec_obs_indices) > 0
            else np.array([], dtype=np.float32)
        )
        vis_obs = [obs[i] for i in vis_obs_indices]
        return SplitObservations(
            vector_observations=vec_obs, visual_observations=vis_obs
        )


class Trajectory(NamedTuple):
    steps: List[AgentExperience]
    next_obs: List[
        np.ndarray
    ]  # Observation following the trajectory, for bootstrapping
    agent_id: str

    def to_agentbuffer(self) -> AgentBuffer:
        """
        Converts a Trajectory to an AgentBuffer
        :param trajectory: A Trajectory
        :returns: AgentBuffer. Note that the length of the AgentBuffer will be one
        less than the trajectory, as the next observation need to be populated from the last
        step of the trajectory.
        """
        agent_buffer_trajectory = AgentBuffer()
        for step, exp in enumerate(self.steps):
            vec_vis_obs = SplitObservations.from_observations(exp.obs)
            if step < len(self.steps) - 1:
                next_vec_vis_obs = SplitObservations.from_observations(
                    self.steps[step + 1].obs
                )
            else:
                next_vec_vis_obs = SplitObservations.from_observations(self.next_obs)

            for i, _ in enumerate(vec_vis_obs.visual_observations):
                agent_buffer_trajectory["visual_obs%d" % i].append(
                    vec_vis_obs.visual_observations[i]
                )
                agent_buffer_trajectory["next_visual_obs%d" % i].append(
                    next_vec_vis_obs.visual_observations[i]
                )
            agent_buffer_trajectory["vector_obs"].append(
                vec_vis_obs.vector_observations
            )
            agent_buffer_trajectory["next_vector_in"].append(
                next_vec_vis_obs.vector_observations
            )
            if exp.memory is not None:
                agent_buffer_trajectory["memory"].append(exp.memory)

            agent_buffer_trajectory["masks"].append(1.0)
            agent_buffer_trajectory["done"].append(exp.done)
            # Add the outputs of the last eval
            if exp.action_pre is not None:
                actions_pre = exp.action_pre
                agent_buffer_trajectory["actions_pre"].append(actions_pre)

            # value is a dictionary from name of reward to value estimate of the value head
            agent_buffer_trajectory["actions"].append(exp.action)
            agent_buffer_trajectory["action_probs"].append(exp.action_probs)

            # Store action masks if necessary. Eventually these will be
            # None for continuous actions
            if exp.action_mask is not None:
                agent_buffer_trajectory["action_mask"].append(
                    exp.action_mask, padding_value=1
                )

            agent_buffer_trajectory["prev_action"].append(exp.prev_action)

            # Add the value outputs if needed
            agent_buffer_trajectory["environment_rewards"].append(exp.reward)
        return agent_buffer_trajectory

    @property
    def done_reached(self) -> bool:
        """
        Returns true if trajectory is terminated with a Done.
        """
        return self.steps[-1].done

    @property
    def max_step_reached(self) -> bool:
        """
        Returns true if trajectory was terminated because max steps was reached.
        """
        return self.steps[-1].max_step
```
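To make the data flow concrete, here is a small, self-contained sketch that builds a one-step trajectory and converts it to an `AgentBuffer`, following the observation shapes used elsewhere in this diff (an 84x84x3 visual observation plus a vector observation); the values themselves are arbitrary.

```python
import numpy as np

from mlagents.trainers.trajectory import AgentExperience, SplitObservations, Trajectory

# One visual observation (H, W, C) and one vector observation.
obs = [np.ones((84, 84, 3), dtype=np.float32), np.ones(6, dtype=np.float32)]

split = SplitObservations.from_observations(obs)
assert split.vector_observations.shape == (6,)   # 1-D arrays are treated as vector obs
assert len(split.visual_observations) == 1       # 3-D arrays are treated as visual obs

exp = AgentExperience(
    obs=obs,
    reward=1.0,
    done=True,
    action=np.zeros(2, dtype=np.float32),
    action_probs=np.ones(2, dtype=np.float32),
    action_pre=np.zeros(2, dtype=np.float32),
    action_mask=np.ones(2, dtype=np.float32),
    prev_action=np.zeros(2, dtype=np.float32),
    max_step=False,
    memory=np.zeros(10, dtype=np.float32),
)
trajectory = Trajectory(steps=[exp], next_obs=obs, agent_id="agent_0")

buffer = trajectory.to_agentbuffer()
assert trajectory.done_reached                   # last step has done=True
assert len(buffer["environment_rewards"]) == 1   # one entry per step
```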
ml-agents/mlagents/trainers/stats.py:

```python
from collections import defaultdict
from typing import List, Dict, NamedTuple
import numpy as np
import abc
import os

from mlagents.tf_utils import tf


class StatsWriter(abc.ABC):
    """
    A StatsWriter abstract class. A StatsWriter takes in a category, key, scalar value, and step
    and writes it out by some method.
    """

    @abc.abstractmethod
    def write_stats(self, category: str, key: str, value: float, step: int) -> None:
        pass

    @abc.abstractmethod
    def write_text(self, category: str, text: str, step: int) -> None:
        pass


class TensorboardWriter(StatsWriter):
    def __init__(self, base_dir: str):
        self.summary_writers: Dict[str, tf.summary.FileWriter] = {}
        self.base_dir: str = base_dir

    def write_stats(self, category: str, key: str, value: float, step: int) -> None:
        self._maybe_create_summary_writer(category)
        summary = tf.Summary()
        summary.value.add(tag="{}".format(key), simple_value=value)
        self.summary_writers[category].add_summary(summary, step)
        self.summary_writers[category].flush()

    def _maybe_create_summary_writer(self, category: str) -> None:
        if category not in self.summary_writers:
            filewriter_dir = "{basedir}/{category}".format(
                basedir=self.base_dir, category=category
            )
            if not os.path.exists(filewriter_dir):
                os.makedirs(filewriter_dir)
            self.summary_writers[category] = tf.summary.FileWriter(filewriter_dir)

    def write_text(self, category: str, text: str, step: int) -> None:
        self._maybe_create_summary_writer(category)
        self.summary_writers[category].add_summary(text, step)


class StatsSummary(NamedTuple):
    mean: float
    std: float
    num: int


class StatsReporter:
    writers: List[StatsWriter] = []
    stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))

    def __init__(self, category):
        """
        Generic StatsReporter. A category is the broadest type of storage (would
        correspond the run name and trainer name, e.g. 3DBalltest_3DBall. A key is the
        type of stat it is (e.g. Environment/Reward). Finally the Value is the float value
        attached to this stat.
        """
        self.category: str = category

    @staticmethod
    def add_writer(writer: StatsWriter) -> None:
        StatsReporter.writers.append(writer)

    def add_stat(self, key: str, value: float) -> None:
        """
        Add a float value stat to the StatsReporter.
        :param category: The highest categorization of the statistic, e.g. behavior name.
        :param key: The type of statistic, e.g. Environment/Reward.
        :param value: the value of the statistic.
        """
        StatsReporter.stats_dict[self.category][key].append(value)

    def write_stats(self, step: int) -> None:
        """
        Write out all stored statistics that fall under the category specified.
        The currently stored values will be averaged, written out as a single value,
        and the buffer cleared.
        :param category: The category which to write out the stats.
        :param step: Training step which to write these stats as.
        """
        for key in StatsReporter.stats_dict[self.category]:
            if len(StatsReporter.stats_dict[self.category][key]) > 0:
                stat_mean = float(np.mean(StatsReporter.stats_dict[self.category][key]))
                for writer in StatsReporter.writers:
                    writer.write_stats(self.category, key, stat_mean, step)
        del StatsReporter.stats_dict[self.category]

    def write_text(self, text: str, step: int) -> None:
        """
        Write out some text.
        :param category: The highest categorization of the statistic, e.g. behavior name.
        :param text: The text to write out.
        :param step: Training step which to write these stats as.
        """
        for writer in StatsReporter.writers:
            writer.write_text(self.category, text, step)

    def get_stats_summaries(self, key: str) -> StatsSummary:
        """
        Get the mean, std, and count of a particular statistic, since last write.
        :param category: The highest categorization of the statistic, e.g. behavior name.
        :param key: The type of statistic, e.g. Environment/Reward.
        :returns: A StatsSummary NamedTuple containing (mean, std, count).
        """
        return StatsSummary(
            mean=np.mean(StatsReporter.stats_dict[self.category][key]),
            std=np.std(StatsReporter.stats_dict[self.category][key]),
            num=len(StatsReporter.stats_dict[self.category][key]),
        )
```
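The split between `StatsReporter` (per-category accumulation) and `StatsWriter` (output backend) means any writer can be registered alongside `TensorboardWriter`. A short usage sketch, assuming the module as shown above; the console writer here is illustrative and not part of the PR.

```python
from mlagents.trainers.stats import StatsReporter, StatsWriter


class ConsoleWriter(StatsWriter):
    """Illustrative backend that prints instead of writing TensorBoard summaries."""

    def write_stats(self, category: str, key: str, value: float, step: int) -> None:
        print("[{}] step {}: {} = {}".format(category, step, key, value))

    def write_text(self, category: str, text: str, step: int) -> None:
        print("[{}] step {}: {}".format(category, step, text))


StatsReporter.add_writer(ConsoleWriter())   # writers are class-level, shared by all reporters
reporter = StatsReporter("3DBall")          # category, e.g. the behavior name

for reward in (1.0, 2.0, 3.0):
    reporter.add_stat("Environment/Cumulative Reward", reward)

summary = reporter.get_stats_summaries("Environment/Cumulative Reward")
print(summary.mean, summary.num)            # 2.0 3
reporter.write_stats(step=100)              # averages, emits to all writers, clears buffer
```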
ml-agents/mlagents/trainers/tests/test_agent_processor.py:

```python
import unittest.mock as mock
import pytest
import mlagents.trainers.tests.mock_brain as mb
import numpy as np
from mlagents.trainers.agent_processor import AgentProcessor
from mlagents.trainers.stats import StatsReporter


def create_mock_brain():
    mock_brain = mb.create_mock_brainparams(
        vector_action_space_type="continuous",
        vector_action_space_size=[2],
        vector_observation_space_size=8,
        number_visual_observations=1,
    )
    return mock_brain


def create_mock_policy():
    mock_policy = mock.Mock()
    mock_policy.reward_signals = {}
    mock_policy.retrieve_memories.return_value = np.zeros((1, 1), dtype=np.float32)
    mock_policy.retrieve_previous_action.return_value = np.zeros(
        (1, 1), dtype=np.float32
    )
    return mock_policy


@pytest.mark.parametrize("num_vis_obs", [0, 1, 2], ids=["vec", "1 viz", "2 viz"])
def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    trainer = mock.Mock()
    processor = AgentProcessor(
        trainer,
        policy,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
        "action": [0.1, 0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1, 0.1],
        "log_probs": [0.1, 0.1],
    }
    mock_braininfo = mb.create_mock_braininfo(
        num_agents=2,
        num_vector_observations=8,
        num_vector_acts=2,
        num_vis_observations=num_vis_obs,
    )
    for i in range(5):
        processor.add_experiences(mock_braininfo, mock_braininfo, fake_action_outputs)

    # Assert that two trajectories have been added to the Trainer
    assert len(trainer.process_trajectory.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = trainer.process_trajectory.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0
```
ml-agents/mlagents/trainers/tests/test_trajectory.py:

```python
import numpy as np
import pytest

from mlagents.trainers.trajectory import AgentExperience, Trajectory, SplitObservations

VEC_OBS_SIZE = 6
ACTION_SIZE = 4


def make_fake_trajectory(
    length: int,
    max_step_complete: bool = False,
    vec_obs_size: int = VEC_OBS_SIZE,
    num_vis_obs: int = 1,
    action_space: int = ACTION_SIZE,
) -> Trajectory:
    """
    Makes a fake trajectory of length length. If max_step_complete,
    the trajectory is terminated by a max step rather than a done.
    """
    steps_list = []
    for i in range(length - 1):
        obs = []
        for i in range(num_vis_obs):
            obs.append(np.ones((84, 84, 3), dtype=np.float32))
        obs.append(np.ones(vec_obs_size, dtype=np.float32))
        reward = 1.0
        done = False
        action = np.zeros(action_space, dtype=np.float32)
        action_probs = np.ones(action_space, dtype=np.float32)
        action_pre = np.zeros(action_space, dtype=np.float32)
        action_mask = np.ones(action_space, dtype=np.float32)
        prev_action = np.ones(action_space, dtype=np.float32)
        max_step = False
        memory = np.ones(10, dtype=np.float32)
        agent_id = "test_agent"
        experience = AgentExperience(
            obs=obs,
            reward=reward,
            done=done,
            action=action,
            action_probs=action_probs,
            action_pre=action_pre,
            action_mask=action_mask,
            prev_action=prev_action,
            max_step=max_step,
            memory=memory,
        )
        steps_list.append(experience)
    last_experience = AgentExperience(
        obs=obs,
        reward=reward,
        done=not max_step_complete,
        action=action,
        action_probs=action_probs,
        action_pre=action_pre,
        action_mask=action_mask,
        prev_action=prev_action,
        max_step=max_step_complete,
        memory=memory,
    )
    steps_list.append(last_experience)
    return Trajectory(steps=steps_list, agent_id=agent_id, next_obs=obs)


@pytest.mark.parametrize("num_visual_obs", [0, 1, 2])
@pytest.mark.parametrize("num_vec_obs", [0, 1])
def test_split_obs(num_visual_obs, num_vec_obs):
    obs = []
    for i in range(num_visual_obs):
        obs.append(np.ones((84, 84, 3), dtype=np.float32))
    for i in range(num_vec_obs):
        obs.append(np.ones(VEC_OBS_SIZE, dtype=np.float32))
    split_observations = SplitObservations.from_observations(obs)

    if num_vec_obs == 1:
        assert len(split_observations.vector_observations) == VEC_OBS_SIZE
    else:
        assert len(split_observations.vector_observations) == 0

    # Assert the number of vector observations.
    assert len(split_observations.visual_observations) == num_visual_obs


def test_trajectory_to_agentbuffer():
    length = 15
    wanted_keys = [
        "next_visual_obs0",
        "visual_obs0",
        "vector_obs",
        "next_vector_in",
        "memory",
        "masks",
        "done",
        "actions_pre",
        "actions",
        "action_probs",
        "action_mask",
        "prev_action",
        "environment_rewards",
    ]
    wanted_keys = set(wanted_keys)
    trajectory = make_fake_trajectory(length=length)
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()
    for key, field in agentbuffer.items():
        assert len(field) == length
        seen_keys.add(key)

    assert seen_keys == wanted_keys
```
ml-agents/mlagents/trainers/tests/test_stats.py:

```python
import unittest.mock as mock
import os

from mlagents.trainers.stats import StatsReporter, TensorboardWriter


def test_stat_reporter_add_summary_write():
    # Test add_writer
    StatsReporter.writers.clear()
    mock_writer1 = mock.Mock()
    mock_writer2 = mock.Mock()
    StatsReporter.add_writer(mock_writer1)
    StatsReporter.add_writer(mock_writer2)
    assert len(StatsReporter.writers) == 2

    # Test add_stats and summaries
    statsreporter1 = StatsReporter("category1")
    statsreporter2 = StatsReporter("category2")
    for i in range(10):
        statsreporter1.add_stat("key1", float(i))
        statsreporter2.add_stat("key2", float(i))

    statssummary1 = statsreporter1.get_stats_summaries("key1")
    statssummary2 = statsreporter2.get_stats_summaries("key2")

    assert statssummary1.num == 10
    assert statssummary2.num == 10
    assert statssummary1.mean == 4.5
    assert statssummary2.mean == 4.5
    assert round(statssummary1.std, 1) == 2.9
    assert round(statssummary2.std, 1) == 2.9

    # Test write_stats
    step = 10
    statsreporter1.write_stats(step)
    mock_writer1.write_stats.assert_called_once_with("category1", "key1", 4.5, step)
    mock_writer2.write_stats.assert_called_once_with("category1", "key1", 4.5, step)


def test_stat_reporter_text():
    # Test add_writer
    mock_writer = mock.Mock()
    StatsReporter.writers.clear()
    StatsReporter.add_writer(mock_writer)
    assert len(StatsReporter.writers) == 1

    statsreporter1 = StatsReporter("category1")

    # Test write_text
    step = 10
    statsreporter1.write_text("this is a text", step)
    mock_writer.write_text.assert_called_once_with("category1", "this is a text", step)


@mock.patch("mlagents.tf_utils.tf.Summary")
@mock.patch("mlagents.tf_utils.tf.summary.FileWriter")
def test_tensorboard_writer(mock_filewriter, mock_summary):
    # Test write_stats
    base_dir = "base_dir"
    category = "category1"
    tb_writer = TensorboardWriter(base_dir)
    tb_writer.write_stats("category1", "key1", 1.0, 10)

    # Test that the filewriter has been created and the directory has been created.
    filewriter_dir = "{basedir}/{category}".format(basedir=base_dir, category=category)
    assert os.path.exists(filewriter_dir)
    mock_filewriter.assert_called_once_with(filewriter_dir)

    # Test that the filewriter was written to and the summary was added.
    mock_summary.return_value.value.add.assert_called_once_with(
        tag="key1", simple_value=1.0
    )
    mock_filewriter.return_value.add_summary.assert_called_once_with(
        mock_summary.return_value, 10
    )
    mock_filewriter.return_value.flush.assert_called_once()
```
ProcessingBuffer:

```python
from typing import List, Union

from mlagents.trainers.buffer import AgentBuffer, BufferException


class ProcessingBuffer(dict):
    """
    ProcessingBuffer contains a dictionary of AgentBuffer. The AgentBuffers are indexed by agent_id.
    """

    def __str__(self):
        return "local_buffers :\n{0}".format(
            "\n".join(["\tagent {0} :{1}".format(k, str(self[k])) for k in self.keys()])
        )

    def __getitem__(self, key):
        if key not in self.keys():
            self[key] = AgentBuffer()
        return super().__getitem__(key)

    def reset_local_buffers(self) -> None:
        """
        Resets all the local AgentBuffers.
        """
        for buf in self.values():
            buf.reset_agent()

    def append_to_update_buffer(
        self,
        update_buffer: AgentBuffer,
        agent_id: Union[int, str],
        key_list: List[str] = None,
        batch_size: int = None,
        training_length: int = None,
    ) -> None:
        """
        Appends the buffer of an agent to the update buffer.
        :param update_buffer: A reference to an AgentBuffer to append the agent's buffer to
        :param agent_id: The id of the agent which data will be appended
        :param key_list: The fields that must be added. If None: all fields will be appended.
        :param batch_size: The number of elements that must be appended. If None: All of them will be.
        :param training_length: The length of the samples that must be appended. If None: only takes one element.
        """
        if key_list is None:
            key_list = self[agent_id].keys()
        if not self[agent_id].check_length(key_list):
            raise BufferException(
                "The length of the fields {0} for agent {1} were not of same length".format(
                    key_list, agent_id
                )
            )
        for field_key in key_list:
            update_buffer[field_key].extend(
                self[agent_id][field_key].get_batch(
                    batch_size=batch_size, training_length=training_length
                )
            )

    def append_all_agent_batch_to_update_buffer(
        self,
        update_buffer: AgentBuffer,
        key_list: List[str] = None,
        batch_size: int = None,
        training_length: int = None,
    ) -> None:
        """
        Appends the buffer of all agents to the update buffer.
        :param key_list: The fields that must be added. If None: all fields will be appended.
        :param batch_size: The number of elements that must be appended. If None: All of them will be.
        :param training_length: The length of the samples that must be appended. If None: only takes one element.
        """
        for agent_id in self.keys():
            self.append_to_update_buffer(
                update_buffer, agent_id, key_list, batch_size, training_length
            )
```
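For comparison with the trajectory-based path above, here is a minimal sketch of how `ProcessingBuffer` collects per-agent data and flattens it into an update buffer; it assumes only the `AgentBuffer` field API (`append`/`extend`) already used earlier in this diff.

```python
from mlagents.trainers.buffer import AgentBuffer

proc_buffer = ProcessingBuffer()                        # ProcessingBuffer as defined above
proc_buffer["agent_0"]["actions"].append([0.1, -0.2])   # AgentBuffer is created on first access
proc_buffer["agent_0"]["rewards"].append(1.0)

update_buffer = AgentBuffer()
# Flatten this agent's fields into the shared update buffer, then reset the local buffers.
proc_buffer.append_to_update_buffer(update_buffer, "agent_0")
proc_buffer.reset_local_buffers()
```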