"""This module encodes the game action into a ML friendly format"""
from enum import IntEnum
from typing import Tuple
from luafun.game.ipc_send import new_ipc_message, TEAM_RADIANT, TEAM_DIRE
class AbilitySlot(IntEnum):
    """List all the abilities available to a given hero.

    The values are contiguous and grouped as follows:

    * 0-5: inventory items
    * 6-8: backpack
    * 9-14: stash
    * 15-16: extra item slots (see the inline comments)
    * 17-32: abilities
    * 33-40: talents

    The trailing comments on the ability members show the Invoker ability
    found in that slot as an example.
    """

    # Inventory
    Item0 = 0
    Item1 = 1
    Item2 = 2
    Item3 = 3
    Item4 = 4
    Item5 = 5

    # Backpack
    Backpack1 = 6
    Backpack2 = 7
    Backpack3 = 8

    # Stash
    Stash1 = 9
    Stash2 = 10
    Stash3 = 11
    Stash4 = 12
    Stash5 = 13
    Stash6 = 14

    Item15 = 15             # TP
    Item16 = 16             # Neutral ?

    # Abilities
    Q = 17                  # Q                 | invoker_quas
    W = 18                  # W                 | invoker_wex
    E = 19                  # E                 | invoker_exort
    D = 20                  # D generic_hidden  | invoker_empty1
    F = 21                  # F generic_hidden  | invoker_empty2
    R = 22                  # R                 | invoker_invoke

    # NOTE: "Ablity" (sic) is part of the public member names; the spelling is
    # kept as-is so existing lookups by name keep working.
    Ablity6 = 23            # .                 | invoker_cold_snap
    Ablity7 = 24            # .                 | invoker_ghost_walk
    Ablity8 = 25            # .                 | invoker_tornado
    Ablity9 = 26            # .                 | invoker_emp
    Ablity10 = 27           # .                 | invoker_alacrity
    Ablity11 = 28           # .                 | invoker_chaos_meteor
    Ablity12 = 29           # .                 | invoker_sun_strike
    Ablity13 = 30           # .                 | invoker_forge_spirit
    Ablity14 = 31           # .                 | invoker_ice_wall
    Ablity15 = 32           # .                 | invoker_deafening_blast

    # Talents
    Talent11 = 33           # Talent 1 (usually, but the talent offset can be shifted)
    Talent12 = 34           # Talent 2 example: rubick, invoker, etc..
    Talent21 = 35           # Talent 3
    Talent22 = 36           # Talent 4 98 heroes follow the pattern above
    Talent31 = 37           # Talent 5
    Talent32 = 38           # Talent 6
    Talent41 = 39           # Talent 7
    Talent42 = 40           # Talent 8


# Import-time sanity check: catches a member being added, removed or
# accidentally aliased (duplicate values are not counted by len()).
assert len(AbilitySlot) == 41, '41 abilities'
# When looking at Action you might think that Dota is not that complex;
# nevertheless, keep in mind that when calling UseAbility
# you have to choose among ~1000 unique abilities (120 heroes * 4 + 155 items).
# The abilities are context dependent: each hero can have
#   ~4 abilities + the TP ability
#   ~6 items + a neutral item
#
# NB: To take an outpost, you can attack it using the AttackUnit action
class Action(IntEnum):
    """List all the actions available to a hero.

    The trailing comment on each member lists the arguments the action
    takes. The enum is padded with ``NotUsed*`` members up to 32 entries.
    """

    Stop = 0
    MoveToLocation = 1              # ( vLocation )
    MoveDirectly = 2                # ( vLocation )
    MoveToUnit = 3                  # ( hUnit )
    AttackUnit = 4                  # ( hUnit, bOnce = True )
    AttackMove = 5                  # ( vLocation )
    UseAbility = 6                  # ( hAbility )
    UseAbilityOnEntity = 7          # ( hAbility, hTarget )
    UseAbilityOnLocation = 8        # ( hAbility, vLocation )
    UseAbilityOnTree = 9            # ( hAbility, iTree )
    PickUpRune = 10                 # ( nRune )
    PickUpItem = 11                 # ( hItem )
    DropItem = 12                   # ( hItem, vLocation )
    PurchaseItem = 13               # ( sItemName )
    SellItem = 14                   # ( hItem )
    DisassembleItem = 15            # ( hItem )
    SetItemCombineLock = 16         # ( hItem, bLocked )

    # The index order does not matter.
    # Bots automatically transfer items from stash to inventory when possible.
    SwapItems = 17                  # ( index1, index2 )

    Buyback = 18                    # ()
    Glyph = 19                      # ()
    LevelAbility = 20               # ( sAbilityName )

    # Courier actions bundled with the hero
    CourierBurst = 21
    # CourierEnemySecret is a hidden ability and cannot be used,
    # so it has no member here.
    CourierReturn = 22
    CourierSecret = 23
    CourierTakeStash = 24
    CourierTransfer = 25

    # Tensor cores work better with a multiple of 8.
    # This padding also gives us room to grow.
    NotUsed1 = 26
    NotUsed2 = 27
    NotUsed3 = 28
    NotUsed4 = 29
    NotUsed5 = 30
    NotUsed6 = 31

    # These actions exist but are not necessary:
    #   Courier     # ( hCourier, nAction )
    #   UseShrine   # ( hShrine )
    #   MovePath    # ( tWaypoints )


# Import-time sanity check: the action count must stay at 32 (see the
# padding note inside the enum).
assert len(Action) == 32, '32 actions'
class DraftAction(IntEnum):
    """Keys of a hero-selection (draft) message.

    Used as dictionary keys by ``DraftBuilder`` and
    ``IPCMessageBuilder.hero_selection``.
    """

    EnableDraft = 0
    SelectHero = 1
    BanHero = 2
    Lane = 3
# Argument index
class ActionArgument(IntEnum):
    """Index of each argument inside an encoded player action.

    ``action`` holds the selected action; the remaining members are the
    slots in which that action's arguments are stored.
    """

    action = 0
    vLoc = 1
    hUnit = 2       # this should be a handle
    nSlot = 3       # Slot (item or ability)
    iTree = 4       # This is problematic: we have 2000+ trees
    nRune = 5       # This could be bundled as an enum like inventory slots
    sItem = 6       # Needed to buy an item
    ix2 = 7


# Short alias used throughout this module
ARG = ActionArgument
# 2":{"0":11,"2":355}
# Boilerplate to help humans send bot-like actions to Lua.
# This is only for debugging; it allows a human to control the Lua bots from Python.
class PlayerAction:
    """Player action builder.

    Wraps a mutable mapping ``act`` and fills it in place. Every method
    stores the chosen ``Action`` under ``ARG.action`` and the arguments of
    that action under their own ``ARG`` keys. Keys written by an earlier call
    are not cleared by a later one, and every method returns ``None``.
    """

    def __init__(self, act: dict):
        # The mapping is kept by reference: all writes are visible to the owner.
        self.act = act

    # -- Movement & attack ---------------------------------------------------
    def MoveToLocation(self, vLocation: Tuple[float, float]):
        """Select ``Action.MoveToLocation`` with ``vLocation`` as ``ARG.vLoc``."""
        self.act[ARG.action] = Action.MoveToLocation
        self.act[ARG.vLoc] = vLocation

    def MoveDirectly(self, vLocation: Tuple[float, float]):
        """Select ``Action.MoveDirectly`` with ``vLocation`` as ``ARG.vLoc``."""
        self.act[ARG.action] = Action.MoveDirectly
        self.act[ARG.vLoc] = vLocation

    def MoveToUnit(self, hUnit: int):
        """Select ``Action.MoveToUnit`` with ``hUnit`` as ``ARG.hUnit``."""
        self.act[ARG.action] = Action.MoveToUnit
        self.act[ARG.hUnit] = hUnit

    def AttackUnit(self, hUnit: int):
        """Select ``Action.AttackUnit`` with ``hUnit`` as ``ARG.hUnit``."""
        self.act[ARG.action] = Action.AttackUnit
        self.act[ARG.hUnit] = hUnit

    def AttackMove(self, vLocation: Tuple[float, float]):
        """Select ``Action.AttackMove`` with ``vLocation`` as ``ARG.vLoc``."""
        self.act[ARG.action] = Action.AttackMove
        self.act[ARG.vLoc] = vLocation

    # -- Abilities -----------------------------------------------------------
    # NOTE(review): ``hAbility`` is stored under ARG.nSlot, so it is presumably
    # an ``AbilitySlot`` value rather than an entity handle - confirm.
    def UseAbility(self, hAbility: int):
        """Select ``Action.UseAbility`` with ``hAbility`` as ``ARG.nSlot``."""
        self.act[ARG.action] = Action.UseAbility
        self.act[ARG.nSlot] = hAbility

    def UseAbilityOnEntity(self, hAbility: int, hTarget: int):
        """Select ``Action.UseAbilityOnEntity``.

        ``hAbility`` is stored as ``ARG.nSlot`` and ``hTarget`` as ``ARG.hUnit``.
        """
        self.act[ARG.action] = Action.UseAbilityOnEntity
        self.act[ARG.nSlot] = hAbility
        self.act[ARG.hUnit] = hTarget

    def UseAbilityOnLocation(self, hAbility: int, vLoc: Tuple[float, float]):
        """Select ``Action.UseAbilityOnLocation``.

        ``hAbility`` is stored as ``ARG.nSlot`` and ``vLoc`` as ``ARG.vLoc``.
        """
        self.act[ARG.action] = Action.UseAbilityOnLocation
        self.act[ARG.nSlot] = hAbility
        self.act[ARG.vLoc] = vLoc

    def UseAbilityOnTree(self, hAbility: int, iTree: int):
        """Select ``Action.UseAbilityOnTree``.

        ``hAbility`` is stored as ``ARG.nSlot`` and ``iTree`` as ``ARG.iTree``.
        """
        self.act[ARG.action] = Action.UseAbilityOnTree
        self.act[ARG.nSlot] = hAbility
        self.act[ARG.iTree] = iTree

    # -- Runes & items -------------------------------------------------------
    def PickUpRune(self, nRune: int):
        """Select ``Action.PickUpRune`` with ``nRune`` as ``ARG.nRune``."""
        self.act[ARG.action] = Action.PickUpRune
        self.act[ARG.nRune] = nRune

    def PickUpItem(self, hItem: int):
        """Select ``Action.PickUpItem`` with ``hItem`` as ``ARG.hUnit``."""
        self.act[ARG.action] = Action.PickUpItem
        # NOTE(review): unlike the other item actions, the item is stored
        # under ARG.hUnit instead of ARG.nSlot - confirm this is intended.
        self.act[ARG.hUnit] = hItem

    def DropItem(self, hItem: int, vLocation: Tuple[float, float]):
        """Select ``Action.DropItem``.

        ``vLocation`` is stored as ``ARG.vLoc`` and ``hItem`` as ``ARG.nSlot``.
        """
        self.act[ARG.action] = Action.DropItem
        self.act[ARG.vLoc] = vLocation
        self.act[ARG.nSlot] = hItem

    def PurchaseItem(self, sItemName: str):
        """Select ``Action.PurchaseItem`` with ``sItemName`` as ``ARG.sItem``."""
        self.act[ARG.action] = Action.PurchaseItem
        self.act[ARG.sItem] = sItemName

    def SellItem(self, hItem: int):
        """Select ``Action.SellItem`` with ``hItem`` as ``ARG.nSlot``."""
        self.act[ARG.action] = Action.SellItem
        self.act[ARG.nSlot] = hItem

    def DisassembleItem(self, hItem: int):
        """Select ``Action.DisassembleItem`` with ``hItem`` as ``ARG.nSlot``."""
        self.act[ARG.action] = Action.DisassembleItem
        self.act[ARG.nSlot] = hItem

    def SetItemCombineLock(self, hItem: int):
        """Select ``Action.SetItemCombineLock`` with ``hItem`` as ``ARG.nSlot``.

        Only the item is encoded; no lock flag is stored by this builder.
        """
        self.act[ARG.action] = Action.SetItemCombineLock
        self.act[ARG.nSlot] = hItem

    def SwapItems(self, nslot: int, index2: int):
        """Select ``Action.SwapItems``.

        ``nslot`` is stored as ``ARG.nSlot`` and ``index2`` as ``ARG.ix2``.
        The index order does not matter.
        """
        self.act[ARG.action] = Action.SwapItems
        self.act[ARG.nSlot] = nslot
        self.act[ARG.ix2] = index2

    # -- Argument-less actions -----------------------------------------------
    def Buyback(self):
        """Select ``Action.Buyback``."""
        self.act[ARG.action] = Action.Buyback

    def Stop(self):
        """Select ``Action.Stop``."""
        self.act[ARG.action] = Action.Stop

    def Glyph(self):
        """Select ``Action.Glyph``."""
        self.act[ARG.action] = Action.Glyph

    def LevelAbility(self, nSlot: int):
        """Select ``Action.LevelAbility`` with ``nSlot`` as ``ARG.nSlot``."""
        self.act[ARG.action] = Action.LevelAbility
        self.act[ARG.nSlot] = nSlot

    # -- Courier -------------------------------------------------------------
    # CourierEnemySecret is intentionally absent: the ability is hidden.
    def CourierBurst(self):
        """Select ``Action.CourierBurst``."""
        self.act[ARG.action] = Action.CourierBurst

    def CourierReturn(self):
        """Select ``Action.CourierReturn``."""
        self.act[ARG.action] = Action.CourierReturn

    def CourierSecret(self):
        """Select ``Action.CourierSecret``."""
        self.act[ARG.action] = Action.CourierSecret

    def CourierTakeStash(self):
        """Select ``Action.CourierTakeStash``."""
        self.act[ARG.action] = Action.CourierTakeStash

    def CourierTransfer(self):
        """Select ``Action.CourierTransfer``."""
        self.act[ARG.action] = Action.CourierTransfer
class DraftBuilder:
    """Drafting action builder.

    Wraps a mutable mapping ``fac`` and writes the draft decision into it
    in place, using ``DraftAction`` members as keys.
    """

    def __init__(self, fac: dict):
        # Kept by reference: writes are visible to whoever owns the mapping.
        self.fac = fac

    def select(self, hero: int, lane: int):
        """Select a hero and assign it to a particular lane"""
        self.fac[DraftAction.SelectHero] = hero
        self.fac[DraftAction.Lane] = lane

    def ban(self, hero: int):
        """Record ``hero`` as the hero to ban.

        Ban does not work for bots, which makes sense when players want to
        practice against an annoying hero.
        """
        self.fac[DraftAction.BanHero] = hero
class IPCMessageBuilder:
    """Helper to help users build action using code. Mainly used for testing purposes.

    A fresh message is created with ``new_ipc_message()`` on construction and
    is then filled in place through the builders returned by :meth:`player`
    and :meth:`hero_selection`.
    """

    def __init__(self, game=None):
        self.message = new_ipc_message()
        # Optional target for send(); when None, send() does nothing.
        self.game = game

    def player(self, idx: int) -> PlayerAction:
        """Prepare to send an action to a given hero.

        Indices greater than 4 are looked up under ``TEAM_DIRE``, all others
        under ``TEAM_RADIANT``. The returned builder writes directly into
        this message.
        """
        faction = TEAM_DIRE if idx > 4 else TEAM_RADIANT
        return PlayerAction(self.message[faction][idx])

    def hero_selection(self, faction: int) -> DraftBuilder:
        """Prepare to draft a hero for a given faction.

        Resets the faction's ``'HS'`` entry to an enabled draft with no hero,
        ban or lane chosen, and returns a builder writing into that entry.
        """
        self.message[faction]['HS'] = {
            DraftAction.EnableDraft: 1,
            DraftAction.SelectHero: None,
            DraftAction.BanHero: None,
            DraftAction.Lane: None,
        }
        return DraftBuilder(self.message[faction]['HS'])

    def build(self) -> dict:
        """Returns the resulting action message"""
        return self.message

    def send(self):
        """If game was set, send the message to the game.

        Returns whatever ``game.send_message`` returns, or ``None`` when no
        game was provided.
        """
        if self.game is None:
            return None
        return self.game.send_message(self.build())
def player_space():
    """Returns the full action space of a Dota2 bot

    The result is a ``gym.spaces.Dict`` keyed by ``ActionArgument`` members.
    Only ``action``, ``vLoc``, ``nSlot``, ``sItem`` and ``ix2`` are part of
    the space; ``hUnit``, ``iTree`` and ``nRune`` are currently left out.

    Examples
    --------
    >>> s = player_space()
    >>> s.seed(0)
    >>> for k, v in s.sample().items():
    ...     print(k, v)
    ActionArgument.action 16
    ActionArgument.vLoc [-0.8912799 0.9307819]
    ActionArgument.nSlot 14
    ActionArgument.sItem 112
    ActionArgument.ix2 16
    """
    # Imported lazily so the module can be imported without these packages.
    from gym import spaces
    import numpy as np

    import luafun.game.constants as const

    action = spaces.Discrete(len(Action))
    # Location is a 2D vector with each component in [-1, 1]
    vloc = spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
    abilities = spaces.Discrete(len(AbilitySlot))
    items = spaces.Discrete(const.ITEM_COUNT)
    ix2 = spaces.Discrete(len(const.ItemSlot))

    # Disabled arguments, kept for reference:
    #   Tree ID
    #       tree = spaces.Discrete(const.TREE_COUNT)
    #       runes = spaces.Discrete(len(const.RuneSlot))
    #   We set the max number of units on the map to 256;
    #   the ids are remapped to actual handle ids
    #       hUnit = spaces.Discrete(256)

    return spaces.Dict({
        ARG.action: action,
        ARG.vLoc: vloc,
        # ARG.hUnit: hUnit,
        ARG.nSlot: abilities,
        # ARG.iTree: tree,
        # ARG.nRune: runes,
        ARG.sItem: items,
        ARG.ix2: ix2,
    })
def team_space(s: int):
    """Returns the full action space of a Dota2 bot team

    The result is a ``gym.spaces.Dict`` with one ``player_space()`` per
    player, keyed by the string form of ``s`` to ``s + 4``, plus an ``'HS'``
    entry holding the hero-selection space (``select``, ``ban``, ``lane``).

    Parameters
    ----------
    s: int
        Id of the first player of the team; the other four follow it.

    Examples
    --------
    >>> s = team_space(0)
    >>> s.seed(0)
    >>> for k, v in s.sample().items():
    ...     print(k, v)
    0 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    1 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    2 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    3 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    4 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    HS OrderedDict([('ban', 112), ('lane', 0), ('select', 112)])
    """
    from gym import spaces

    import luafun.game.constants as const

    # One independent player space per team member, keyed by player id (as str)
    members = {f'{s + offset}': player_space() for offset in range(5)}

    # Hero Selection
    members['HS'] = spaces.Dict({
        'select': spaces.Discrete(const.HERO_COUNT),
        'ban': spaces.Discrete(const.HERO_COUNT),
        'lane': spaces.Discrete(len(const.Lanes)),
    })

    return spaces.Dict(members)
def action_space():
    """Returns the full action space of a Dota2 bots for all teams

    The returned object is a thin wrapper exposing ``seed`` and ``sample``.
    ``sample`` draws from the underlying two-team ``gym.spaces.Dict`` and
    reshapes the result: a ``'uid'`` entry set to 0 is added, player keys
    become integers (0-4 for ``TEAM_RADIANT``, 5-9 for ``TEAM_DIRE``) and the
    per-team ``'HS'`` entry is not included.

    Examples
    --------
    >>> s = action_space()
    >>> s.seed(0)
    >>> for k, v in s.sample().items():
    ...     print(k, v)
    uid 0
    2 {0: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 1: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 2: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 3: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 4: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])}
    3 {5: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 6: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 7: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 8: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 9: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799, 0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])}
    """
    from gym import spaces

    full_space = spaces.Dict({
        TEAM_RADIANT: team_space(0),
        TEAM_DIRE: team_space(5),
    })

    def fix_sampled_actions(act):
        """Re-key a raw sample: int player ids, a 'uid' entry, no 'HS'."""
        # team_space() keys players by the string form of their id, so each
        # integer id is looked up through str().
        return {
            'uid': 0,
            TEAM_RADIANT: {
                pid: act[TEAM_RADIANT][str(pid)] for pid in range(0, 5)
            },
            TEAM_DIRE: {
                pid: act[TEAM_DIRE][str(pid)] for pid in range(5, 10)
            },
        }

    class _SpaceWrap:
        """Minimal facade over the gym space that post-processes samples."""

        def __init__(self, space):
            self.space = space

        def seed(self, seed):
            """Seed the wrapped space."""
            self.space.seed(seed)

        def sample(self):
            """Draw a sample from the wrapped space and re-key it."""
            return fix_sampled_actions(self.space.sample())

    return _SpaceWrap(full_space)