# Source code for luafun.game.action

"""This module encodes the game action into a ML friendly format"""
from enum import IntEnum
from typing import Tuple
from luafun.game.ipc_send import new_ipc_message, TEAM_RADIANT, TEAM_DIRE


class AbilitySlot(IntEnum):
    """List all the abilities available to a given hero.

    Items and abilities share a single index space:

    * 0-5 inventory, 6-8 backpack, 9-14 stash
    * 15 TP slot, 16 neutral item slot (see the note on ``Item16``)
    * 17-32 hero abilities
    * 33-40 talents

    The ``Ablity*`` spelling is part of the public member names and is
    kept as-is so existing references keep working.
    """
    # Inventory
    Item0 = 0
    Item1 = 1
    Item2 = 2
    Item3 = 3
    Item4 = 4
    Item5 = 5
    # Backpack
    Backpack1 = 6
    Backpack2 = 7
    Backpack3 = 8
    # Stash
    Stash1 = 9
    Stash2 = 10
    Stash3 = 11
    Stash4 = 12
    Stash5 = 13
    Stash6 = 14
    Item15 = 15         # TP
    Item16 = 16         # Neutral ?
    # Abilities: generic slot      | ability found in that slot on invoker
    Q = 17              # Q                 | invoker_quas
    W = 18              # W                 | invoker_wex
    E = 19              # E                 | invoker_exort
    D = 20              # D generic_hidden  | invoker_empty1
    F = 21              # F generic_hidden  | invoker_empty2
    R = 22              # R                 | invoker_invoke
    Ablity6 = 23        # .                 | invoker_cold_snap
    Ablity7 = 24        # .                 | invoker_ghost_walk
    Ablity8 = 25        # .                 | invoker_tornado
    Ablity9 = 26        # .                 | invoker_emp
    Ablity10 = 27       # .                 | invoker_alacrity
    Ablity11 = 28       # .                 | invoker_chaos_meteor
    Ablity12 = 29       # .                 | invoker_sun_strike
    Ablity13 = 30       # .                 | invoker_forge_spirit
    Ablity14 = 31       # .                 | invoker_ice_wall
    Ablity15 = 32       # .                 | invoker_deafening_blast
    # Talents: 98 heroes follow the pattern below, but the talent offset
    # can be shifted for some heroes (example: rubick, invoker, etc..)
    Talent11 = 33       # Talent 1
    Talent12 = 34       # Talent 2
    Talent21 = 35       # Talent 3
    Talent22 = 36       # Talent 4
    Talent31 = 37       # Talent 5
    Talent32 = 38       # Talent 6
    Talent41 = 39       # Talent 7
    Talent42 = 40       # Talent 8


# Sanity check: the size of this enum is used to size the action space
assert len(AbilitySlot) == 41, '41 abilities'


# When looking at Action you might think that Dota is not that complex;
# nevertheless, you need to take into account that when calling UseAbility
# you have to choose among ~1000 unique abilities (120 heroes * 4 + 155 items).
# The abilities are context dependent; each hero can have
#   ~4 abilities + tp ability
#   ~6 items + neutral item
#
# NB: To take an outpost, you can attack it using the AttackUnit action
class Action(IntEnum):
    """List all the actions available to a hero.

    The trailing comment on each member lists the arguments of the
    underlying bot call. The enum is padded with ``NotUsed*`` members so
    that it has exactly 32 entries.
    """
    Stop                          = 0
    MoveToLocation                = 1   # ( vLocation )
    MoveDirectly                  = 2   # ( vLocation )
    MoveToUnit                    = 3   # ( hUnit )
    AttackUnit                    = 4   # ( hUnit, bOnce = True )
    AttackMove                    = 5   # ( vLocation )
    UseAbility                    = 6   # ( hAbility )
    UseAbilityOnEntity            = 7   # ( hAbility, hTarget )
    UseAbilityOnLocation          = 8   # ( hAbility, vLocation )
    UseAbilityOnTree              = 9   # ( hAbility, iTree )
    PickUpRune                    = 10  # ( nRune )
    PickUpItem                    = 11  # ( hItem )
    DropItem                      = 12  # ( hItem, vLocation )
    PurchaseItem                  = 13  # ( sItemName )
    SellItem                      = 14  # ( hItem )
    DisassembleItem               = 15  # ( hItem )
    SetItemCombineLock            = 16  # ( hItem, bLocked )
    # The index order does not matter
    # bots automatically transfer items from stash to inventory when possible
    SwapItems                     = 17  # ( index1, index2 )
    Buyback                       = 18  # ()
    Glyph                         = 19  # ()
    LevelAbility                  = 20  # ( sAbilityName )

    # Courier actions bundled with the hero
    CourierBurst                  = 21
    # hidden ability; cannot use
    # CourierEnemySecret            = 23
    CourierReturn                 = 22
    CourierSecret                 = 23
    CourierTakeStash              = 24
    CourierTransfer               = 25

    # Tensor cores work better with a multiple of 8
    # This gives us room to grow
    NotUsed1 = 26
    NotUsed2 = 27
    NotUsed3 = 28
    NotUsed4 = 29
    NotUsed5 = 30
    NotUsed6 = 31

    # These actions exist but are not necessary
    # Courier                         # ( hCourier, nAction )
    # UseShrine                       # ( hShrine )
    # MovePath                        # ( tWaypoints )


# Sanity check: keeps the enum size at the padded value (a multiple of 8)
assert len(Action) == 32, '32 actions'


class DraftAction(IntEnum):
    """Keys of the hero selection (``'HS'``) part of a team message."""
    EnableDraft = 0
    SelectHero = 1
    BanHero = 2
    Lane = 3


# Argument index
class ActionArgument(IntEnum):
    """Index of each argument inside a player action.

    The members are used as keys when filling the action dictionary of a
    player (see ``PlayerAction``) and when building the action space.
    """
    action   = 0    # Which action to execute
    vLoc     = 1    # Location argument
    hUnit    = 2    # this should be a handle
    nSlot    = 3    # Slot (item or ability)
    iTree    = 4    # This is problematic: we have 2000+ trees
    nRune    = 5    # This could be bundled as an enum like inventory slots
    sItem    = 6    # Needed to buy an item
    ix2      = 7    # Second index, used by SwapItems


# Short alias used throughout this module
ARG = ActionArgument
# 2":{"0":11,"2":355}


# Boilerplate to help humans send bot-like actions to Lua.
# This is only for debugging; it allows a human to control the Lua bots from Python.
class PlayerAction:
    """Player action builder.

    Wraps the action dictionary of a single player. Each method writes the
    selected ``Action`` under ``ARG.action`` and its arguments under the
    matching ``ActionArgument`` keys of that dictionary. The methods modify
    the dictionary in place and return nothing.
    """
    def __init__(self, act: dict):
        # Dictionary that receives the action and its arguments
        self.act = act

    def MoveToLocation(self, vLocation: Tuple[float, float]):
        """Select ``MoveToLocation`` with ``vLocation`` as the location."""
        self.act[ARG.action] = Action.MoveToLocation
        self.act[ARG.vLoc] = vLocation

    def MoveDirectly(self, vLocation: Tuple[float, float]):
        """Select ``MoveDirectly`` with ``vLocation`` as the location."""
        self.act[ARG.action] = Action.MoveDirectly
        self.act[ARG.vLoc] = vLocation

    def MoveToUnit(self, hUnit: int):
        """Select ``MoveToUnit`` with ``hUnit`` as the unit argument."""
        self.act[ARG.action] = Action.MoveToUnit
        self.act[ARG.hUnit] = hUnit

    def AttackUnit(self, hUnit: int):
        """Select ``AttackUnit`` with ``hUnit`` as the unit argument."""
        self.act[ARG.action] = Action.AttackUnit
        self.act[ARG.hUnit] = hUnit

    def AttackMove(self, vLocation: Tuple[float, float]):
        """Select ``AttackMove`` with ``vLocation`` as the location."""
        self.act[ARG.action] = Action.AttackMove
        self.act[ARG.vLoc] = vLocation

    def UseAbility(self, hAbility: int):
        """Select ``UseAbility``; ``hAbility`` is stored as the slot."""
        self.act[ARG.action] = Action.UseAbility
        self.act[ARG.nSlot] = hAbility

    def UseAbilityOnEntity(self, hAbility: int, hTarget: int):
        """Select ``UseAbilityOnEntity``.

        ``hAbility`` is stored as the slot and ``hTarget`` as the unit.
        """
        self.act[ARG.action] = Action.UseAbilityOnEntity
        self.act[ARG.nSlot] = hAbility
        self.act[ARG.hUnit] = hTarget

    def UseAbilityOnLocation(self, hAbility: int, vLoc: Tuple[float, float]):
        """Select ``UseAbilityOnLocation``.

        ``hAbility`` is stored as the slot and ``vLoc`` as the location.
        """
        self.act[ARG.action] = Action.UseAbilityOnLocation
        self.act[ARG.nSlot] = hAbility
        self.act[ARG.vLoc] = vLoc

    def UseAbilityOnTree(self, hAbility: int, iTree: int):
        """Select ``UseAbilityOnTree``.

        ``hAbility`` is stored as the slot and ``iTree`` as the tree.
        """
        self.act[ARG.action] = Action.UseAbilityOnTree
        self.act[ARG.nSlot] = hAbility
        self.act[ARG.iTree] = iTree

    def PickUpRune(self, nRune: int):
        """Select ``PickUpRune`` with ``nRune`` as the rune argument."""
        self.act[ARG.action] = Action.PickUpRune
        self.act[ARG.nRune] = nRune

    def PickUpItem(self, hItem: int):
        """Select ``PickUpItem``; ``hItem`` is stored as the unit argument."""
        self.act[ARG.action] = Action.PickUpItem
        self.act[ARG.hUnit] = hItem

    def DropItem(self, hItem: int, vLocation: Tuple[float, float]):
        """Select ``DropItem``.

        ``hItem`` is stored as the slot and ``vLocation`` as the location.
        """
        self.act[ARG.action] = Action.DropItem
        self.act[ARG.vLoc] = vLocation
        self.act[ARG.nSlot] = hItem

    def PurchaseItem(self, sItemName: str):
        """Select ``PurchaseItem`` with ``sItemName`` as the item to buy."""
        self.act[ARG.action] = Action.PurchaseItem
        self.act[ARG.sItem] = sItemName

    def SellItem(self, hItem: int):
        """Select ``SellItem``; ``hItem`` is stored as the slot."""
        self.act[ARG.action] = Action.SellItem
        self.act[ARG.nSlot] = hItem

    def DisassembleItem(self, hItem: int):
        """Select ``DisassembleItem``; ``hItem`` is stored as the slot."""
        self.act[ARG.action] = Action.DisassembleItem
        self.act[ARG.nSlot] = hItem

    def SetItemCombineLock(self, hItem: int):
        """Select ``SetItemCombineLock``; ``hItem`` is stored as the slot."""
        # NOTE(review): Action.SetItemCombineLock is documented as
        # ( hItem, bLocked ) but no lock flag is sent from here;
        # confirm how the receiving side picks the lock state.
        self.act[ARG.action] = Action.SetItemCombineLock
        self.act[ARG.nSlot] = hItem

    def SwapItems(self, nslot: int, index2: int):
        """Select ``SwapItems`` between slots ``nslot`` and ``index2``."""
        # The index order does not matter
        self.act[ARG.action] = Action.SwapItems
        self.act[ARG.nSlot] = nslot
        self.act[ARG.ix2] = index2

    def Buyback(self):
        """Select ``Buyback`` (no argument)."""
        self.act[ARG.action] = Action.Buyback

    def Stop(self):
        """Select ``Stop`` (no argument)."""
        self.act[ARG.action] = Action.Stop

    def Glyph(self):
        """Select ``Glyph`` (no argument)."""
        self.act[ARG.action] = Action.Glyph

    def LevelAbility(self, nSlot: int):
        """Select ``LevelAbility`` for the ability in slot ``nSlot``."""
        self.act[ARG.action] = Action.LevelAbility
        self.act[ARG.nSlot] = nSlot

    def CourierBurst(self):
        """Select ``CourierBurst`` (no argument)."""
        self.act[ARG.action] = Action.CourierBurst

    # Ability is hidden
    # def CourierEnemySecret(self):
    #    self.act[ARG.action] = Action.CourierEnemySecret

    def CourierReturn(self):
        """Select ``CourierReturn`` (no argument)."""
        self.act[ARG.action] = Action.CourierReturn

    def CourierSecret(self):
        """Select ``CourierSecret`` (no argument)."""
        self.act[ARG.action] = Action.CourierSecret

    def CourierTakeStash(self):
        """Select ``CourierTakeStash`` (no argument)."""
        self.act[ARG.action] = Action.CourierTakeStash

    def CourierTransfer(self):
        """Select ``CourierTransfer`` (no argument)."""
        self.act[ARG.action] = Action.CourierTransfer


class DraftBuilder:
    """Drafting action builder.

    Wraps the hero selection dictionary of a faction and fills it using
    ``DraftAction`` members as keys. The dictionary is modified in place.
    """
    def __init__(self, fac: dict):
        # Hero selection dictionary that receives the draft choices
        self.fac = fac

    def select(self, hero: int, lane: int):
        """Select a hero and assign it to a particular lane"""
        self.fac[DraftAction.SelectHero] = hero
        self.fac[DraftAction.Lane] = lane

    def ban(self, hero: int):
        """Ban a hero.

        Ban does not work for bots, which makes sense in the case of
        players wanting to practice against an annoying hero.
        """
        self.fac[DraftAction.BanHero] = hero


class IPCMessageBuilder:
    """Helper to help users build action using code. Mainly used for testing purposes

    A fresh message is created with ``new_ipc_message()``; the ``player``
    and ``hero_selection`` helpers return builders that write into it.
    """
    def __init__(self, game=None):
        self.message = new_ipc_message()
        # Optional target of ``send``; only its ``send_message`` is used
        self.game = game

    def player(self, idx: int) -> PlayerAction:
        """Prepare to send an action to a given hero

        Player indices above 4 are looked up in the ``TEAM_DIRE`` part of
        the message, all the others in the ``TEAM_RADIANT`` part.
        """
        faction = TEAM_DIRE if idx > 4 else TEAM_RADIANT
        return PlayerAction(self.message[faction][idx])

    def hero_selection(self, faction: int) -> DraftBuilder:
        """Prepare to draft a hero for a given faction

        The ``'HS'`` entry of the faction is replaced by a new dictionary
        with drafting enabled and no hero, ban or lane chosen yet.
        """
        draft = {
            DraftAction.EnableDraft: 1,
            DraftAction.SelectHero: None,
            DraftAction.BanHero: None,
            DraftAction.Lane: None
        }
        self.message[faction]['HS'] = draft
        return DraftBuilder(self.message[faction]['HS'])

    def build(self) -> dict:
        """Returns the resulting action message"""
        return self.message

    def send(self):
        """If game was set, send the message to the game

        Returns whatever ``game.send_message`` returns, or ``None`` when
        no game was given.
        """
        if self.game is None:
            return None

        return self.game.send_message(self.build())


def player_space():
    """Returns the full action space of a Dota2 bot

    The space is a ``gym.spaces.Dict`` keyed by ``ActionArgument`` members.
    Only ``action``, ``vLoc``, ``nSlot``, ``sItem`` and ``ix2`` are part of
    the space; ``hUnit``, ``iTree`` and ``nRune`` are currently left out.

    Examples
    --------
    >>> s = player_space()
    >>> s.seed(0)
    >>> for k, v in s.sample().items():
    ...     print(k, v)
    ActionArgument.action 16
    ActionArgument.vLoc [-0.8912799  0.9307819]
    ActionArgument.nSlot 14
    ActionArgument.sItem 112
    ActionArgument.ix2 16

    """
    # Imported here so the module can be loaded without gym/numpy
    from gym import spaces
    import numpy as np
    import luafun.game.constants as const

    action = spaces.Discrete(len(Action))
    # Two coordinates, each in [-1, 1]
    vloc = spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)

    abilities = spaces.Discrete(len(AbilitySlot))
    # Tree ID
    # tree = spaces.Discrete(const.TREE_COUNT)
    # runes = spaces.Discrete(len(const.RuneSlot))
    # We set the max number of unit on the map to 256
    # the ids are remapped to actual handle id
    # hUnit = spaces.Discrete(256)

    items = spaces.Discrete(const.ITEM_COUNT)
    ix2 = spaces.Discrete(len(const.ItemSlot))

    return spaces.Dict({
        ARG.action: action,
        ARG.vLoc: vloc,
        # ARG.hUnit: hUnit,
        ARG.nSlot: abilities,
        # ARG.iTree: tree,
        # ARG.nRune: runes,
        ARG.sItem: items,
        ARG.ix2: ix2
    })


def team_space(s: int):
    """Returns the full action space of a Dota2 bot team

    The space holds one ``player_space()`` per player, keyed by the player
    index as a string (``str(s)`` to ``str(s + 4)``), plus a ``'HS'`` entry
    for the hero selection.

    Parameters
    ----------
    s: int
        Index of the first player of the team

    Examples
    --------
    >>> s = team_space(0)
    >>> s.seed(0)
    >>> for k, v in s.sample().items():
    ...     print(k, v)
    0 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    1 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    2 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    3 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    4 OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])
    HS OrderedDict([('ban', 112), ('lane', 0), ('select', 112)])
    """
    from gym import spaces
    import luafun.game.constants as const

    # Five players per team, each with its own independent space
    members = {f'{s + offset}': player_space() for offset in range(5)}

    # Hero Selection
    members['HS'] = spaces.Dict({
        'select': spaces.Discrete(const.HERO_COUNT),
        'ban': spaces.Discrete(const.HERO_COUNT),
        'lane': spaces.Discrete(len(const.Lanes))
    })

    return spaces.Dict(members)


def action_space():
    """Returns the full action space of a Dota2 bots for all teams

    The returned object is a thin wrapper around a ``gym.spaces.Dict`` that
    only exposes ``seed`` and ``sample``. Samples are reshaped before being
    returned: a ``'uid'`` entry set to 0 is added, player keys are turned
    from strings into integers, and the ``'HS'`` (hero selection) part of
    each team is not included.

    Examples
    --------
    >>> s = action_space()
    >>> s.seed(0)
    >>> for k, v in s.sample().items():
    ...     print(k, v)
    uid 0
    2 {0: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 1: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 2: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 3: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 4: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])}
    3 {5: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 6: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 7: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 8: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)]), 9: OrderedDict([(<ActionArgument.action: 0>, 16), (<ActionArgument.vLoc: 1>, array([-0.8912799,  0.9307819], dtype=float32)), (<ActionArgument.nSlot: 3>, 14), (<ActionArgument.sItem: 6>, 112), (<ActionArgument.ix2: 7>, 16)])}
    """
    from gym import spaces

    full_space = spaces.Dict({
        TEAM_RADIANT: team_space(0),
        TEAM_DIRE: team_space(5),
    })

    def fix_sampled_actions(act):
        """Reshape a raw sample: integer player keys, added uid, no 'HS'."""
        radiant = act[TEAM_RADIANT]
        dire = act[TEAM_DIRE]

        # team_space keys its players by string; the result uses integers
        return {
            'uid': 0,
            TEAM_RADIANT: {pid: radiant[str(pid)] for pid in range(0, 5)},
            TEAM_DIRE: {pid: dire[str(pid)] for pid in range(5, 10)},
        }

    class _SpaceWrap:
        """Expose ``seed`` and ``sample`` of the wrapped space."""
        def __init__(self, space):
            self.space = space

        def seed(self, seed):
            """Seed the wrapped space."""
            self.space.seed(seed)

        def sample(self):
            """Sample the wrapped space and reshape the result."""
            return fix_sampled_actions(self.space.sample())

    return _SpaceWrap(full_space)