Source code for luafun.reward

from collections import defaultdict

from luafun.game.dota2.state_types import Player, Unit
from luafun.game.ipc_send import TEAM_DIRE, TEAM_RADIANT

# Tags used in the "Type" column of `values` below.
Solo = 0
Team = 1

# Reward table; every entry is a (name, reward, type) tuple.
values = [
    ('Win', 5, Team),
    ('HeroDeath', -1, Solo),
    ('CourierDeath', -2, Solo),
    ('XPGained', 0.002, Solo),
    ('GoldGained', 0.006, Solo),
    ('GoldSpent', 0.0006, Solo),
    # % of health = (x + 1 - (1 - x) ^ 4) / 2
    ('HealthChanged', 2, Solo),
    ('ManaChanged', 0.75, Solo),
    ('KilledHero', -0.6, Solo),
    # Reduce the reward since we already got a big chunk from exp & gold
    ('LastHit', -0.16, Solo),
    ('Deny', 0.15, Solo),
    ('GainedAegis', 5, Team),
    ('AncientHPChange', 5, Team),
    ('MegasUnlocked', 4, Team),
    ('T1Tower', 2.25, Team),
    ('T2Tower', 3, Team),
    ('T3Tower', 4.5, Team),
    # 2/3 = building health + 1/3 on destroy
    ('T4Tower', 2.25, Team),
    ('Outpost', 2.25, Team),
    # 2/3 = building health + 1/3 on destroy
    ('Barracks', 6, Team),
    ('LaneAssign', -0.15, Solo),
]


class RewardConst:
    """Reward weights, exposed as class attributes.

    Each attribute is the multiplier applied to the matching event or
    tracked quantity when a reward is computed.
    """

    # Match outcome
    Win = 5

    # Deaths
    HeroDeath = -1
    CourierDeath = -2

    # Resources
    XPGained = 0.002
    GoldGained = 0.006
    GoldSpent = 0.0006
    HealthChanged = 2
    ManaChanged = 0.75

    # Kills, last hits and denies
    KilledHero = -0.6
    LastHit = -0.16
    Deny = 0.15

    # Objectives
    GainedAegis = 5
    AncientHPChange = 5
    MegasUnlocked = 4

    # Buildings
    T1Tower = 2.25
    T2Tower = 3
    T3Tower = 4.5
    T4Tower = 2.25
    Outpost = 2.25
    Barracks = 6

    # Lane assignment
    LaneAssign = -0.15
# Note carried over from the reward constants: HeroAssist has no weight of
# its own, it is implicitly here with gold & exp gain.

# See https://dota2.fandom.com/wiki/Experience
# Column indices into the rows of ExperienceTable.
TotalExp = 0
ExpNeeded = 1

# level -> [total exp held when reaching the level, exp needed for the next level]
# Invariant: row[TotalExp] + row[ExpNeeded] equals the next level's TotalExp.
# Level 30 is the last level, so it needs 0.
ExperienceTable = {
    1:  [    0,  230],
    2:  [  230,  370],
    3:  [  600,  480],
    4:  [ 1080,  580],
    5:  [ 1660,  600],
    6:  [ 2260,  720],
    7:  [ 2980,  750],
    8:  [ 3730,  890],
    9:  [ 4620,  930],
    10: [ 5550,  970],
    11: [ 6520, 1010],
    12: [ 7530, 1050],
    13: [ 8580, 1225],
    14: [ 9805, 1250],
    15: [11055, 1275],
    16: [12330, 1300],
    17: [13630, 1325],
    18: [14955, 1500],
    19: [16455, 1590],
    20: [18045, 1600],
    21: [19645, 1850],
    22: [21495, 2100],
    23: [23595, 2350],
    24: [25945, 2600],
    25: [28545, 3500],
    26: [32045, 4500],
    27: [36545, 5500],
    28: [42045, 6500],
    29: [48545, 7500],
    30: [56045,    0],
}
class Reward:
    """Base reward function, takes a state and returns its resulting reward level.

    Layout of ``self.rewards`` (a ``defaultdict(float)``):

    * ``pid`` (0-9): latest solo reward of that player, overwritten by
      every call to :meth:`player_message`.
    * ``100 + team``: building reward accumulated for ``team`` by
      :meth:`building_messages` since the last :meth:`clear`.
    * ``200 + team``: number of dead-barracks messages credited to ``team``
      since the last :meth:`clear`; used for the ``MegasUnlocked`` bonus.
    """

    def __init__(self):
        self.rewards = defaultdict(float)
        # Last seen courier state per player id; starts as alive.
        self.courier_state = [1] * 10
        # Number of alive -> dead courier transitions seen per player id.
        self.courier_death_tracker = [0] * 10

    @staticmethod
    def _ratio(current, maximum):
        """Return ``current / maximum``, or 0 when ``maximum`` is 0."""
        return current / maximum if maximum else 0.0

    def player_message(self, pmsg: Player, umsg: Unit, courier: Unit):
        """Computes the 'solo' reward of one player and stores it.

        Args:
            pmsg: player message; ``player_id`` is required, ``kills`` and
                ``deaths`` default to 0.
            umsg: unit message of the player's hero; ``level``, ``health``,
                ``health_max``, ``mana`` and ``mana_max`` are required.
            courier: unit message of the player's courier, or ``None`` when
                there is nothing to track.

        The result is written to ``self.rewards[player_id]``.
        """
        pid = pmsg['player_id']
        kills = pmsg.get('kills', 0)
        deaths = pmsg.get('deaths', 0)

        current_gold = umsg.get('reliable_gold', 0) + umsg.get('unreliable_gold', 0)
        spent_gold = umsg.get('net_worth', 0) - current_gold

        # Total exp = exp held at the start of the level + progress in the level
        xp_left = umsg.get('xp_needed_to_level', 0)
        level = umsg['level']
        needed = ExperienceTable[level][ExpNeeded]
        xp_gained = ExperienceTable[level][TotalExp] + (needed - xp_left)

        # ==
        health_pct = self._ratio(umsg['health'], umsg['health_max'])
        health_reward = (health_pct + 1 - (1 - health_pct) ** 4) / 2

        # ====
        mana_pct = self._ratio(umsg['mana'], umsg['mana_max'])

        # ===
        # Count a courier death only on the alive -> dead transition
        if courier is not None:
            courier_alive = courier['is_alive']
            if self.courier_state[pid] != courier_alive:
                if not courier_alive:
                    self.courier_death_tracker[pid] += 1
                self.courier_state[pid] = courier_alive

        # ===
        reward = (
            RewardConst.KilledHero * kills +
            RewardConst.HeroDeath * deaths +
            RewardConst.Deny * umsg.get('denies', 0) +
            RewardConst.LastHit * umsg.get('last_hits', 0) +
            RewardConst.GoldSpent * spent_gold +
            RewardConst.GoldGained * current_gold +
            RewardConst.XPGained * xp_gained +
            RewardConst.ManaChanged * mana_pct +
            RewardConst.HealthChanged * health_reward +
            RewardConst.CourierDeath * self.courier_death_tracker[pid] +
            # FIXME: This requires us to define the area of the lanes
            # few rectangles
            RewardConst.LaneAssign * 0
        )

        self.rewards[pid] = reward

    def building_messages(self, umsg: Unit):
        """Accumulate the team reward produced by one building message.

        Damage to a building is credited to the team opposing its owner;
        an outpost is credited to the team that owns it.  The result is
        added to ``self.rewards[100 + team]``.
        """
        reward = 0

        # ==
        # Destroyed enemy buildings increase our rewards
        team_id = umsg['team_id']
        if team_id == TEAM_RADIANT:
            team_id = TEAM_DIRE
        else:
            team_id = TEAM_RADIANT
        # ==

        def building_reward(value, unit):
            # 0.66 of the value scales with missing health, 0.34 on destruction
            missing = 1 - unit['health'] / unit['health_max']
            return value * (0.66 * missing + 0.34 * (1 - unit['is_alive']))

        name = umsg['name']

        if '_tower1_' in name:
            reward += building_reward(RewardConst.T1Tower, umsg)

        elif '_tower2_' in name:
            reward += building_reward(RewardConst.T2Tower, umsg)

        elif '_tower3_' in name:
            reward += building_reward(RewardConst.T3Tower, umsg)

        elif '_tower4_' in name:
            reward += building_reward(RewardConst.T4Tower, umsg)

        elif '_rax_' in name:
            reward += building_reward(RewardConst.Barracks, umsg)
            # Dead barracks feed the MegasUnlocked bonus of the partial rewards
            self.rewards[200 + team_id] += 1 - int(umsg['is_alive'])

        elif '_fort' in name:
            reward += RewardConst.AncientHPChange * (1 - umsg['health'] / umsg['health_max'])
            reward += RewardConst.Win * (1 - umsg['is_alive'])

        # NOTE(review): '_OutpostName' looks like a placeholder substring;
        # confirm against the real outpost unit names.
        elif '_OutpostName' in name:
            # Taking the outpost increase our rewards
            reward += RewardConst.Outpost
            team_id = umsg['team_id']

        # TODO: RewardConst.GainedAegis is not applied anywhere yet

        self.rewards[100 + team_id] += reward

    def clear(self):
        """Reset the accumulated team entries (building reward and barracks count)."""
        for team in (TEAM_RADIANT, TEAM_DIRE):
            self.rewards[100 + team] = 0

        for team in (TEAM_RADIANT, TEAM_DIRE):
            self.rewards[200 + team] = 0

    def _partial_reward(self, team, player_ids) -> float:
        """Team reward of ``team`` plus the solo rewards of ``player_ids``."""
        # Every 6 dead barracks credited to the team grant one MegasUnlocked
        base = self.rewards[100 + team] + (self.rewards[200 + team] // 6) * RewardConst.MegasUnlocked
        return sum((self.rewards[pid] for pid in player_ids), base)

    def partial_radiant_reward(self) -> float:
        """Radiant team reward plus the solo rewards of players 0-4."""
        return self._partial_reward(TEAM_RADIANT, range(0, 5))

    def partial_dire_reward(self) -> float:
        """Dire team reward plus the solo rewards of players 5-9."""
        # OpenAI probably use a different reward for each bots and tweak the solo/team
        # proportion using the `team_spirit` params
        # I do not like that this is a cooperation game and support can have a positive impact
        # on the NW of another hero on the team and the reward should reflect that
        # because of the change the reward value should probably be tweaked
        # the solo reward should also diminish through time
        return self._partial_reward(TEAM_DIRE, range(5, 10))

    def dire_reward(self) -> float:
        """Dire partial reward minus the Radiant partial reward."""
        return self.partial_dire_reward() - self.partial_radiant_reward()

    def radiant_reward(self) -> float:
        """Radiant partial reward minus the Dire partial reward."""
        return self.partial_radiant_reward() - self.partial_dire_reward()