from collections import defaultdict
from luafun.game.dota2.state_types import Player, Unit
from luafun.game.ipc_send import TEAM_DIRE, TEAM_RADIANT
# Reward categories: a Solo reward is earned by a single hero,
# a Team reward is shared by the whole team
Solo, Team = 0, 1

# Reward table, one (name, reward, type) tuple per tracked event
values = [
    # Game outcome
    ('Win', 5, Team),
    # Deaths
    ('HeroDeath', -1, Solo),
    ('CourierDeath', -2, Solo),
    # Resources
    ('XPGained', 0.002, Solo),
    ('GoldGained', 0.006, Solo),
    ('GoldSpent', 0.0006, Solo),
    # % of health = (x + 1 - (1 - x) ^ 4) / 2
    ('HealthChanged', 2, Solo),
    ('ManaChanged', 0.75, Solo),
    # Kills and last hits are reduced since we already got a big chunk
    # of reward from exp & gold
    ('KilledHero', -0.6, Solo),
    ('LastHit', -0.16, Solo),
    ('Deny', 0.15, Solo),
    # Objectives
    ('GainedAegis', 5, Team),
    ('AncientHPChange', 5, Team),
    ('MegasUnlocked', 4, Team),
    # Buildings: 2/3 = building health + 1/3 on destroy
    ('T1Tower', 2.25, Team),
    ('T2Tower', 3, Team),
    ('T3Tower', 4.5, Team),
    ('T4Tower', 2.25, Team),
    ('Outpost', 2.25, Team),
    ('Barracks', 6, Team),
    # Lane discipline
    ('LaneAssign', -0.15, Solo),
]
class RewardConst:
    """Reward weights, exposed as class attributes so they can be read by name.

    Hero assists have no dedicated weight: they are implicitly rewarded
    through the gold & exp gained.
    """

    # -- Solo rewards ------------------------------------------------------
    HeroDeath = -1
    CourierDeath = -2
    XPGained = 0.002
    GoldGained = 0.006
    GoldSpent = 0.0006
    HealthChanged = 2
    ManaChanged = 0.75
    KilledHero = -0.6
    LastHit = -0.16
    Deny = 0.15
    LaneAssign = -0.15

    # -- Team rewards ------------------------------------------------------
    Win = 5
    GainedAegis = 5
    AncientHPChange = 5
    MegasUnlocked = 4
    T1Tower = 2.25
    T2Tower = 3
    T3Tower = 4.5
    T4Tower = 2.25
    Outpost = 2.25
    Barracks = 6
# See https://dota2.fandom.com/wiki/Experience
# Column indices of an ExperienceTable row
TotalExp = 0
ExpNeeded = 1

# level: [total experience required to reach the level,
#         experience needed to go from this level to the next one]
#
# Invariant: TotalExp[level] + ExpNeeded[level] == TotalExp[level + 1];
# the last level has nothing left to gain, so its ExpNeeded is 0.
ExperienceTable = {
    1: [0, 230],
    2: [230, 370],
    3: [600, 480],
    4: [1080, 580],
    5: [1660, 600],
    6: [2260, 720],
    7: [2980, 750],
    8: [3730, 890],
    9: [4620, 930],
    10: [5550, 970],
    11: [6520, 1010],
    12: [7530, 1050],
    13: [8580, 1225],
    14: [9805, 1250],
    15: [11055, 1275],
    16: [12330, 1300],
    17: [13630, 1325],
    18: [14955, 1500],
    19: [16455, 1590],
    20: [18045, 1600],
    21: [19645, 1850],
    22: [21495, 2100],
    23: [23595, 2350],
    24: [25945, 2600],
    25: [28545, 3500],
    26: [32045, 4500],
    27: [36545, 5500],
    28: [42045, 6500],
    29: [48545, 7500],
    30: [56045, 0],
}
class Reward:
    """Base reward function, takes a state and return its resulting reward level

    Intermediate values are stored in ``self.rewards`` under these keys:

    * ``player_id`` (0 to 9): latest solo reward of that player,
      overwritten by every :meth:`player_message` call,
    * ``100 + team``: building reward accumulated for ``team`` since the
      last :meth:`clear`,
    * ``200 + team``: number of dead barracks messages credited to ``team``
      since the last :meth:`clear`.
    """

    # Key offsets of the team-wide slots inside ``self.rewards``
    _TEAM_SLOT = 100
    _BARRACKS_SLOT = 200

    # Every 6 dead barracks counted grant one MegasUnlocked bonus
    _BARRACKS_PER_MEGAS = 6

    # Player ids whose solo rewards are summed into each team's reward
    _RADIANT_PLAYERS = range(0, 5)
    _DIRE_PLAYERS = range(5, 10)

    def __init__(self):
        self.rewards = defaultdict(float)
        # Last known alive flag of each player's courier
        self.courier_state = [1] * 10
        # Number of alive -> dead transitions seen for each player's courier
        self.courier_death_tracker = [0] * 10

    def player_message(self, pmsg: Player, umsg: Unit, courier: Unit):
        """Computes the 'solo' reward

        The reward is derived from the totals found in the messages (not
        from the change since the previous call) and replaces the value
        previously stored for that player in ``self.rewards``.

        Parameters
        ----------
        pmsg:
            player message, provides ``player_id`` and optionally
            ``kills`` and ``deaths``
        umsg:
            hero unit message, provides ``level``, ``health``,
            ``health_max``, ``mana``, ``mana_max`` and optionally the gold,
            net worth, experience, last hit and deny counters
        courier:
            courier unit message of that player, or None to skip the
            courier death tracking for this call
        """
        pid = pmsg['player_id']
        kills = pmsg.get('kills', 0)
        deaths = pmsg.get('deaths', 0)

        current_gold = umsg.get('reliable_gold', 0) + umsg.get('unreliable_gold', 0)
        spent_gold = umsg.get('net_worth', 0) - current_gold

        # Total experience = experience required to reach the current level
        # + progress made toward the next one
        level = umsg['level']
        needed = ExperienceTable[level][ExpNeeded]
        xp_left = umsg.get('xp_needed_to_level', 0)
        xp_gained = ExperienceTable[level][TotalExp] + (needed - xp_left)

        # Health is shaped so that losing health matters more when it is low
        health_pct = umsg['health'] / umsg['health_max']
        health_reward = (health_pct + 1 - (1 - health_pct) ** 4) / 2

        mana_pct = umsg['mana'] / umsg['mana_max']

        # Count a courier death only when its state flips from alive to dead
        if courier is not None:
            courier_alive = courier['is_alive']
            if self.courier_state[pid] != courier_alive:
                if not courier_alive:
                    self.courier_death_tracker[pid] += 1
                self.courier_state[pid] = courier_alive

        reward = (
            RewardConst.KilledHero * kills +
            RewardConst.HeroDeath * deaths +
            RewardConst.Deny * umsg.get('denies', 0) +
            RewardConst.LastHit * umsg.get('last_hits', 0) +
            RewardConst.GoldSpent * spent_gold +
            RewardConst.GoldGained * current_gold +
            RewardConst.XPGained * xp_gained +
            RewardConst.ManaChanged * mana_pct +
            RewardConst.HealthChanged * health_reward +
            RewardConst.CourierDeath * self.courier_death_tracker[pid] +
            # FIXME: This requires us to define the area of the lanes
            # few rectangles
            RewardConst.LaneAssign * 0
        )
        self.rewards[pid] = reward

    def building_messages(self, umsg: Unit):
        """Accumulates the team reward generated by a single building message

        Damage dealt to a tower, barracks or ancient is credited to the team
        opposing the building's owner, while an outpost rewards the team
        given by the message's ``team_id``. Buildings whose name matches
        none of the known patterns add nothing.

        Parameters
        ----------
        umsg:
            building unit message, provides ``name`` and ``team_id`` plus
            ``health``, ``health_max`` and ``is_alive`` for towers,
            barracks and the ancient
        """
        name = umsg['name']

        # Destroyed enemy buildings increase our rewards
        if umsg['team_id'] == TEAM_RADIANT:
            team_id = TEAM_DIRE
        else:
            team_id = TEAM_RADIANT

        def building_reward(value, unit):
            # 2/3 scales with the missing health, 1/3 is granted on destroy
            return value * (0.66 * (1 - unit['health'] / unit['health_max']) + 0.34 * (1 - unit['is_alive']))

        reward = 0

        if '_tower1_' in name:
            reward += building_reward(RewardConst.T1Tower, umsg)
        elif '_tower2_' in name:
            reward += building_reward(RewardConst.T2Tower, umsg)
        elif '_tower3_' in name:
            reward += building_reward(RewardConst.T3Tower, umsg)
        elif '_tower4_' in name:
            reward += building_reward(RewardConst.T4Tower, umsg)
        elif '_rax_' in name:
            reward += building_reward(RewardConst.Barracks, umsg)
            # Dead barracks are counted to detect when megas are unlocked
            self.rewards[self._BARRACKS_SLOT + team_id] += 1 - int(umsg['is_alive'])
        elif '_fort' in name:
            reward += RewardConst.AncientHPChange * (1 - umsg['health'] / umsg['health_max'])
            reward += RewardConst.Win * (1 - umsg['is_alive'])
        elif '_OutpostName' in name:
            # Taking the outpost increase our rewards
            reward += RewardConst.Outpost
            team_id = umsg['team_id']

        # TODO: RewardConst.GainedAegis is not rewarded yet
        self.rewards[self._TEAM_SLOT + team_id] += reward

    def clear(self):
        """Resets the team-wide accumulators; player solo rewards are kept"""
        for team in (TEAM_RADIANT, TEAM_DIRE):
            self.rewards[self._TEAM_SLOT + team] = 0
        for team in (TEAM_RADIANT, TEAM_DIRE):
            self.rewards[self._BARRACKS_SLOT + team] = 0

    def _partial_reward(self, team, player_ids) -> float:
        """Sums the building, megas and solo rewards of a single team"""
        megas = self.rewards[self._BARRACKS_SLOT + team] // self._BARRACKS_PER_MEGAS
        reward = self.rewards[self._TEAM_SLOT + team] + megas * RewardConst.MegasUnlocked

        for pid in player_ids:
            reward += self.rewards[pid]

        return reward

    def partial_radiant_reward(self) -> float:
        """Returns the Radiant reward, ignoring what Dire earned"""
        return self._partial_reward(TEAM_RADIANT, self._RADIANT_PLAYERS)

    def partial_dire_reward(self) -> float:
        """Returns the Dire reward, ignoring what Radiant earned"""
        # OpenAI probably use a different reward for each bots and tweak the solo/team
        # proportion using the `team_spirit` params
        # I do not like that this is a cooperation game and support can have a positive impact
        # on the NW of another hero on the team and the reward should reflect that
        # because of the change the reward value should probably be tweaked
        # the solo reward should also diminish through time
        #
        # The megas bonus is read from the Dire barracks counter: reading
        # the Radiant one would pay Dire for barracks that Radiant destroyed
        return self._partial_reward(TEAM_DIRE, self._DIRE_PLAYERS)

    def dire_reward(self) -> float:
        """Returns the zero-sum Dire reward (Dire minus Radiant)"""
        return self.partial_dire_reward() - self.partial_radiant_reward()

    def radiant_reward(self) -> float:
        """Returns the zero-sum Radiant reward (Radiant minus Dire)"""
        return self.partial_radiant_reward() - self.partial_dire_reward()