蒙特卡洛树搜索(MCTS)实现简易五⼦棋AI
蒙特卡洛树搜索算法可以通过⾃我对弈模拟得到不同状态分⽀中获胜的概率,从⽽获得最优的策略。代码部分可以分为Node类和State类。Node类通过关联⽗节点和⼦节点实现树结构,同时保存每个节点的属性;State类描述游戏的规则并判断胜负。
先导⼊所需要的模块
import numpy as np
import random as rd
import copy
from collections import defaultdict
VECTOR = 7 #棋盘的维度,五⼦棋⼀般是15*15,为了减少运算先使⽤7*7
SIMU_EPSDS = 5000 #⾃我对弈模拟的次数,越⼤代表⾃我对弈的次数越多,速度越慢
创建节点类
class Node():
def __init__(lf, state, parent=None, parent_action=None):
lf.state = state
lf.parent = parent
lf.parent_action = parent_action
lf.children = []
lf._untried_actions = None
lf._untried_actions = lf.untried_actions()韩国R级电影下载
lf._number_of_visits = 0
lf._results = defaultdict(int)
lf._results[1] = 0
lf._results[-1] = 0
#q值代表当前节点胜利次数减去失败次数的差值
def q(lf):
wins = lf._results[1]
los = lf._results[-1]
return wins - los
#n值代表当前节点的访问次数
def n(lf):
return lf._number_of_visits
#扩展⼦节点,即访问该节点的每个⼦节点
def expand(lf):
action = lf._untried_actions.pop()
next_state = lf.state.play(action,0)
child_node = Node(next_state, parent=lf, parent_action=action)
lf.children.append(child_node)
return child_node
def untried_actions(lf):
untried_actions = copy.deepcopy(_legal_actions())变色龙作文
return untried_actions
#⽣成树的逻辑是先判断该节点是否是最终状态,如果不是再判断是否完全扩展,如果不是则继续扩展该节点的⼦节点,否则,
#从⼦结点中随机选择⼀个作为下⼀个要扩展节点
天蝎男和金牛女def _tree_policy(lf):
current_node=lf
while not current_node.is_terminal_node():
一元二次方程求根公式
if not current_node.is_fully_expanded():
return pand()
el:
current_node=current_node.rd_child()
return current_node
return current_node
def is_terminal_node(lf):
return lf.state.is_game_over()
def is_fully_expanded(lf):
return len(lf._untried_actions) == 0
#从所有的⼦节点(下⼀状态)中选择⼀个最优节点
def best_child(lf, c_param=0.1):
current_state = lf.state
#UCT算法
choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2*np.log(lf.n()) / c.n())) for c in lf.children] order = current_state.state[0]
if order==1: #如果当前玩家是先⼿,则选取权重最⼤的⼦节点作为最优action
return lf.children[np.argmax(choices_weights)]
el: #如果当前玩家是后⼿,则选取权重最⼩的⼦节点(先⼿胜率最低的状态)作为最优action
return lf.children[np.argmin(choices_weights)]
def rd_child(lf):
current_state = lf.state
return rd.choice(lf.children)
#lf.children[np.argmax(choices_weights)]
#⾃我对弈模拟,从⼦结点中随机选取action
def rollout(lf):
current_rollout_state = lf.state
while not current_rollout_state.is_game_over():
possible_moves = current__legal_actions()
action = lf.rollout_policy(possible_moves)
current_rollout_state = current_rollout_state.play(action,0)
return current_rollout_state.game_result()
def rollout_policy(lf, possible_moves):
return possible_moves[np.random.randint(len(possible_moves))] #rd.choice(possible_moves)
#向上回溯,将该节点的胜负信息传到⽗节点
def backpropagate(lf, result):
床的英文
lf._number_of_visits += 1.
lf._results[result] += 1.
if lf.parent:
lf.parent.backpropagate(result)
#每个节点计算best action都要⾃我对弈(次数是SIMU_EPSDS)并记录结果,从中选出最优的⼦节点
def best_action(lf):
for i in range(SIMU_EPSDS):
v=lf._tree_policy()
llout()
v.backpropagate(reward)
return lf.best_child()
创建状态类
class State():
def __init__(lf,ini_state): #初始化时导⼊棋盘的初始状态
lf.state=ini_state
def play(lf,action,is_expl):#player:1 - attack, -1 - defensive
order = lf.state[0]
board = copy.deepcopy(lf.state[1])
x=action[0]
y=action[1]
board[x][y]=order
new_state=[-order,board]
return State(new_state)
def is_game_over(lf):
return lf.game_result()!=999
#判断当前游戏结果,返回结果是1 - 先⼿赢,-1 - 后⼿赢,0 - 未分胜负,没有空余点,游戏和棋结束,999 - 未分胜负,继续游戏 def game_result(lf):
#遍历判断纵向是否五连,前两⾏和倒数两⾏不⽤判断
for i in range(2,VECTOR-2):
for j in range(VECTOR):
颈霜if lf.state[1][i-2][j]==lf.state[1][i-1][j]==lf.state[1][i][j]==lf.state[1][i+1][j]==lf.state[1][i+2][j]!=0:
return lf.state[1][i][j]
#遍历判断横向是否五连,前两列和倒数两列不⽤判断
for i in range(VECTOR):
for j in range(2,VECTOR-2):
if lf.state[1][i][j-2]==lf.state[1][i][j-1]==lf.state[1][i][j]==lf.state[1][i][j+1]==lf.state[1][i][j+1]!=0:
return lf.state[1][i][j]
#遍历斜向是否五连,前两⾏、前两列、倒数两⾏、倒数两列不⽤判断
for i in range(2,VECTOR-2):
for j in range(2,VECTOR-2):
if (lf.state[1][i][j-2]==lf.state[1][i][j-1]==lf.state[1][i][j]==lf.state[1][i][j+1]==lf.state[1][i][j+1]!=0)\
|\
(lf.state[1][i][j-2]==lf.state[1][i][j-1]==lf.state[1][i][j]==lf.state[1][i][j+1]==lf.state[1][i][j+1]!=0):
return lf.state[1][i][j]
#若未分胜负,遍历是否还有空余落⼦点,若没有,结果是和棋结束游戏,返回0
blank_count=0
for i in range(VECTOR):
for j in range(VECTOR):
if (lf.state[1][i][j]==0):
blank_count+=1
if blank_count==0:
最快的流水打一成语return 0
el: #未分胜负胜负,游戏继续
return 999
#获得当前棋盘中所有可落⼦的点
def get_legal_actions(lf):
legal_actions=[]
for i in range(VECTOR):
for j in range(VECTOR):
if lf.state[1][i][j] == 0:
legal_actions.append([i,j])
return legal_actions
主函数,⼈机对弈模式,可选先后⼿
if __name__ == '__main__':
#初始化状态
s([VECTOR,VECTOR])
order=1
state=State([order,state])
first='player' #'player' - 玩家先⼿ 'ai' - 玩家后⼿
while True:
if first=='player':
for i in state.state[1]:
print(i)
player_choice=[int(i) for i in input('input the loacation (x,y):').split(',')] state=state.play(player_choice,0)
print('player:',player_choice)
钓鱼饵料state=State([state.state[0],state.state[1]])
for i in state.state[1]:
print(i)
tree=Node(state)
ai_choice=tree.best_action().parent_action
state=state.play(ai_choice,0)
state=State([state.state[0],state.state[1]])
print('ai:',ai_choice)
el:
state=State([state.state[0],state.state[1]])
tree=Node(state)
ai_choice=tree.best_action().state.state[1][-1]
state=state.play(ai_choice,0)
print('ai:',ai_choice)
for i in state.state[1]:
print(i)
player_choice=[int(i) for i in input('input the loacation (x,y):').split(',')] state=state.play(player_choice,0)
print('player:',player_choice)
for i in state.state[1]:
print(i)
暂时没有添加游戏结束后选项,游戏结束时会报错,while循环⾃动终⽌