蒙特卡洛树搜索(MCTS)实现简易五子棋AI
本文使用蒙特卡洛树搜索(MCTS)实现一个简易的五子棋AI,完整说明见下文。
蒙特卡洛树搜索算法可以通过自我对弈模拟得到不同状态分支中获胜的概率,从而获得最优的策略。代码部分可以分为Node类和State类。Node类通过关联父节点和子节点实现树结构,同时保存每个节点的属性;State类描述游戏的规则并判断胜负。
模型结构参考了https://ai-boson.github.io/mcts/
先导入所需要的模块
import numpy as np
import random as rd
import copy
from collections import defaultdict
VECTOR = 7 #board dimension; Gomoku is normally 15x15, use 7x7 to reduce computation
SIMU_EPSDS = 5000 #number of self-play simulations per move; larger = stronger play but slower
创建节点类
class Node():
    """A node in the Monte Carlo search tree.

    Wraps a game ``State`` and links to its parent/children, accumulating
    visit counts and win/loss tallies that drive the UCT selection rule.
    """

    def __init__(self, state, parent=None, parent_action=None):
        self.state = state                  # State object this node represents
        self.parent = parent                # parent Node (None for the root)
        self.parent_action = parent_action  # move that led here from the parent
        self.children = []
        # Legal moves not yet expanded into child nodes.
        self._untried_actions = self.untried_actions()
        self._number_of_visits = 0
        # Maps simulation outcome (1 = first player win, -1 = second player
        # win) to how many rollouts through this node ended that way.
        self._results = defaultdict(int)
        self._results[1] = 0
        self._results[-1] = 0

    def q(self):
        """Return wins minus losses recorded at this node."""
        wins = self._results[1]
        loses = self._results[-1]
        return wins - loses

    def n(self):
        """Return the number of times this node has been visited."""
        return self._number_of_visits

    def expand(self):
        """Expand one untried move into a child node and return the child."""
        action = self._untried_actions.pop()
        next_state = self.state.play(action, 0)
        child_node = Node(next_state, parent=self, parent_action=action)
        self.children.append(child_node)
        return child_node

    def untried_actions(self):
        """Return an independent copy of the legal moves in this state."""
        return copy.deepcopy(self.state.get_legal_actions())

    def _tree_policy(self):
        """Select a node to simulate from.

        Walk down the tree until a terminal node is reached; at the first
        node that still has untried moves, expand and return the new child.
        NOTE(review): classic UCT would descend via best_child() here; this
        implementation deliberately descends via a random child — confirm
        before "fixing".
        """
        current_node = self
        while not current_node.is_terminal_node():
            if not current_node.is_fully_expanded():
                return current_node.expand()
            current_node = current_node.rd_child()
        return current_node

    def is_terminal_node(self):
        """True when this node's state ends the game."""
        return self.state.is_game_over()

    def is_fully_expanded(self):
        """True when every legal move has been expanded into a child."""
        return len(self._untried_actions) == 0

    def best_child(self, c_param=0.1):
        """Return the best child according to the UCT score.

        The first player (state[0] == 1) maximizes the score; the second
        player minimizes it (i.e. picks the state worst for the first player).
        """
        choices_weights = [
            (c.q() / c.n()) + c_param * np.sqrt(2 * np.log(self.n()) / c.n())
            for c in self.children
        ]
        if self.state.state[0] == 1:
            return self.children[np.argmax(choices_weights)]
        return self.children[np.argmin(choices_weights)]

    def rd_child(self):
        """Return a uniformly random child (used while descending the tree)."""
        return rd.choice(self.children)

    def rollout(self):
        """Play random moves from this state to the end and return the result."""
        current_rollout_state = self.state
        while not current_rollout_state.is_game_over():
            possible_moves = current_rollout_state.get_legal_actions()
            action = self.rollout_policy(possible_moves)
            current_rollout_state = current_rollout_state.play(action, 0)
        return current_rollout_state.game_result()

    def rollout_policy(self, possible_moves):
        """Pick a rollout move uniformly at random."""
        return possible_moves[np.random.randint(len(possible_moves))]

    def backpropagate(self, result):
        """Propagate a simulation result from this node up to the root."""
        self._number_of_visits += 1.
        self._results[result] += 1.
        if self.parent:
            self.parent.backpropagate(result)

    def best_action(self):
        """Run SIMU_EPSDS simulations and return the best child of this node."""
        for _ in range(SIMU_EPSDS):
            v = self._tree_policy()
            reward = v.rollout()
            v.backpropagate(reward)
        return self.best_child()
创建状态类
class State():
    """Gomoku game state.

    ``self.state`` is a two-element list ``[player_to_move, board]`` where
    the board is a 2-D grid: 0 = empty, 1 = first player, -1 = second player.
    """

    def __init__(self, ini_state):
        self.state = ini_state

    def play(self, action, is_expl):
        """Return the successor State after the current player moves.

        ``action`` is ``[row, col]``; ``is_expl`` is unused (kept for
        interface compatibility).  The board is deep-copied so the original
        state is never mutated, and the player to move is flipped.
        """
        order = self.state[0]
        board = copy.deepcopy(self.state[1])
        row, col = action[0], action[1]
        board[row][col] = order
        return State([-order, board])

    def is_game_over(self):
        """True when the game has a winner or the board is full."""
        return self.game_result() != 999

    def game_result(self):
        """Return the game outcome.

        1  - first player has five in a row
        -1 - second player has five in a row
        0  - board full with no winner (draw)
        999 - game still in progress

        BUG FIX: the original horizontal check compared cell [i][j+1] twice
        (instead of [i][j+2]), and the "diagonal" check was two identical
        copies of that same horizontal check, so horizontal and diagonal
        wins were never detected correctly.  This version scans every cell
        as the start of a five-stone run in four directions.
        """
        board = self.state[1]
        size = len(board)  # generalized from the hard-coded VECTOR constant
        # Directions: down, right, down-right, down-left.  Checking only
        # these four from every start cell covers all lines exactly once.
        directions = ((1, 0), (0, 1), (1, 1), (1, -1))
        for i in range(size):
            for j in range(size):
                player = board[i][j]
                if player == 0:
                    continue
                for di, dj in directions:
                    end_i, end_j = i + 4 * di, j + 4 * dj
                    if not (0 <= end_i < size and 0 <= end_j < size):
                        continue  # run would leave the board
                    if all(board[i + k * di][j + k * dj] == player
                           for k in range(1, 5)):
                        return player
        # No winner: if any empty cell remains the game continues.
        for row in board:
            for cell in row:
                if cell == 0:
                    return 999
        return 0  # full board, no five in a row: draw

    def get_legal_actions(self):
        """Return every empty cell as a [row, col] list."""
        board = self.state[1]
        size = len(board)
        return [[i, j] for i in range(size) for j in range(size)
                if board[i][j] == 0]
主函数,人机对弈模式,可选先后手
if __name__ == '__main__':
    # Human-vs-AI game loop.  Start from an empty board with the first
    # player (1) to move.
    state = State([1, np.zeros([VECTOR, VECTOR])])
    first = 'player'  # 'player' - human moves first, 'ai' - AI moves first
    while not state.is_game_over():
        if first == 'player':
            for row in state.state[1]:
                print(row)
            player_choice = [int(v) for v in
                             input('input the location (x,y):').split(',')]
            state = state.play(player_choice, 0)
            print('player:', player_choice)
            if state.is_game_over():
                break
            for row in state.state[1]:
                print(row)
            tree = Node(state)
            ai_choice = tree.best_action().parent_action
            state = state.play(ai_choice, 0)
            print('ai:', ai_choice)
        else:
            tree = Node(state)
            # BUG FIX: the original read state.state[1][-1] (the board's
            # last ROW) as the AI move here; the chosen move is the best
            # child's parent_action, as in the branch above.
            ai_choice = tree.best_action().parent_action
            state = state.play(ai_choice, 0)
            print('ai:', ai_choice)
            if state.is_game_over():
                break
            for row in state.state[1]:
                print(row)
            player_choice = [int(v) for v in
                             input('input the location (x,y):').split(',')]
            state = state.play(player_choice, 0)
            print('player:', player_choice)
    # Announce the result instead of crashing when the game ends.
    for row in state.state[1]:
        print(row)
    outcome = state.game_result()
    print({1: 'first player wins', -1: 'second player wins',
           0: 'draw'}.get(outcome, 'game in progress'))
暂时没有添加游戏结束后选项,游戏结束时会报错,while循环自动终止
开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!
更多推荐
已为社区贡献1条内容
所有评论(0)