如何使我的AI算法播放9板井字游戏？

Question

如何使我的AI算法播放9板井字游戏？

Ver*_*era 5 python algorithm montecarlo monte-carlo-tree-search

为了让其他人轻松地帮助我， 我将所有代码都放在此处https://pastebin.com/WENzM41k ，它将在2个代理相互竞争时开始。

我正在尝试实现Monte Carlo树搜索以在Python中播放9板tic-tac-toe。游戏规则类似于常规的井字游戏，但带有9个3x3子板。最后一块的放置位置决定放置一块子板。这有点像最终的井字游戏，但如果赢得了一个分牌，游戏就会结束。

我正在尝试学习MCTS，并且在这里找到了一些代码：http : //mcts.ai/code/python.html

我在网站上使用了节点类和UCT类，并添加了我的9局井字游戏状态类和一些其他代码。所有代码都在这里：

from math import log, sqrt
import random
import numpy as np
from copy import deepcopy


class BigGameState:
    def __init__(self):
        self.board = np.zeros((10, 10), dtype="int8")
        self.curr = 1
        self.playerJustMoved = 2 # At the root pretend the player just moved is player 2 - player 1 has the first move

    def Clone(self):
        """ Create a deep clone of this game state.
        """
        st = BigGameState()
        st.playerJustMoved = self.playerJustMoved
        st.curr = self.curr
        st.board = deepcopy(self.board)
        return st

    def DoMove(self, move):
        """ Update a state by carrying out the given move.
            Must update playerJustMoved.
        """
        self.playerJustMoved = 3 - self.playerJustMoved
        if move >= 1 and move <= 9 and move == int(move) and self.board[self.curr][move] == 0:
            self.board[self.curr][move] = self.playerJustMoved
            self.curr = move

    def GetMoves(self):
        """ Get all possible moves from this state.
        """
        return [i for i in range(1, 10) if self.board[self.curr][i] == 0]

    def GetResult(self, playerjm):
        """ Get the game result from the viewpoint of playerjm. 
        """
        for bo in self.board:
            for (x,y,z) in [(1,2,3),(4,5,6),(7,8,9),(1,4,7),(2,5,8),(3,6,9),(1,5,9),(3,5,7)]:
                if bo[x] == [y] == bo[z]:
                    if bo[x] == playerjm:
                        return 1.0
                    else:
                        return 0.0
        if self.GetMoves() == []: return 0.5 # draw

    def drawboard(self):
        print_board_row(self.board, 1, 2, 3, 1, 2, 3)
        print_board_row(self.board, 1, 2, 3, 4, 5, 6)
        print_board_row(self.board, 1, 2, 3, 7, 8, 9)
        print(" ------+-------+------")
        print_board_row(self.board, 4, 5, 6, 1, 2, 3)
        print_board_row(self.board, 4, 5, 6, 4, 5, 6)
        print_board_row(self.board, 4, 5, 6, 7, 8, 9)
        print(" ------+-------+------")
        print_board_row(self.board, 7, 8, 9, 1, 2, 3)
        print_board_row(self.board, 7, 8, 9, 4, 5, 6)
        print_board_row(self.board, 7, 8, 9, 7, 8, 9)
        print()


def print_board_row(board, a, b, c, i, j, k):
    # The marking script doesn't seem to like this either, so just take it out to submit
    print("", board[a][i], board[a][j], board[a][k], end = " | ")
    print(board[b][i], board[b][j], board[b][k], end = " | ")
    print(board[c][i], board[c][j], board[c][k])


class Node:
    """ A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
        Crashes if state not specified.
    """
    def __init__(self, move = None, parent = None, state = None):
        self.move = move # the move that got us to this node - "None" for the root node
        self.parentNode = parent # "None" for the root node
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves() # future child nodes
        self.playerJustMoved = state.playerJustMoved # the only part of the state that the Node needs later


    def UCTSelectChild(self):
        """ Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have
            lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of
            exploration versus exploitation.
        """
        s = sorted(self.childNodes, key = lambda c: c.wins/c.visits + 0.2 * sqrt(2*log(self.visits)/c.visits))[-1]
        return s

    def AddChild(self, m, s):
        """ Remove m from untriedMoves and add a new child node for this move.
            Return the added child node
        """
        n = Node(move = m, parent = self, state = s)
        self.untriedMoves.remove(m)
        self.childNodes.append(n)
        return n

    def Update(self, result):
        """ Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.
        """
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return "[M:" + str(self.move) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " U:" + str(self.untriedMoves) + "]"

    def TreeToString(self, indent):
        s = self.IndentString(indent) + str(self)
        for c in self.childNodes:
             s += c.TreeToString(indent+1)
        return s

    def IndentString(self,indent):
        s = "\n"
        for i in range (1,indent+1):
            s += "| "
        return s

    def ChildrenToString(self):
        s = ""
        for c in self.childNodes:
             s += str(c) + "\n"
        return s


def UCT(rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []: # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m,state) # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    if (verbose): print(rootnode.TreeToString(0))
    else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited

def UCTPlayGame():
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """

    state = BigGameState() # uncomment to play OXO

    while (state.GetMoves() != []):
        state.drawboard()
        m = UCT(rootstate = state, itermax = 1000, verbose = False) # play with values for itermax and verbose = True
        print("Best Move: " + str(m) + "\n")
        state.DoMove(m)
    if state.GetResult(state.playerJustMoved) == 1.0:
        print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        print("Player " + str(3 - state.playerJustMoved) + " wins!")
    else: print("Nobody wins!")

if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    UCTPlayGame()

Run Code Online (Sandbox Code Playgroud)

运行代码，它将在2个代理相互竞争时开始。但是，代理不能很好地玩游戏。糟糕的表现是不能接受的。例如，如果ai在子板上连续获得2个，并且又是轮到他了，那么他就不会出局。我应该从哪里开始改进？我试图更改Node类和UCT类的代码，但没有任何效果。

更新：如果该板处于下面状态，并且是我的AI（正在玩X）在8号板（第三行的中间子板）上玩。我应该写一些特定的代码让AI不在1或5上玩（因为对手会获胜），或者我应该让AI做出决定。我试图写代码告诉AI，但是那样我必须遍历所有子板。

--O|---|---
-O-|---|---
---|---|---
-----------
---|-O-|---
---|-O-|---
---|---|---
-----------
---|---|---
---|---|---
---|---|---

Run Code Online (Sandbox Code Playgroud)

Answer 1

Ale*_*tin 3

我花了一些时间阅读有关 MCTS 的内容，并花了更多时间来捕获其余的错误：

\n\n

我添加了 OXOState（井字游戏），这样我就可以使用熟悉且简单的游戏进行调试。这只是http://mcts.ai/code/python.html的原始源代码的一个问题：在有人赢得比赛后它会继续玩。所以，我解决了这个问题。
为了调试和乐趣添加了 HumanPlayer。
为了评估游戏水平添加了RandomPlayer和NegamaxPlayer（negamax算法https://en.wikipedia.org/wiki/Negamax）

\n\n

NegamaxPlayer 与 UCT（蒙特卡罗树搜索）

\n\n

itermax= won lost draw total_time\n       1 964   0   36       172.8\n      10 923   0   77       173.4\n     100 577   0  423       182.1\n    1000  48   0  952       328.9\n   10000   0   0 1000      1950.3\n

Run Code Online (Sandbox Code Playgroud)\n\n

UTC 对完美玩家的表现相当令人印象深刻（minimax 进行了完整的三项搜索）：当 itermax=1000 时，分数和比赛时间几乎相等 - 1000 场比赛中只有 48 场输了。

\n\n

修复了 BigGameState 的其余（我认为）问题。现在打得很熟练，所以我赢不了。

\n\n

我在 NegamaxPlayer 中添加了玩 9 板井字游戏的深度限制，因为可能需要一段时间才能找到该游戏中的最佳动作。

\n\n

NegamaxPlayer（深度）与 UCT（itermax）

\n\n

depth itermax won  lost draw total_time\n    4       1   9     1    0       18.4\n    4      10   9     1    0       20.7\n    4     100   5     5    0       36.2\n    4    1000   2     8    0      188.8\n    5   10000   2     8    0      318.0\n    6   10000   0    10    0      996.5\n

Run Code Online (Sandbox Code Playgroud)\n\n

现在 UTC(itermax=100) 与 NegamaxPlayer(深度 4) 玩相同的级别，并在下一个级别 8 到 2 中获胜。我很惊讶！;-)

\n\n

我在该级别 (itermax=100) 下赢得了第一场比赛，但在 1000 级时输了第二场比赛：

\n\n

Game 1, Move 40:\n\xe2\x94\x8f\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xb3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xb3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x93\n\xe2\x94\x83 X X . \xe2\x94\x83*O O O \xe2\x94\x83 O . . \xe2\x94\x83\n\xe2\x94\x83 . O O \xe2\x94\x83 . . X \xe2\x94\x83 . X O \xe2\x94\x83\n\xe2\x94\x83 O X X \xe2\x94\x83 X . . \xe2\x94\x83 . X . \xe2\x94\x83\n\xe2\x94\xa3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xab\n\xe2\x94\x83 X . . \xe2\x94\x83 . X . \xe2\x94\x83 O . . \xe2\x94\x83\n\xe2\x94\x83 . X . \xe2\x94\x83 O O X \xe2\x94\x83 O X . \xe2\x94\x83\n\xe2\x94\x83 . O . \xe2\x94\x83 O . . \xe2\x94\x83 X . . \xe2\x94\x83\n\xe2\x94\xa3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xab\n\xe2\x94\x83 X X O \xe2\x94\x83 O . X \xe2\x94\x83 . O X \xe2\x94\x83\n\xe2\x94\x83 X . . \xe2\x94\x83 . . . \xe2\x94\x83 . . . \xe2\x94\x83\n\xe2\x94\x83 . . O \xe2\x94\x83 O . X \xe2\x94\x83 . O . \xe2\x94\x83\n\xe2\x94\x97\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xbb\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xbb\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x9b\n\nPlayer 2 wins!\nwon 0 lost 1 draw 0\n

Run Code Online (Sandbox Code Playgroud)\n\n

这是完整的代码：

\n\n

from math import *\nimport random\nimport time\nfrom copy import deepcopy\n\n\nclass BigGameState:\n    def __init__(self):\n        self.board = [[0 for i in range(10)] for j in range(10)]\n        self.curr = 1\n        # At the root pretend the player just moved is player 2,\n        # so player 1 will have the first move\n        self.playerJustMoved = 2\n        self.ended = False\n        # to put * in __str__\n        self.last_move = None\n        self.last_curr = None\n\n    def Clone(self):\n        return deepcopy(self)\n\n    def DoMove(self, move):\n        # 1 2 3\n        # 4 5 6\n        # 7 8 9\n        winning_pairs = [[],  # 0\n                         [[2, 3], [5, 9], [4, 7]],  # for 1\n                         [[1, 3], [5, 8]],  # for 2\n                         [[1, 2], [5, 7], [6, 9]],  # for 3\n                         [[1, 7], [5, 6]],  # for 4\n                         [[1, 9], [2, 8], [3, 7], [4, 6]],  # for 5\n                         [[3, 9], [4, 5]],  # for 6\n                         [[1, 4], [5, 3], [8, 9]],  # for 7\n                         [[7, 9], [2, 5]],  # for 8\n                         [[7, 8], [1, 5], [3, 6]],  # for 9\n                         ]\n        if not isinstance(move, int) or 1 < move > 9 or \\\n                self.board[self.curr][move] != 0:\n            raise ValueError\n        self.playerJustMoved = 3 - self.playerJustMoved\n        self.board[self.curr][move] = self.playerJustMoved\n        for index1, index2 in winning_pairs[move]:\n            if self.playerJustMoved == self.board[self.curr][index1] == \\\n                    self.board[self.curr][index2]:\n                self.ended = True\n        self.last_move = move\n        self.last_curr = self.curr\n        self.curr = move\n\n    def GetMoves(self):\n        if self.ended:\n            return []\n        return [i for i in range(1, 10) if self.board[self.curr][i] == 0]\n\n    def GetResult(self, playerjm):\n        # Get the game result from the viewpoint of playerjm.\n        for bo in self.board:\n            for x, y, z in [(1, 2, 3), (4, 5, 6), (7, 8, 9),\n                            (1, 4, 7), (2, 5, 8), (3, 6, 9),\n                            (1, 5, 9), (3, 5, 7)]:\n                if bo[x] == bo[y] == bo[z]:\n                    if bo[x] == playerjm:\n                        return 1.0\n                    elif bo[x] != 0:\n                        return 0.0\n        if not self.GetMoves():\n            return 0.5 # draw\n        raise ValueError\n\n    def _one_board_string(self, a, row):\n        return \'\'.join([\' \' + \'.XO\'[self.board[a][i+row]] for i in range(3)])\n\n    def _three_board_line(self, index, row):\n        return \'\xe2\x94\x83\' + \'\'.join([self._one_board_string(i + index, row) + \' \xe2\x94\x83\' for i in range(3)])\n\n    def __repr__(self):\n        # \xe2\x94\x8f\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xb3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xb3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x93\n        # \xe2\x94\x83 . . . \xe2\x94\x83 . . . \xe2\x94\x83 . . . \xe2\x94\x83\n        # \xe2\x94\x83 . . . \xe2\x94\x83 X . X \xe2\x94\x83 . . O \xe2\x94\x83\n        # \xe2\x94\x83 . X . \xe2\x94\x83 . . O \xe2\x94\x83 . . . \xe2\x94\x83\n        # \xe2\x94\xa3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xab\n        # \xe2\x94\x83 . . . \xe2\x94\x83 . . . \xe2\x94\x83*X X X \xe2\x94\x83\n        # \xe2\x94\x83 X . O \xe2\x94\x83 . . . \xe2\x94\x83 O . O \xe2\x94\x83\n        # \xe2\x94\x83 . . O \xe2\x94\x83 . . . \xe2\x94\x83 . . . \xe2\x94\x83\n        # \xe2\x94\xa3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xab\n        # \xe2\x94\x83 . . . \xe2\x94\x83 . O . \xe2\x94\x83 . O . \xe2\x94\x83\n        # \xe2\x94\x83 . . . \xe2\x94\x83 . . . \xe2\x94\x83 . . X \xe2\x94\x83\n        # \xe2\x94\x83 . . . \xe2\x94\x83 . . . \xe2\x94\x83 . . X \xe2\x94\x83\n        # \xe2\x94\x97\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xbb\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xbb\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x9b\n        s = \'\xe2\x94\x8f\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xb3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xb3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x93\\n\'\n        for i in [1, 4, 7]:\n            for j in [1, 4, 7]:\n                s += self._three_board_line(i, j) + \'\\n\'\n            if i != 7:\n                s += \'\xe2\x94\xa3\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x95\x8b\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xab\\n\'\n            else:\n                s += \'\xe2\x94\x97\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xbb\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\xbb\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x81\xe2\x94\x9b\\n\'\n        # Hack: by rows and colums of move and current board\n        # calculate place to put \'*\' so it is visible\n        c = self.last_curr - 1\n        c_c = c % 3\n        c_r = c // 3\n        m_c = (self.last_move - 1) % 3\n        m_r = (self.last_move - 1)// 3\n        p = 26 + c_r * (26 * 4) + c_c * 8 + m_r * 26 + m_c * 2 + 1\n        s = s[:p] + \'*\' + s[p+1:]\n        return s\n\n\nclass OXOState:\n    def __init__(self):\n        self.playerJustMoved = 2\n        self.ended = False\n        self.board = [0, 0, 0, 0, 0, 0, 0, 0, 0]\n\n    def Clone(self):\n        return deepcopy(self)\n\n    def DoMove(self, move):\n        #  0 1 2\n        #  3 4 5\n        #  6 7 8\n        winning_pairs = [[[1, 2], [4, 8], [3, 6]],  # for 0\n                         [[0, 2], [4, 7]],  # for 1\n                         [[0, 1], [4, 6], [5, 8]],  # for 2\n                         [[0, 6], [4, 5]],  # for 3\n                         [[0, 8], [1, 7], [2, 6], [3, 5]],  # for 4\n                         [[2, 8], [3, 4]],  # for 5\n                         [[0, 3], [4, 2], [7, 8]],  # for 6\n                         [[6, 8], [1, 4]],  # for 7\n                         [[6, 7], [0, 4], [2, 5]],  # for 6\n                         ]\n        if not isinstance(move, int) or 0 < move > 8 or \\\n                self.board[move] != 0:\n            raise ValueError\n        self.playerJustMoved = 3 - self.playerJustMoved\n        self.board[move] = self.playerJustMoved\n        for index1, index2 in winning_pairs[move]:\n            if self.playerJustMoved == self.board[index1] == self.board[index2]:\n                self.ended = True\n\n    def GetMoves(self):\n        return [] if self.ended else [i for i in range(9) if self.board[i] == 0]\n\n    def GetResult(self, playerjm):\n        for (x, y, z) in [(0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7),\n                          (2, 5, 8), (0, 4, 8), (2, 4, 6)]:\n            if self.board[x] == self.board[y] == self.board[z]:\n                if self.board[x] == playerjm:\n                    return 1.0\n                elif self.board[x] != 0:\n                    return 0.0\n        if self.GetMoves() == []:\n            return 0.5  # draw\n        raise ValueError\n\n    def __repr__(self):\n        s = ""\n        for i in range(9):\n            s += \'.XO\'[self.board[i]]\n            if i % 3 == 2: s += "\\n"\n        return s\n\n\nclass Node:\n    """ A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.\n        Crashes if state not specified.\n    """\n\n    def __init__(self, move=None, parent=None, state=None):\n        self.move = move  # the move that got us to this node - "None" for the root node\n        self.parentNode = parent  # "None" for the root node\n        self.childNodes = []\n        self.wins = 0\n        self.visits = 0\n        self.untriedMoves = state.GetMoves()  # future child nodes\n        self.playerJustMoved = state.playerJustMoved  # the only part of the state that the Node needs later\n\n    def UCTSelectChild(self):\n        """ Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have\n            lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of\n            exploration versus exploitation.\n        """\n        s = sorted(self.childNodes, key=lambda c: c.wins / c.visits + sqrt(\n            2 * log(self.visits) / c.visits))[-1]\n        return s\n\n    def AddChild(self, m, s):\n        """ Remove m from untriedMoves and add a new child node for this move.\n            Return the added child node\n        """\n        n = Node(move=m, parent=self, state=s)\n        self.untriedMoves.remove(m)\n        self.childNodes.append(n)\n        return n\n\n    def Update(self, result):\n        """ Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.\n        """\n        self.visits += 1\n        self.wins += result\n\n    def __repr__(self):\n        return "[M:" + str(self.move) + " W/V:" + str(self.wins) + "/" + str(\n            self.visits) + " U:" + str(self.untriedMoves) + "]"\n\n    def TreeToString(self, indent):\n        s = self.IndentString(indent) + str(self)\n        for c in self.childNodes:\n            s += c.TreeToString(indent + 1)\n        return s\n\n    def IndentString(self, indent):\n        s = "\\n"\n        for i in range(1, indent + 1):\n            s += "| "\n        return s\n\n    def ChildrenToString(self):\n        s = ""\n        for c in self.childNodes:\n            s += str(c) + "\\n"\n        return s\n\n\ndef UCT(rootstate, itermax, verbose=False):\n    """ Conduct a UCT search for itermax iterations starting from rootstate.\n        Return the best move from the rootstate.\n        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""\n\n    rootnode = Node(state=rootstate)\n\n    for i in range(itermax):\n        node = rootnode\n        state = rootstate.Clone()\n\n        # Select\n        while node.untriedMoves == [] and node.childNodes != []:  # node is fully expanded and non-terminal\n            node = node.UCTSelectChild()\n            state.DoMove(node.move)\n\n        # Expand\n        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)\n            m = random.choice(node.untriedMoves)\n            state.DoMove(m)\n            node = node.AddChild(m, state)  # add child and descend tree\n\n        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function\n        while state.GetMoves() != []:  # while state is non-terminal\n            state.DoMove(random.choice(state.GetMoves()))\n\n        # Backpropagate\n        while node != None:  # backpropagate from the expanded node and work back to the root node\n            node.Update(state.GetResult(\n                node.playerJustMoved))  # state is terminal. Update node with result from POV of node.playerJustMoved\n            node = node.parentNode\n\n    # Output some information about the tree - can be omitted\n    # if (verbose):\n    #     print(rootnode.TreeToString(0))\n    # else:\n    #     print(rootnode.ChildrenToString())\n\n    return sorted(rootnode.childNodes, key=lambda c: c.visits)[\n        -1].move  # return the move that was most visited\n\n\ndef HumanPlayer(state):\n    moves = state.GetMoves()\n    while True:\n        try:\n            m = int(input("Your move " + str(moves) + " : "))\n            if m in moves:\n                return m\n        except ValueError:\n            pass\n\n\ndef RandomPlayer(state):\n    return random.choice(state.GetMoves())\n\n\ndef negamax(board, color, depth):  # ##################################################\n    moves = board.GetMoves()\n    if not moves:\n        x = board.GetResult(board.playerJustMoved)\n        if x == 0.0:\n            print(\'negamax ERROR:\')\n            print(board)\n            print(board.playerJustMoved)\n            print(board.curr, board.ended)\n            print(board.GetMoves())\n            raise ValueError\n        if x == 0.5:\n            return 0.0, None\n        if color == 1 and board.playerJustMoved == 1:\n            return 1.0, None\n        else:\n            return -1.0, None\n    if depth == 0:\n        return 0.0, None\n    v = float("-inf")\n    best_move = []\n    for m in moves:\n        new_board = board.Clone()\n        new_board.DoMove(m)\n        x, _ = negamax(new_board, -color, depth - 1)\n        x = - x\n        if x >= v:\n            if x > v:\n                best_move = []\n            v = x\n            best_move.append(m)\n    if depth >=8:\n        print(depth, v, best_move)\n    return v, best_move\n\n\ndef NegamaxPlayer(game):\n        best_moves = game.GetMoves()\n        if len(best_moves) != 9:\n            _, best_moves = negamax(game, 1, 4)\n            print(best_moves)\n        return random.choice(best_moves)\n\n\nif __name__ == "__main__":\n    def main():\n        random.seed(123456789)\n        won = 0\n        lost = 0\n        draw = 0\n        for i in range(10):\n            # state = OXOState() # uncomment to play OXO\n            state = BigGameState()\n            move = 0\n            while (state.GetMoves() != []):\n                if state.playerJustMoved == 1:\n                    # m = RandomPlayer(state)\n                    m = UCT(rootstate=state, itermax=100, verbose=False)\n                else:\n                    # m = UCT(rootstate=state, itermax=100, verbose=False)\n                    # m = NegamaxPlayer(state)\n                    m = HumanPlayer(state)\n                    # m = RandomPlayer(state)\n                state.DoMove(m)\n                move += 1\n                print(\'Game \', i + 1, \', Move \', move, \':\\n\', state, sep=\'\')\n            if state.GetResult(1) == 1.0:\n                won += 1\n                print("Player 1 wins!")\n            elif state.GetResult(1) == 0.0:\n                lost += 1\n                print("Player 2 wins!")\n            else:\n                draw += 1\n                print("Nobody wins!")\n            print(\'won\', won, \'lost\', lost, \'draw\', draw)\n\n    start_time = time.perf_counter()\n    main()\n    total_time = time.perf_counter() - start_time\n    print(\'total_time\', total_time)\n

Run Code Online (Sandbox Code Playgroud)\n

归档时间：	6 年，8 月前
查看次数：	627 次
最近记录：	6 年，8 月前