基礎のゲームアルゴリズムの――極大極小とAB剪定

4458 ワード

極小アルゴリズム及びAlpha_ベタカット
# python3
# author  :   
# time    :2018.10.28
# email   :[email protected]

class TicTacToe(object):
    '''
         :
        player:      ,-1  AI,1    
        board:  
    '''
    #      ,     AI  
    def __init__(self,depth,first = -1):
        self.depth = depth
        self.restart(first)

    #       ,player  
    def restart(self,player):
        self.board = [0]*9
        self.player = player
        self.bestmove = -1

    #     
    def go(self,pos):
        self.board[pos]=self.player
        self.player = -self.player

    #       
    def cancel_go(self,pos):
        self.board[pos]=0
        self.player = -self.player

    def print_board(self):
        print("-"*20)
        for i in range(3):
            for j in range(3):
                if self.board[i*3+j]==-1:
                    print("",end="X ")
                elif self.board[i*3+j]==1:
                    print("",end="O ")
                else:
                    print("",end="_ ")
            print()
        print("-"*20)

    #        0,AI    -1,      1,    2
    def get_winner(self):
        if any([var==3 or var ==-3 for var in [sum(self.board[i:i+3]) for i in range(0,9,3)]]):
            return -self.player
        if any([var==3 or var ==-3 for var in [sum(self.board[i:7+i:3]) for i in range(0,3)]]):
            return -self.player
        if any([var==3 or var ==-3 for var in [sum(self.board[:9:4]),sum(self.board[2:7:2])]]):
            return -self.player

        return 0 if self.board.count(0)>0 else 2

    def evaluate_minmax(self):
        winner = self.get_winner()
        if winner == 2:
            return 0
        return 1000000*winner

    def evaluate_nega_max(self):
        winner = self.get_winner()
        if winner == 2:
            return 0
        return 1000000*winner*self.player

    #       
    def min_max(self,depth):
        #              
        winner = self.get_winner()
        if winner!=0 or depth==0:
            return self.evaluate_minmax()

        #     
        #     
        moves = [x for x in range(9) if self.board[x]==0]
        #       
        bestvalue = -1000000*self.player

        for pos in moves:
            self.go(pos)
            value = self.min_max(depth-1)
            self.cancel_go(pos)

            if self.player==-1:
                if value<=bestvalue:
                    bestvalue = value
                    if depth==self.depth:
                        self.bestmove = pos
            else:
                if value>=bestvalue:
                    bestvalue = value
                    if depth==self.depth:
                        self.bestmove = pos
        return bestvalue

    #      
    def nega_max(self,depth):
        #              
        winner = self.get_winner()
        if winner!=0 or depth==0:
            return self.evaluate_nega_max()

        #     
        #     
        moves = [x for x in range(9) if self.board[x]==0]
        #       
        bestvalue = -1000000

        for pos in moves:
            self.go(pos)
            value = -self.nega_max(depth-1)
            self.cancel_go(pos)
            if value>=bestvalue:
                bestvalue = value
                if depth == self.depth:
                    self.bestmove = pos

        return bestvalue

    # alpha_beta  
    def alpha_beta(self,depth,alpha,beta):
        #              
        winner = self.get_winner()
        if winner!=0 or depth==0:
            return self.evaluate_nega_max()

        #     
        #     
        moves = [x for x in range(9) if self.board[x]==0]

        for pos in moves:
            self.go(pos)
            value = -self.alpha_beta(depth-1,-beta,-alpha)
            self.cancel_go(pos)
            if value>=beta:
                if self.depth==depth:
                    self.bestmove = pos
                return beta
            if value>alpha:
                if self.depth==depth:
                    self.bestmove = pos
                alpha = value
            
        return alpha


# 8   ,         ,         8       
game = TicTacToe(8)
while game.get_winner()==0:
    # game.min_max(game.depth)
    # game.nega_max(game.depth)
    game.alpha_beta(game.depth,-1000000,1000000)
    game.go(game.bestmove)
    game.print_board()

print("result:",game.get_winner())