Minimax算法与Alpha-Beta算法

博弈树
~描述：节点是状态，边表示动作或落子。
~原则：己方利益最大化，对方利益最小化。
Minimax算法
~定义:在理想对手的假设下，MIN方总是最小化MAX方的最大化努力。
~原理：使用了简单的深度递归搜索技术来确定每一个节点的值：它从根节点开始，一直前进到叶子节点，然后在递归回溯时，将叶子节点的效用值往上回传——对于MAX方，计算最大值，对于MIN方，计算最小值。
~示例：以下是一个2层（Two-Ply）博弈树：
![在这里插入图片描述](https://img-blog.csdnimg.cn/20200921210447752.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0wzWjNM,size_16,color_FFFFFF,t_70#pic_center 1.叶子节点的效用值越大，对MAX方越有利。在MIN方会选择该节点下效用值最小的节点。即 3，2，2（第三层从左向右依次）。
2.MAX方要在“3，2，2”选择一个，选择对MAX方最有利的3。

~注：上图有颜色的方框是用Alpha-Beta算法分析的，用于将两种算法进行对比，下面将会对该算法扩展分析。

Alpha-Beta算法
以下是将上面的博弈树用Alpha-Beta算法的分解分析：
(a).

(b).
在这里插入图片描述

©.
在这里插入图片描述
(d).

(e).

(f).

总结分析：
Alpha-Beta算法用了剪枝而Minimax算法没有，从代码的运行可以明显看出二者速度的差别。

Minimax算法的代码：

# -*- coding: utf-8 -*-
"""
Created on Fri Oct 26 14:41:12 2018

@author: duxiaoqin
Functions:
    (1) Minimax Algorithm for TicTacToe
"""

from graphics import *
from tictactoe import *
from tttdraw import *
from tttinput import *
import sys

def Minimax(node, depth):
    result = node.isGameOver()
    if result != None:
        return result, (), depth
    if node.getPlayer() == TicTacToe.BLACK:
        bestValue = -sys.maxsize
        bestMove = ()
        bestDepth = sys.maxsize
        moves = node.getAllMoves()
        for move in moves:
            child = node.clone()
            child.play(*move)
            v, _, leafDepth = Minimax(child, depth+1)
            if bestValue == v and bestDepth > leafDepth:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
            if bestValue < v:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
        return bestValue, bestMove, bestDepth
    else:
        bestValue = sys.maxsize
        bestMove = ()
        bestDepth = sys.maxsize
        moves = node.getAllMoves()
        for move in moves:
            child = node.clone()
            child.play(*move)
            v, _, leafDepth = Minimax(child, depth+1)
            if bestValue == v and bestDepth > leafDepth:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
            if bestValue > v:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
        return bestValue, bestMove, bestDepth
    
def main():
    win = GraphWin('Minimax for TicTacToe', 600, 600, autoflush=False)
    ttt = TicTacToe()
    tttdraw = TTTDraw(win)
    tttinput = TTTInput(win)
    tttdraw.draw(ttt)

    while win.checkKey() != 'Escape':
        if ttt.getPlayer() == TicTacToe.WHITE:
            v, move, _ = Minimax(ttt, 0)
            if move != ():
                ttt.play(*move)
        tttinput.input(ttt)
        tttdraw.draw(ttt)
        if ttt.isGameOver() != None:
            time.sleep(1)
            ttt.reset()
            tttdraw.draw(ttt)
            #win.getMouse()
    win.close()
    
if __name__ == '__main__':
    main()

Alpha-Beta算法的代码：

# -*- coding: utf-8 -*-
"""
Created on Fri Oct 26 20:53:08 2018

@author: duxiaoqin
Functions:
    (1) Alpha-Beta Algorithm for TicTacToe
"""

from graphics import *
from tictactoe import *
from tttdraw import *
from tttinput import *
import sys
import time

def AlphaBeta(node, depth, alpha, beta):
    result = node.isGameOver()
    if result != None:#游戏结束
        return result, (), depth
    if node.getPlayer() == TicTacToe.BLACK:
        bestValue = -sys.maxsize
        bestMove = ()
        bestDepth = sys.maxsize
        moves = node.getAllMoves()
        for move in moves:
            child = node.clone()
            child.play(*move)
            v, _, leafDepth = AlphaBeta(child, depth+1, alpha, beta)
            if bestValue == v and bestDepth > leafDepth:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
            if bestValue < v:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
            alpha = max(alpha, bestValue)
            if beta <= alpha:#进行剪枝
                break #beta pruning
        return bestValue, bestMove, bestDepth
    else:
        bestValue = sys.maxsize
        bestMove = ()
        bestDepth = sys.maxsize
        moves = node.getAllMoves()
        for move in moves:
            child = node.clone()
            child.play(*move)
            v, _, leafDepth = AlphaBeta(child, depth+1, alpha, beta)
            #‘_’在该函数内不起作用
            if bestValue == v and bestDepth > leafDepth:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
            if bestValue > v:
                bestValue = v
                bestMove = move
                bestDepth = leafDepth
            beta = min(beta, bestValue)
            if beta <= alpha:
                break #alpha pruning
        return bestValue, bestMove, bestDepth
    
def main():
    win = GraphWin('Minimax for TicTacToe', 600, 600, autoflush=False)
    ttt = TicTacToe()
    tttdraw = TTTDraw(win)
    tttinput = TTTInput(win)
    tttdraw.draw(ttt)

    while win.checkKey() != 'Escape':
        if ttt.getPlayer() == TicTacToe.WHITE:
            v, move, _ = AlphaBeta(ttt, 0, -sys.maxsize, sys.maxsize)
            if move != ():
                ttt.play(*move)
        tttinput.input(ttt)
        tttdraw.draw(ttt)
        if ttt.isGameOver() != None:
            time.sleep(1)
            ttt.reset()
            tttdraw.draw(ttt)
            #win.getMouse()
    win.close()
    
if __name__ == '__main__':
    main()

博弈树代码：

# -*- coding: utf-8 -*-
"""
Created on Mon Sep 10 22:25:08 2018

@author: duxiaoqin

Functions:
    (1) TicTacToe class;
"""

from myarray2d import Array2D

class TicTacToe:
    
    BLACK = True
    WHITE = False
    EMPTY = None
    
    BLACKWIN = 1
    WHITEWIN = -1
    DRAW = 0
    
    def __init__(self):
        self.board = Array2D(3, 3)
        self.player = TicTacToe.BLACK
        self.black = []
        self.white = []
        
        self.magic = Array2D(3, 3)
        self.magic[0, 0] = 2
        self.magic[0, 1] = 9
        self.magic[0, 2] = 4

        self.magic[1, 0] = 7
        self.magic[1, 1] = 5
        self.magic[1, 2] = 3

        self.magic[2, 0] = 6
        self.magic[2, 1] = 1
        self.magic[2, 2] = 8
        
    def reset(self):
        self.board.clear(None)
        self.player = TicTacToe.BLACK
        self.black = []
        self.white = []
        
    def clone(self):
        newttt = TicTacToe()
        for row in range(3):
            for col in range(3):
                newttt.board[row, col] = self.board[row, col]
        newttt.player = self.player
        newttt.black = self.black[:]
        newttt.white = self.white[:]
        
        return newttt
    
    def ToString(self):
        l = []
        for row in range(3):
            for col in range(3):
                if self.board[row, col] == TicTacToe.BLACK:
                    l.append('X')
                elif self.board[row, col] == TicTacToe.WHITE:
                    l.append('O')
                else:
                    l.append('_')
        return ''.join(l)
    
    def print(self):
        for row in range(3):
            for col in range(3):
                if self.board[row, col] == TicTacToe.BLACK:
                    print('X', end=' ')
                elif self.board[row, col] == TicTacToe.WHITE:
                    print('O', end=' ')
                else:
                    print('_', end=' ')
            print()
    
    def play(self, row, col):
        self.board[row, col] = self.player
        if self.player == TicTacToe.BLACK:
            self.black.append(self.magic[row, col])
        else:
            self.white.append(self.magic[row, col])
        self.player = not self.player
        
    def getPlayer(self):
        return self.player
    
    def getAllMoves(self):
        return [(row, col) for row in range(3) \
                               for col in range(3) \
                                   if self.board[row, col] == TicTacToe.EMPTY]
    
    def isWin(self, n, goal, moves):
        moves_clone = moves[:]
        if n == 0:
            return goal == 0
        elif goal <= 0:
            return False
        elif len(moves_clone) == 0:
            return False
        else:
            item = moves_clone.pop(0)
            if self.isWin(n-1, goal-item, moves_clone[:]):
                return True
            elif self.isWin(n, goal, moves_clone[:]):
                return True
        return False
    
    def isGameOver(self):
        if self.isWin(3, 15, self.black):
            return TicTacToe.BLACKWIN
        elif self.isWin(3, 15, self.white):
            return TicTacToe.WHITEWIN
        elif len(self.black)+len(self.white) == 9:
            return TicTacToe.DRAW
        else:
            return None
    
def main():
    ttt = TicTacToe()
    ttt.play(1, 1)
    ttt.play(0, 0)
    ttt.play(2, 0)
    ttt.play(0, 1)
    ttt.play(0, 2)
    ttt.print()
    print(ttt.isGameOver())
    print(ttt.ToString())
    
if __name__ == '__main__':
    main()

原文链接：https://blog.csdn.net/L3Z3L/article/details/108656511