tic tac toe 的 minimax c++ 实现答案

【问题标题】：minimax c++ implementation for tic tac toetic tac toe 的 minimax c++ 实现
【发布时间】：2014-07-17 15:46:02
【问题描述】：

void generate_moves(int gameBoard[9], list<int> &moves)
{
   for (int i = 0; i < 9; i++)
   {
        if (gameBoard[i] == 0){
            moves.push_back(i);
        }
   }
}



 int evaluate_position(int gameBoard[9], int playerTurn)        
 {
   state currentGameState = checkWin(gameBoard);

   if (currentGameState != PLAYING)
    {
       if ((playerTurn == 1 && currentGameState == XWIN) || (playerTurn == -1 && currentGameState == OWIN))
          return +infinity;
       else if ((playerTurn == -1 && currentGameState == XWIN) || (playerTurn == 1 && currentGameState == OWIN))
          return -infinity;
       else if (currentGameState == DRAW)
            return 0;
   }

     return -1;
  }




 int MinMove(int gameBoard[9], int playerTurn)
 {
     //if (checkWin(gameBoard) != PLAYING) { return evaluate_board(gameBoard); };
    int pos_val = evaluate_position(gameBoard, playerTurn);
     if (pos_val != -1) return pos_val;

     int bestScore = +infinity;
     list<int> movesList;
     generate_moves(gameBoard, movesList);

     while (!movesList.empty())
     {
         gameBoard[movesList.front()] = playerTurn;
         int score = MaxMove(gameBoard, playerTurn*-1); 
         if (score < bestScore)
         {
            bestScore = score;
         }
         gameBoard[movesList.front()] = 0;
         movesList.pop_front();
     }

    return bestScore;
 }



  int MaxMove(int gameBoard[9], int playerTurn)
  {
     //if (checkWin(gameBoard) != PLAYING) { return evaluate_board(gameBoard); };
     int pos_val = evaluate_position(gameBoard, playerTurn);
     if (pos_val != -1) return pos_val;

      int bestScore = -infinity;
      list<int> movesList;
      generate_moves(gameBoard, movesList);

      while (!movesList.empty())
      {
         gameBoard[movesList.front()] = playerTurn;
         int score = MinMove(gameBoard, playerTurn*-1);
         if (score > bestScore)
         {
             bestScore = score;
         }
         gameBoard[movesList.front()] = 0;
         movesList.pop_front();
       }

      return bestScore;
  }


  int MiniMax(int gameBoard[9], int playerTurn)
  {
     int bestScore = -infinity;
     int index = 0;
     list<int> movesList;
     vector<int> bestMoves;
     generate_moves(gameBoard, movesList);

     while (!movesList.empty())
     {
          gameBoard[movesList.front()] = playerTurn;
          int score = MinMove(gameBoard, playerTurn);
          if (score > bestScore)
          {
             bestScore = score;
             bestMoves.clear();
             bestMoves.push_back(movesList.front());
          }
          else if (score == bestScore)
          {
             bestMoves.push_back(movesList.front());
          }
        gameBoard[movesList.front()] = 0;
        movesList.pop_front();
     }

    index = bestMoves.size();
    if (index > 0) {
        time_t secs;
        time(&secs);
        srand((uint32_t)secs);
        index = rand() % index;
     }

    return bestMoves[index];
  }

我用 C++ 创建了一个井字游戏程序，并尝试使用穷举搜索树实现 MiniMax 算法。这些是我使用 wiki 并在一些网站的帮助下编写的功能。但人工智能就是不能正常工作，有时根本没有轮到它。

有人可以看看，如果逻辑有问题，请指出？

我认为它是这样工作的：

Minimax ：此函数以非常大的 -ve 数字作为最佳得分开始，目标是最大化该数字。它调用minMove 函数。如果新分数 > 最佳分数，那么最佳分数 = 新分数...

MinMove：此函数评估游戏板。如果游戏结束，则返回 -infinity 或 +infinity 取决于谁赢了。如果游戏正在进行，则此函数以 max +infinity 值作为最佳分数，目标是尽可能地最小化它。它在轮到对手玩家时调用MaxMove。（因为玩家交替轮流）。如果分数

MaxMove：此函数评估游戏板。如果游戏结束，则返回 -infinity 或 +infinity 取决于谁赢了。如果游戏正在进行，则此函数以最小 -infinity 值作为最佳得分，目标是尽可能最大化它。它在轮到对手玩家时调用MinMove。（因为玩家交替轮流）。如果分数 > 最佳分数，则最佳分数 = 分数。 ...

Minmove 和 MaxMove 相互递归调用，MaxMove 最大化值，MinMove 最小化它。最后，它返回最好的移动列表。

如果有超过 1 个最佳移动，则随机选择其中一个作为计算机的移动。

【问题讨论】：

[OT]：你应该只调用一次srand((uint32_t)secs);（主要）。
除了不每次都播种prng，更喜欢<random>而不是Crand()

标签： c++ brute-force minimax

【解决方案1】：

在 MiniMax 中，MinMove(gameBoard, playerTurn) 应该是 MinMove(gameBoard, -playerTurn)，就像在 MaxMove 中一样。

当您使用MinMove 和MaxMove 时，您的评估函数应该是绝对的。我的意思是+infinity 代表XWIN 和-infinity 为OWIN。所以MinMove只能在player == -1时使用，MaxMove在player == 1时才能使用，这样参数就没有用了。所以MiniMax只能被player == 1使用。

我对您的代码进行了一些更改，并且可以正常工作 (https://ideone.com/Ihy1SR)。

【讨论】：

您发布的代码有效。你的解释帮助我理解了。只是一个问题......为什么MaxMove用来返回-ve？是因为 O 是一个最小化玩家，它总是会评估 -ve 并且我们需要一个正分数来与 bestScore（在这种情况下是 -infinity）进行比较？
@ChandanPednekar：对不起，我不明白你的问题，你能澄清一下吗？