|
- # Third party code
- #
- # The following code is copied or modified from:
- # https://github.com/suragnair/alpha-zero-general
-
- from tqdm import tqdm
- from parl.utils import logger
-
-
class Arena():
    """
    An Arena class where any 2 agents can be pit against each other.

    The arena holds two player callables and a game object, and drives
    complete episodes by alternating between the players until the game
    reports a non-zero result.
    """

    def __init__(self, player1, player2, game):
        """
        Input:
            player 1,2: two functions that takes board as input, return action
            game: Game object

        see othello/OthelloPlayers.py for an example. See pit.py for pitting
        human players/other baselines with each other.
        """
        self.player1 = player1
        self.player2 = player2
        self.game = game

    def playGame(self, verbose=True):
        """
        Executes one episode of a game. player1 always moves first.

        Input:
            verbose: when true, print the turn counter and display the board
                before every move and once more when the game is over.

        Returns:
            either
                winner: player who won the game (1 if player1, -1 if player2)
            or
                draw result returned from the game that is neither 1, -1, nor 0.

        Raises:
            AssertionError: if a player returns an action that the game
                does not mark as valid for the current position.
        """
        # Indexed with curPlayer + 1, so -1 -> player2 and 1 -> player1.
        players = [self.player2, None, self.player1]
        curPlayer = 1
        board = self.game.getInitBoard()
        it = 0
        while self.game.getGameEnded(board, curPlayer) == 0:
            it += 1
            if verbose:
                print("Turn ", str(it), "Player ", str(curPlayer))
                self.game.display(board)
            # Players only ever see the board in canonical form.
            action = players[curPlayer + 1](
                self.game.getCanonicalForm(board, curPlayer))

            valids = self.game.getValidMoves(
                self.game.getCanonicalForm(board, curPlayer), 1)

            # Raise explicitly instead of using `assert`: an assert statement
            # is removed under `python -O`, which would let an illegal move
            # reach getNextState. AssertionError is kept so that existing
            # callers observe the same exception type.
            if not valids[action] > 0:
                logger.error('Action {} is not valid!'.format(action))
                logger.debug('valids = {}'.format(valids))
                raise AssertionError(
                    'Action {} is not valid!'.format(action))
            board, curPlayer = self.game.getNextState(board, curPlayer, action)
        if verbose:
            print("Game over: Turn ", str(it), "Result ",
                  str(self.game.getGameEnded(board, 1)))
            self.game.display(board)
        # getGameEnded is queried for curPlayer; multiplying by curPlayer
        # re-expresses that result relative to player1.
        return curPlayer * self.game.getGameEnded(board, curPlayer)

    def playGames(self, num, verbose=True):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games. Each half plays int(num / 2) games, so an odd num
        results in num - 1 games in total.

        The two players are swapped internally for the second half and are
        restored before returning, so the arena can be reused afterwards.

        Input:
            num: total number of games to play
            verbose: forwarded to playGame for every game

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """

        num = int(num / 2)
        oneWon = 0
        twoWon = 0
        draws = 0
        for _ in tqdm(range(num), desc="Arena.playGames (1)"):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                oneWon += 1
            elif gameResult == -1:
                twoWon += 1
            else:
                draws += 1

        # Second half: the original player2 takes the first-mover seat.
        self.player1, self.player2 = self.player2, self.player1
        try:
            for _ in tqdm(range(num), desc="Arena.playGames (2)"):
                gameResult = self.playGame(verbose=verbose)
                # Seats are swapped, so a result of -1 is a win for the
                # original player1 and 1 is a win for the original player2.
                if gameResult == -1:
                    oneWon += 1
                elif gameResult == 1:
                    twoWon += 1
                else:
                    draws += 1
        finally:
            # Undo the swap even if a game raised; otherwise a later call
            # would attribute wins to the wrong player.
            self.player1, self.player2 = self.player2, self.player1

        return oneWon, twoWon, draws
|