quarto_gen/quarto/client/ai.py

from __future__ import annotations

import math
import time
from typing import Optional, Tuple, List

from quarto.shared.game import (
    GameState,
    InitialMove,
    NormalMove,
    Player,
    generate_initial_moves,
    generate_normal_moves,
    detect_quarto,
)


class QuartoAI:
    """
    Time-bounded AI using iterative deepening alpha-beta search.
    Designed to avoid obvious blunders and always return a legal move.
    """

    def __init__(self) -> None:
        # Evaluation parameters
        self.win_value = 10_000
        self.lose_value = -10_000

    def choose_initial_piece(self, state: GameState, time_limit: float) -> InitialMove:
        moves = generate_initial_moves(state)
        if not moves:
            raise RuntimeError("No initial moves available")
        # For initial piece selection, use a simple heuristic: avoid obviously strong pieces
        # like "balanced" ones (7, 8) – but this is minor; just pick first for now.
        return moves[0]

    def choose_normal_move(self, state: GameState, time_limit: float) -> NormalMove:
        legal_moves = generate_normal_moves(state)
        if not legal_moves:
            raise RuntimeError("No legal moves available")

        start = time.time()
        deadline = start + max(0.1, time_limit - 0.2)

        best_move = legal_moves[0]
        best_val = -math.inf

        depth = 1
        while True:
            if time.time() >= deadline:
                break
            val, move = self._search_root(state, depth, deadline)
            if move is not None:
                best_move = move
                best_val = val
            depth += 1
            # Early stopping if decisive win found
            if best_val >= self.win_value - 1:
                break

        return best_move

    def _search_root(
        self, state: GameState, depth: int, deadline: float
    ) -> Tuple[float, Optional[NormalMove]]:
        legal_moves = generate_normal_moves(state)
        if not legal_moves:
            return self._evaluate(state), None

        best_val = -math.inf
        best_move: Optional[NormalMove] = None
        alpha = -math.inf
        beta = math.inf

        for move in legal_moves:
            if time.time() >= deadline:
                break
            # Clone state for simulation
            sim_state = self._clone_state(state)
            self._apply_simulated_move(sim_state, move)
            val = -self._alphabeta(sim_state, depth - 1, -beta, -alpha, deadline)
            if val > best_val:
                best_val = val
                best_move = move
            if val > alpha:
                alpha = val
            if alpha >= beta:
                break

        return best_val, best_move

    def _alphabeta(
        self,
        state: GameState,
        depth: int,
        alpha: float,
        beta: float,
        deadline: float,
    ) -> float:
        if time.time() >= deadline:
            # Return heuristic evaluation at cutoff
            return self._evaluate(state)

        if state.game_over or depth == 0:
            return self._evaluate(state)

        legal_moves = generate_normal_moves(state)
        if not legal_moves:
            return self._evaluate(state)

        value = -math.inf
        for move in legal_moves:
            if time.time() >= deadline:
                break
            sim_state = self._clone_state(state)
            self._apply_simulated_move(sim_state, move)
            score = -self._alphabeta(sim_state, depth - 1, -beta, -alpha, deadline)
            if score > value:
                value = score
            if score > alpha:
                alpha = score
            if alpha >= beta:
                break
        return value

    def _evaluate(self, state: GameState) -> float:
        """
        Static evaluation from perspective of current_player.
        """
        if state.game_over:
            if state.winner is None:
                return 0.0
            return self.win_value if state.winner == state.current_player else self.lose_value

        # Heuristic: count potential lines for both players
        board = state.board
        my_score = 0
        opp_score = 0

        from quarto.shared.game import LINES

        for line in LINES:
            pieces = [board.squares[i] for i in line]
            if all(p is not None for p in pieces):
                # Completed line; check if it is a Quarto
                if detect_quarto(board):
                    # If Quarto exists and nobody claimed, it's neutral until claimed.
                    # Slight bonus to current player for potential claim.
                    my_score += 20
                continue
            # For partially filled lines, count how many shared attributes are still possible.
            occupied = [p for p in pieces if p is not None]
            if not occupied:
                continue
            common_bits = 0b1111
            p0 = occupied[0]
            for p in occupied[1:]:
                common_bits &= ~(p ^ p0)
            if common_bits:
                # More shared bits and more empty slots => more potential
                empty_slots = sum(1 for p in pieces if p is None)
                my_score += (bin(common_bits).count("1") * empty_slots)

        # Very rough balancing: opponent score approximated similarly by symmetry
        # but we don't track explicit opponent perspective, so we just scale.
        return float(my_score - opp_score)

    def _clone_state(self, state: GameState) -> GameState:
        new_state = GameState()
        new_state.board = state.board.clone()
        new_state.current_player = state.current_player
        new_state.assigned_piece = state.assigned_piece
        new_state.remaining_pieces = set(state.remaining_pieces)
        new_state.move_count = state.move_count
        new_state.game_over = state.game_over
        new_state.winner = state.winner
        new_state.last_move_was_claim = state.last_move_was_claim
        new_state.last_move_was_draw_marker = state.last_move_was_draw_marker
        return new_state

    def _apply_simulated_move(self, state: GameState, move: NormalMove) -> None:
        """
        Apply a move in simulation only; we don't need full validation here
        because moves are generated from generate_normal_moves.
        """
        from quarto.shared.game import _apply_normal_move_no_rules, detect_quarto

        # Apply the move structure
        _apply_normal_move_no_rules(state, move)

        # Post-apply rules for claims and draw markers in simulation
        if move.claim_quarto:
            # Evaluate claim after placement
            has_quarto = detect_quarto(state.board)
            if has_quarto:
                state.game_over = True
                state.winner = state.current_player
            else:
                state.game_over = True
                state.winner = state.current_player.other()
        elif move.final_pass:
            state.game_over = True
            state.winner = None
        else:
            # If board full and no pieces left, it's draw
            if state.board.is_full() and not state.remaining_pieces:
                state.game_over = True
                state.winner = None