Why does the utility computation in is_terminal include the number of blanks left in a terminal state? What does this mean as a performance measure for the agent?

Consider:

import numpy as np

def actions(state):
    """Return the indices of every blank cell ('.') on the board.

    The indices are the tuples produced by ``np.ndenumerate`` and are
    returned in the order that iterator visits the cells.
    """
    blanks = []
    for idx, cell in np.ndenumerate(state):
        if cell == '.':
            blanks.append(idx)
    return blanks


def result(state, action, player):
    """Return a copy of ``state`` in which cell ``action`` holds ``player``.

    The board passed in is left untouched; only the copy is written to.
    """
    successor = state.copy()
    successor[action] = player
    return successor


def is_terminal(state, k):
    """Decide whether ``state`` ends the game and, if so, report its score.

    Returns a ``(done, score)`` pair:
      * ``(True, 1 + blanks)``    if 'X' has k in a row,
      * ``(True, -(1 + blanks))`` if 'O' has k in a row,
      * ``(True, 0)``             if the board is full with no winner,
      * ``(False, None)``         otherwise,
    where ``blanks`` is the number of '.' cells still on the board.
    'X' is tested before 'O'.
    """
    blanks_left = np.count_nonzero(state == '.')

    # Same check for both marks; only the sign of the score differs.
    for mark, sign in (('X', 1), ('O', -1)):
        cells = [idx for idx, cell in np.ndenumerate(state) if cell == mark]
        if has_k_in_a_row(cells, k):
            return True, sign * (1 + blanks_left)

    # Nobody has won: a full board is a tie, anything else is still in play.
    if blanks_left == 0:
        return True, 0
    return False, None


#--------------------------------------------
# Helper functions used by is_terminal() (above)

def has_k_in_a_row(indices, k):
    """Report whether ``indices`` contains k positions in a straight line.

    Each position in ``indices`` is tried as the start of a line; the
    candidate lines from that start are supplied by ``sequences``.
    Returns True as soon as one candidate line is fully contained in
    ``indices``, and False if none is.
    """
    # A set gives fast subset tests against every candidate line.
    occupied = set(indices)

    for origin in indices:
        for line in sequences(origin, k):
            if line.issubset(occupied):
                return True

    return False


def sequences(start_pos, k):
    """Build the four length-k lines of indices that begin at ``start_pos``.

    Returns a 4-tuple of sets of ``(row, col)`` pairs, in this order:
    across (column increases), down (row increases), diagonally down
    (row and column increase), diagonally up (row increases, column
    decreases). No bounds checking is done, so the sets may contain
    negative or off-board coordinates.
    """
    # (row step, column step) for across, down, diagdown, diagup.
    directions = ((0, 1), (1, 0), (1, 1), (1, -1))
    return tuple(
        {(start_pos[0] + dr * j, start_pos[1] + dc * j) for j in range(k)}
        for dr, dc in directions
    )

Review the code above, and make sure you understand it, especially actions, result, and is_terminal.

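(Note: we could have gone with the standard ±1 utility for an X win/loss, but the extra "bonus" provides some useful information: because the magnitude is 1 plus the number of blanks remaining, a win reached sooner (with more blanks left) scores higher for the winner, so the utility distinguishes quick wins from slow ones instead of treating every win alike.)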



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source