-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagent.py
324 lines (256 loc) · 11.8 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
"""
An AI player for Othello.
"""
# gotta make moves within 10 seconds
import random
import sys
import time
import math
import heapq
# need a dctionary that maps a board state to its minimax value
# the key is the board and the value is the
cache = {}
# You can use the functions in othello_shared to write your AI
from othello_shared import find_lines, get_possible_moves, get_score, play_move
def eprint(*args, **kwargs): #you can use this for debugging, as it will print to sterr and not stdout
print(*args, file=sys.stderr, **kwargs)
# Method to compute utility value of terminal state
def compute_utility(board, color):
score = get_score(board) # (# of dark disks, # of light disks), color 1 for dark and 2 for light
final = (score[0] - score[1]) if color == 1 else (score[1] - score[0])
return final
# Better heuristic value of board
def compute_heuristic(board, color): #not implemented, optional
#IMPLEMENT
return 0 #change this!
############ MINIMAX ###############################
# returns lowest possible utility
# TODO: add cache to minimax
# TODO: should compute utility be for the opponent or for the color
def minimax_min_node(board, color, limit, caching = 0):
opponent = 1 if color == 2 else 2
moves = get_possible_moves(board, opponent) # get moves the opponent can make
min_util = math.inf
best_move = None
sorted_moves = []
if caching and board in cache:
return cache[board]
if not moves or limit == 0:
return (None, compute_utility(board, color)) # score of current player
if not moves or limit == 0:
return (None, compute_utility(board,color))
for move in moves:
board_after_play = play_move(board, opponent, move[0], move[1])
util = compute_utility(board_after_play, color)
sorted_moves.append((board_after_play, move, util))
for item in sorted_moves:
board_after_play = item[0]
result = minimax_max_node(board_after_play, color, limit - 1, caching)
utility = result[1]
if utility < min_util:
best_move = item[1]
min_util = utility
return (best_move, min_util)
# returns highest possible utility
def minimax_max_node(board, color, limit, caching = 0):
opponent = 1 if color == 2 else 2
moves = get_possible_moves(board, color)
max_util = -math.inf
best_move = None
sorted_moves = []
if caching and board in cache:
return cache[board]
if not moves or limit == 0:
return (None, compute_utility(board, color))
for move in moves:
board_after_play = play_move(board, color, move[0], move[1])
util = compute_utility(board_after_play, color)
sorted_moves.append((board_after_play, move, util))
for item in sorted_moves:
board_after_play = item[0]
# board_after_play = play_move(board, color, move[0], move[1])
result = minimax_min_node(board_after_play, color, limit - 1, caching)
utility = result[1]
if utility > max_util:
best_move = item[1]
max_util = utility
return (best_move, max_util)
def select_move_minimax(board, color, limit, caching = 0):
"""
Given a board and a player color, decide on a move.
The return value is a tuple of integers (i,j), where
i is the column and j is the row on the board.
Note that other parameters are accepted by this function:
If limit is a positive integer, your code should enfoce a depth limit that is equal to the value of the parameter.
Search only to nodes at a depth-limit equal to the limit. If nodes at this level are non-terminal return a heuristic
value (see compute_utility)
If caching is ON (i.e. 1), use state caching to reduce the number of state evaluations.
If caching is OFF (i.e. 0), do NOT use state caching to reduce the number of state evaluations.
"""
result = minimax_max_node(board, color, limit, caching)
return result[0]
############ ALPHA-BETA PRUNING #####################
# should my cache keep track of who the player is
def alphabeta_min_node(board, color, alpha, beta, limit, caching = 0, ordering = 0):
opponent = 1 if color == 2 else 2
best_move = None
min_util = math.inf
moves = get_possible_moves(board, opponent)
sorted_moves = []
unsorted_moves = []
select_list = []
if caching and board in cache:
return cache[board]
if not moves or limit == 0:
return (None, compute_utility(board,color))
# SORT
for move in moves:
board_after_play = play_move(board, opponent, move[0], move[1])
util = compute_utility(board_after_play, color)
unsorted_moves.append((util, board_after_play, move))
heapq.heappush(sorted_moves, (util, board_after_play, move))
if ordering:
select_list = sorted_moves
else:
select_list = unsorted_moves
# sorted_moves.sort(key = lambda x: x[2]) # sort by utility value
# CACHE
for item in select_list:
board_after_play = item[1]
result = alphabeta_max_node(board_after_play, color, alpha, beta, limit - 1, caching, ordering)
if result[1] < min_util:
best_move = item[2]
min_util = result[1]
if min_util <= alpha:
if caching: cache[board] = (best_move, min_util)
return (best_move, min_util)
beta = min(beta, min_util)
if caching: cache[board] = (best_move, min_util)
return (best_move, min_util)
# maximizer
# TODO: maybe change caching so that it stores the max min shit
def alphabeta_max_node(board, color, alpha, beta, limit, caching = 0, ordering = 0):
opponent = 1 if color == 2 else 2
best_move = None
max_util = -math.inf
moves = get_possible_moves(board, color)
sorted_moves = []
unsorted_moves = []
select_list = []
if caching and board in cache:
return cache[board]
if not moves or limit == 0:
return (None, compute_utility(board, color))
for move in moves:
board_after_play = play_move(board, color, move[0], move[1])
util = compute_utility(board_after_play, color)
# seperate into sorted and unsorted
unsorted_moves.append((util, board_after_play, move))
heapq.heappush(sorted_moves, (-util, board_after_play, move))
if ordering:
select_list = sorted_moves
else:
select_list = unsorted_moves
# if ordering:
# sorted_moves.sort(reverse = True, key = lambda x: x[2]) # sort by utility value
for item in select_list:
board_after_play = item[1]
result = alphabeta_min_node(board_after_play, color, alpha, beta, limit - 1, caching, ordering)
if result[1] > max_util:
best_move = item[2]
max_util = result[1]
if max_util >= beta:
if caching: cache[board] = (best_move, max_util)
return (best_move, max_util)
alpha = max(alpha, max_util)
if caching: cache[board] = (best_move, max_util)
return (best_move, max_util)
def select_move_alphabeta(board, color, limit, caching = 0, ordering = 0):
"""
Given a board and a player color, decide on a move.
The return value is a tuple of integers (i,j), where
i is the column and j is the row on the board.
Note that other parameters are accepted by this function:
If limit is a positive integer, your code should enfoce a depth limit that is equal to the value of the parameter.
Search only to nodes at a depth-limit equal to the limit. If nodes at this level are non-terminal return a heuristic
value (see compute_utility)
If caching is ON (i.e. 1), use state caching to reduce the number of state evaluations.
If caching is OFF (i.e. 0), do NOT use state caching to reduce the number of state evaluations.
If ordering is ON (i.e. 1), use node ordering to expedite pruning and reduce the number of state evaluations.
If ordering is OFF (i.e. 0), do NOT use node ordering to expedite pruning and reduce the number of state evaluations.
"""
alpha = -math.inf
beta = math.inf
result = alphabeta_max_node(board, color, alpha, beta, limit, caching, ordering)
cache.clear()
return result[0]
####################################################
def run_ai():
"""
This function establishes communication with the game manager.
It first introduces itself and receives its color.
Then it repeatedly receives the current score and current board state
until the game is over.
"""
print("Othello AI") # First line is the name of this AI
arguments = input().split(",")
color = int(arguments[0]) #Player color: 1 for dark (goes first), 2 for light.
limit = int(arguments[1]) #Depth limit
minimax = int(arguments[2]) #Minimax or alpha beta
caching = int(arguments[3]) #Caching
ordering = int(arguments[4]) #Node-ordering (for alpha-beta only)
if (minimax == 1): eprint("Running MINIMAX")
else: eprint("Running ALPHA-BETA")
if (caching == 1): eprint("State Caching is ON")
else: eprint("State Caching is OFF")
if (ordering == 1): eprint("Node Ordering is ON")
else: eprint("Node Ordering is OFF")
if (limit == -1): eprint("Depth Limit is OFF")
else: eprint("Depth Limit is ", limit)
if (minimax == 1 and ordering == 1): eprint("Node Ordering should have no impact on Minimax")
while True: # This is the main loop
# Read in the current game status, for example:
# "SCORE 2 2" or "FINAL 33 31" if the game is over.
# The first number is the score for player 1 (dark), the second for player 2 (light)
next_input = input()
status, dark_score_s, light_score_s = next_input.strip().split()
dark_score = int(dark_score_s)
light_score = int(light_score_s)
if status == "FINAL": # Game is over.
print
else:
board = eval(input()) # Read in the input and turn it into a Python
# object. The format is a list of rows. The
# squares in each row are represented by
# 0 : empty square
# 1 : dark disk (player 1)
# 2 : light disk (player 2)
# Select the move and send it to the manager
if (minimax == 1): #run this if the minimax flag is given
movei, movej = select_move_minimax(board, color, limit, caching)
else: #else run alphabeta
movei, movej = select_move_alphabeta(board, color, limit, caching, ordering)
print("{} {}".format(movei, movej))
if __name__ == "__main__":
run_ai()
# def alphabeta_max_node_OG(board, color, alpha, beta, limit, caching = 0, ordering = 0):
# opponent = 1 if color == 2 else 2
# best_move = None
# max_util = -math.inf
# moves = get_possible_moves(board, color)
# if not moves or limit == 0:
# return (None, compute_utility(board,color))
# for move in moves:
# board_after_play = play_move(board, color, move[0], move[1])
# if caching and board_after_play in cache:
# result = cache[board_after_play]
# else:
# result = alphabeta_min_node(board_after_play, color, alpha, beta, limit - 1, caching, ordering)
# if caching: cache[board_after_play] = result
# if result[1] > max_util:
# best_move = move
# max_util = result[1]
# if max_util >= beta:
# return (best_move, max_util)
# alpha = max(alpha, max_util)
# return (best_move, max_util)