-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
275 lines (251 loc) · 10.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
import logging
import os
from collections import defaultdict
import itertools
import random
# TODO Update constraints
# If we knew that there was an A somewhere, but no other A
# If we find the position, we can move this "allowed" A into the "fixed" category
def is_good_candidate(
word,
fixed_letters: dict,
allowed_letters: list,
forbidden_letters: list,
frequencies: dict, # TODO Add frequency
):
# Example of edge case : we could have a 'P' in 1st position,
# another 'P' in `allowed_letters`,
# and yet another 'P' in `forbidden_letters`,
# that would rule out the word 'PUPPY'
# Other case : We tried "TEPEE" and we got Green Green Grey Yellow Grey
# We can rule out "TENET", because we know that the 4th letter is not an E
# So we need to remember the position of allowed letters
# Check frequencies
for l in set(word):
# I'm using a set since we don't need to count the letters twice
if word.count(l) not in frequencies[l]:
return False
# Check fixed letters
if any(word[i] != l for i, l in fixed_letters.items()):
return False
# Discard the fixed letters, we already checked them
trimmed = {i: l for i, l in enumerate(word) if i not in fixed_letters}
# Check that the other allowed letters are present, but not in the position that we already tried
for pos, letter in allowed_letters:
if trimmed[pos] == letter or letter not in trimmed.values():
return False
# If we computed the frequencies right, we only have to check that the forbidden
# letters are not in the specified places
return all(trimmed[i] != l for i, l in forbidden_letters)
def filter_candidates(
corpus: list,
fixed_letters: dict,
allowed_letters: list,
forbidden_letters: list,
frequencies: dict,
):
"""Return a smaller corpus of all the words that fit the given constraints"""
return [
w
for w in corpus
if is_good_candidate(
w, fixed_letters, allowed_letters, forbidden_letters, frequencies
)
]
def split_words_by_size(filepath):
with open(filepath, "r") as fp:
data = fp.read()
logging.debug(f"Read {len(data.splitlines())} lines.")
size_range = range(3, 6)
split_data = defaultdict(set)
for word in data.splitlines():
if len(word) in size_range:
split_data[len(word)].add(word)
for n in size_range:
start, ext = os.path.splitext(filepath)
with open(f"{start}_{n}{ext}", "w") as file_out:
file_out.write("\n".join(split_data[n]))
def biggest_corpus(guess, corpus):
# TODO add fixed_letters, allowed_letters, forbidden_letters:
word_size = len(guess)
best_corpus = None
best_fixed, best_allowed, best_forbidden = None, None, None
set_indices = list(itertools.product(range(3), repeat=word_size))
for comb in set_indices:
# constraints contains [fixed_letters, allowed_letters and forbidden_letters]
constraints = [dict() for _ in range(3)]
for i, letter in enumerate(guess):
constraints[comb[i]][i] = letter
fixed = constraints[0]
allowed = list(constraints[1].items())
forbidden = list(constraints[2].items())
frequencies = defaultdict(lambda: range(0, len(guess)))
for letter in set(guess):
f_min = list(fixed.values()).count(letter)
f_min += [l for (_, l) in allowed].count(letter)
if letter in [l for (_, l) in forbidden]:
f_max = f_min
else:
f_max = 5
frequencies[letter] = range(f_min, f_max + 1)
filtered_corpus = filter_candidates(
corpus, fixed, allowed, forbidden, frequencies
)
if (
not best_corpus
or len(filtered_corpus) > len(best_corpus)
or (
len(filtered_corpus) == (len(best_corpus))
and (len(best_fixed) > len(fixed))
)
):
best_corpus = filtered_corpus
best_fixed = fixed
best_allowed = allowed
best_forbidden = forbidden
return best_corpus, best_fixed, best_allowed, best_forbidden
def all_tests():
# Tests :
freqs = defaultdict(lambda: range(0, 6))
# Two fixed letters:
assert is_good_candidate("tenet", {0: "t", 1: "e"}, [], [], freqs)
# One allowed letter, not in the word
assert not is_good_candidate("tenet", {0: "z"}, [], [], freqs)
# One allowed letter, in the word
assert is_good_candidate("tenet", {}, [(0, "e")], [], freqs)
# One allowed letter, in the word but in the wrong place
# The test should fail because "TENET" cannot have a yellow E in the second position
assert not is_good_candidate("tenet", {}, [(1, "e")], [], freqs)
# One fixed letter, that is not repeated
assert is_good_candidate("tenet", {2: "n"}, [], [(0, "n")], freqs)
# One forbidden letter, that is in the word
assert not is_good_candidate("tenet", {}, [], [], {**freqs, "n": range(1)})
# One fixed E, another E somewhere else, but no other E
assert is_good_candidate(
"tenet", {1: "e"}, [(0, "e")], [(4, "e")], {**freqs, "e": range(2, 3)}
)
# One fixed E, and the word should not contain another E
assert not is_good_candidate(
"tenet", {1: "e"}, [], [(4, "e")], {**freqs, "e": range(1, 2)}
) # Tests :
freqs = defaultdict(lambda: range(0, 6))
# Two fixed letters:
assert is_good_candidate("tenet", {0: "t", 1: "e"}, [], [], freqs)
# One allowed letter, not in the word
assert not is_good_candidate("tenet", {0: "z"}, [], [], freqs)
# One allowed letter, in the word
assert is_good_candidate("tenet", {}, [(0, "e")], [], freqs)
# One allowed letter, in the word but in the wrong place
# The test should fail because "TENET" cannot have a yellow E in the second position
assert not is_good_candidate("tenet", {}, [(1, "e")], [], freqs)
# One fixed letter, that is not repeated
assert is_good_candidate("tenet", {2: "n"}, [], [(0, "n")], freqs)
# One forbidden letter, that is in the word
assert not is_good_candidate("tenet", {}, [], [], {**freqs, "n": range(1)})
# One fixed E, another E somewhere else, but no other E
assert is_good_candidate(
"tenet", {1: "e"}, [(0, "e")], [(4, "e")], {**freqs, "e": range(2, 3)}
)
# One fixed E, and the word should not contain another E
assert not is_good_candidate(
"tenet", {1: "e"}, [], [(4, "e")], {**freqs, "e": range(1, 2)}
)
# Testing the filtering of corpus
smol_corpus = ["bled", "sled", "fled"]
# Checking all words have an L as their 2nd letter
assert filter_candidates(smol_corpus, {1: "l"}, [], [], freqs) == smol_corpus
# Checking no words have an L as their 3rd letter
assert filter_candidates(smol_corpus, {2: "l"}, [], [], freqs) == []
# Checking only one word starts with a B
assert filter_candidates(smol_corpus, {0: "b"}, [], [], freqs) == ["bled"]
# Checking that no word has a B elsewhere
assert filter_candidates(smol_corpus, dict(), [(0, "b")], [], freqs) == []
# Checking no words contain a Z
assert (
filter_candidates(
smol_corpus, dict(), [(0, "z")], [], {**freqs, "z": range(1, 6)}
)
== []
)
# Checking two of the three words do not contain a B as their 2nd letter
assert filter_candidates(
smol_corpus, dict(), [], [(1, "b")], {**freqs, "b": range(0, 1)}
) == ["sled", "fled"]
# Checking no word fit the "no E" constraint
assert (
filter_candidates(
smol_corpus, dict(), [], [(1, "e")], {**freqs, "e": range(0, 1)}
)
== []
)
def auto_play(n_letters=3):
words = []
with open(f"data/words_en_{n_letters}.txt", "r") as f_in:
words = f_in.read().splitlines()
print(f"[Computer] I'm starting with {len(words)} possible words, good luck")
guess = "TRACE"
while len(words) > 1:
print(f"[Player ] I'm playing: {guess}")
candidates, fix, allow, forbid = biggest_corpus(guess, words)
char_green = "🟩"
char_grey = "⬛"
char_yellow = "🟨"
print("[Computer] Result: ", end="")
for i in range(n_letters):
if i in fix.keys():
print(char_green, end="")
elif i in [x for x, _ in allow]:
print(char_yellow, end="")
else:
print(char_grey, end="")
print("")
if len(words) > 1:
print(f"[Computer] I still have {len(candidates)} choices left.")
words = candidates
guess = random.choice(words)
print("[Computer] Well played...")
def load_corpus(filepath):
if not os.path.isabs(filepath):
root = os.path.split(__file__)[0]
filepath = os.path.join(root, filepath)
with open(filepath, "r") as fp:
return fp.read().splitlines()
def play_round(guess, words):
candidates, fix, allow, forbid = biggest_corpus(guess, words)
char_green = "🟩"
char_grey = "⬛"
char_yellow = "🟨"
display = ""
for i in range(len(guess)):
if i in fix.keys():
display += char_green
elif i in [x for x, _ in allow]:
display += char_yellow
else:
display += char_grey
return display, candidates
def play():
n_letters = 5
words = load_corpus(f"data/words_en_{n_letters}.txt")
print(f"[Computer] I'm starting with {len(words)} possible words, good luck")
while len(words) > 1:
guess = input("Your guess: ")
print(f"[Player ] I'm playing: {guess}")
display, words = play_round(guess, words)
print(f"[Computer] {display}")
if len(words) > 1:
print(f"[Computer] I still have {len(words)} choices left.")
print("[Computer] Well played...")
if __name__ == "__main__":
logging.getLogger().setLevel(logging.DEBUG)
all_tests()
# auto_play(n_letters=5)
play()
# Good examples : EERIE, TEPEE
# If the word is "EERIE", and we tried "TEPEE", we know that there are 3 'E'
# If we tried "EERIE" and we got Allowed Allowed Forbidden Forbidden Forbidden,
# We know that there are only 2 'E'
# The frequency for an unkown letter is 0-5
# The lower bound is the max number of green+yellow squares we got in a single guess
# The exact frequency is the number of green+yellow squares the moment we got a grey square.
# Therefore, until we get a grey square, we don't know the upper limit of a letter frequency