From eb7725055269158b5e3ed827c05293812c37636f Mon Sep 17 00:00:00 2001
From: Yaron
Date: Sun, 2 Oct 2016 11:40:36 -0700
Subject: [PATCH] Fix whitespace

---
 gym_el_farol/envs/equilibria.py | 1 +
 scripts/erev_roth_agent.py      | 9 +++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gym_el_farol/envs/equilibria.py b/gym_el_farol/envs/equilibria.py
index c13c6ef..050b408 100644
--- a/gym_el_farol/envs/equilibria.py
+++ b/gym_el_farol/envs/equilibria.py
@@ -11,6 +11,7 @@ def step(self, action):
 
     def in_equilibria(self):
         for action_counts in self.action_counts_by_agent.values():
+            print action_counts
             if max(action_counts.values()) / float(sum(action_counts.values())) < self.threshold:
                 return False
         return True
diff --git a/scripts/erev_roth_agent.py b/scripts/erev_roth_agent.py
index c24e6a5..4bf881f 100644
--- a/scripts/erev_roth_agent.py
+++ b/scripts/erev_roth_agent.py
@@ -35,11 +35,12 @@ def act(self):
     def learn(self, reward):
         self.q[self.prev_action] += reward * self.config["learning_rate"]
         for key in self.q:
-            self.q[key] *= .9
+            self.q[key] *= .99
 
 def iterate(agents, env):
     actions = [a.act() for a in agents]
     obs, reward, _, _ = env.step(actions)
+    print actions, sum(actions)
     for agent, reward in zip(agents, reward):
         agent.learn(reward)
     return actions
@@ -47,7 +48,7 @@ def iterate(agents, env):
 def iterations_to_equilbira(agents, env):
     nash = FuzzyPureNash()
     for iter in range(0, 5000000):
-        if iter % 100 == 0 and iter > 0:
+        if iter % 50 == 0 and iter > 0:
             print iter
             if nash.in_equilibria():
                 return iter
@@ -57,8 +58,8 @@
     for agent in agents:
         print agent.q[0] / (agent.q[0] + agent.q[1])
     return False
-
-n_agents = 20
+
+n_agents = 100
 env = ElFarolEnv(n_agents=n_agents, threshold=5)
 agents = []
 for i in range(0, n_agents):
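
Note on the .9 -> .99 change (placed after the final hunk, where git apply
ignores it): the per-step multiplier in the Erev-Roth scheme is a forgetting
factor, so raising it from .9 to .99 stretches the half-life of a past payoff
from about 6.6 steps to about 69 steps, making agents weight older payoffs far
more heavily. Below is a minimal standalone sketch of the update rule being
tuned here; choose_action is an assumption (propensity-proportional selection),
since the script's act() is not part of this patch, and the function names are
illustrative only.

    import random

    def erev_roth_learn(q, prev_action, reward, learning_rate, decay=0.99):
        # Reinforce the action just taken, then decay every propensity
        # so older payoffs are gradually forgotten (this patch: .9 -> .99).
        q[prev_action] += reward * learning_rate
        for key in q:
            q[key] *= decay

    def choose_action(q):
        # Assumed selection rule: sample an action with probability
        # proportional to its propensity (roulette-wheel selection).
        total = sum(q.values())
        r = random.uniform(0, total)
        for action, propensity in q.items():
            r -= propensity
            if r <= 0:
                return action
        return max(q, key=q.get)  # guard against floating-point drift

    # Example: two actions (0 = stay home, 1 = go to the bar).
    q = {0: 1.0, 1: 1.0}
    a = choose_action(q)
    erev_roth_learn(q, a, reward=1.0, learning_rate=0.1)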