Skip to content


Merge pull request #1 from ygreif/initial_commit
Browse files Browse the repository at this point in the history
El Farol env, equilibria, Erev Roth Agent
  • Loading branch information
ygreif authored Oct 2, 2016
2 parents 6674819 + e4d4865 commit af14c8f
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 0 deletions.
3 changes: 3 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
# gym-el-farol
The El Farol Bar problem is a framework for understanding bounded rationality in economics. In the problem, 100 people want to visit a bar every Thursday, but won't be happy if it's too crowded. They are happiest if they visit the bar and fewer than 60 other people do so, intermediately happy if they stay home, and unhappy if they visit the bar and more than 60 others do so.

If agents are perfectly rational there are a huge number of Nash equilibria (100 choose 60 pure equilibria alone), which is implausible. Instead, we need to explore which equilibria occur when agents use different cognition models. For instance, if agents use Q-learning they reach a pure Nash equilibrium. If they learn over a set of naive prediction models, a mixed Nash equilibrium is reached. See Whitehead, "The El Farol Bar Problem Revisited: Reinforcement Learning in a Potential Game", for more information.
8 changes: 8 additions & 0 deletions gym_el_farol/
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from gym.envs.registration import register

2 changes: 2 additions & 0 deletions gym_el_farol/envs/
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from gym_el_farol.envs.el_farol_env import ElFarolEnv
from gym_el_farol.envs.equilibria import FuzzyPureNash
42 changes: 42 additions & 0 deletions gym_el_farol/envs/
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from __future__ import print_function

from gym import Env
from gym.spaces import Discrete

class ElFarolEnv(Env):

metadata = {'render.modes': ['human']}

def __init__(self, n_agents=100, threshold=60, g=10, s=5, b=1):
    """Configure an El Farol bar game.

    Args:
        n_agents: number of agents that choose an action each round.
        threshold: largest attendance for which attending still pays ``g``.
        g: reward for attending when attendance is at or below ``threshold``.
        s: reward for staying home (action ``0``).
        b: reward for attending when attendance exceeds ``threshold``.

    Raises:
        ValueError: if ``g < s`` or ``s < b``. Ties are accepted by this
            check even though the message states a strict ordering.
    """
    if g < s or s < b:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("rewards must be ordered g > s > b")

    self.n_agents = n_agents
    self.action_space = Discrete(2)
    # Observe 0 if the agent did not attend, otherwise the number of agents
    # who attended. Attendance can be anything from 0 to n_agents inclusive,
    # so the space needs n_agents + 1 values (Discrete(n) covers 0..n-1).
    self.observation_space = Discrete(n_agents + 1)
    self.reward_range = (b, g)

    def reward_func(action, n_attended):
        """Return one agent's reward given its action and total attendance."""
        if action == 0:
            return s
        elif n_attended <= threshold:
            return g
        return b

    self.reward_func = reward_func
    # Seed the "previous action" record with a random action per agent.
    self.prev_action = [self.action_space.sample() for _ in range(n_agents)]

def _step(self, action):
    """Play one round given every agent's action.

    Args:
        action: sequence with one entry per agent; a truthy entry means the
            agent attends.

    Returns:
        A 4-tuple ``(observation, reward, False, ())`` where ``observation``
        and ``reward`` are lists with one entry per agent.
    """
    n_attended = sum(action)

    observation = []
    reward = []
    for choice in action:
        # Agents who stayed home see 0; attendees see the head count.
        if choice:
            observation.append(n_attended)
        else:
            observation.append(0)
        reward.append(self.reward_func(choice, n_attended))

    self.prev_action = action
    return observation, reward, False, ()

def _reset(self):

def _render(self, mode='human', close=False):
if mode == 'human':
17 changes: 17 additions & 0 deletions gym_el_farol/envs/
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from collections import defaultdict

class FuzzyPureNash(object):
    """Track per-agent action frequencies and test for near-pure play.

    An agent counts as playing a (fuzzy) pure strategy when its most
    frequent action accounts for at least ``threshold`` of its recorded
    actions.
    """

    def __init__(self, threshold=.95):
        """Create an empty tracker.

        Args:
            threshold: minimum share of an agent's recorded actions that its
                most frequent action must hold.
        """
        # agent index -> action -> number of times the agent chose it
        self.action_counts_by_agent = defaultdict(lambda: defaultdict(int))
        self.threshold = threshold

    def step(self, action):
        """Record one round; ``action[i]`` is the action of agent ``i``."""
        for agent, a in enumerate(action):
            self.action_counts_by_agent[agent][a] += 1

    def in_equilibria(self):
        """Return True if every recorded agent meets the threshold.

        Returns True when nothing has been recorded yet, because there is
        no agent that fails the check.
        """
        for action_counts in self.action_counts_by_agent.values():
            counts = list(action_counts.values())
            # float() keeps true division on Python 2 as well.
            if max(counts) / float(sum(counts)) < self.threshold:
                return False
        return True
66 changes: 66 additions & 0 deletions scripts/
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import random
from collections import defaultdict

from gym.spaces import Discrete, Tuple
from gym_el_farol.envs import ElFarolEnv, FuzzyPureNash

class ErevRothAgent(object):
    """Agent that picks actions with probability proportional to propensity.

    Each action has a propensity in ``self.q``. ``act`` samples an action in
    proportion to those propensities; ``learn`` adds the scaled reward to the
    propensity of the last action and then decays every propensity.
    """

    def __init__(self, observation_space, action_space, **userconfig):
        """Create an agent for discrete observation and action spaces.

        Args:
            observation_space: must be a ``Discrete`` space.
            action_space: must be a ``Discrete`` space.
            **userconfig: overrides for entries of ``self.config``
                (``init_mean``, ``init_std``, ``learning_rate``, ``decay``).

        Raises:
            Exception: if either space is not ``Discrete``.
        """
        if not isinstance(observation_space, Discrete):
            raise Exception('Observation space {} incompatible with {}. (Only supports Discrete observation spaces.)'.format(observation_space, self))
        if not isinstance(action_space, Discrete):
            raise Exception('Action space {} incompatible with {}. (Only supports Discrete action spaces.)'.format(action_space, self))
        self.observation_space = observation_space
        self.action_space = action_space
        self.action_n = action_space.n
        self.config = {
            "init_mean": 1.0,      # Initialize Q values with this mean
            "init_std": 0.0,       # Initialize Q values with this standard deviation
            "learning_rate": 1.0,  # Multiplier applied to each reward
            "decay": 0.99,         # Factor applied to every Q value per learn()
        }
        # Caller-supplied overrides were previously accepted but ignored.
        self.config.update(userconfig)
        self.q = defaultdict(lambda: random.normalvariate(self.config["init_mean"], self.config["init_std"]))
        # No action has been taken yet; learn() needs act() to run first.
        self.prev_action = None

    def act(self):
        """Sample an action in proportion to the propensities and remember it.

        Returns:
            The chosen action index in ``range(self.action_n)``.

        Raises:
            Exception: if the agent has no actions to choose from.
        """
        propensities = [self.q[a] for a in range(self.action_n)]
        if not propensities:
            raise Exception("No value selected")
        total = sum(propensities)
        r = random.random()
        cum = 0
        # Rounding can leave the cumulative sum slightly below 1.0, so a draw
        # very close to 1.0 falls through the loop; use the last action then.
        chosen = self.action_n - 1
        for a, p in enumerate(propensities):
            cum += p / total
            if r < cum:
                chosen = a
                break
        self.prev_action = chosen
        return chosen

    def learn(self, reward):
        """Reinforce the last action with ``reward``, then decay all values.

        Raises:
            RuntimeError: if called before any call to ``act``.
        """
        if self.prev_action is None:
            raise RuntimeError("learn() called before act()")
        self.q[self.prev_action] += reward * self.config["learning_rate"]
        decay = self.config["decay"]
        # Only existing keys are reassigned, so iterating the dict is safe.
        for key in self.q:
            self.q[key] *= decay

def iterate(agents, env):
    """Play one round: every agent acts, then learns from its own reward.

    Args:
        agents: sequence of agents exposing ``act()`` and ``learn(reward)``.
        env: environment whose ``step(actions)`` returns a 4-tuple with the
            per-agent rewards in second position.

    Returns:
        The list of actions the agents chose this round.
    """
    actions = [a.act() for a in agents]
    _, rewards, _, _ = env.step(actions)
    # Feed each agent its own reward; without this step nobody ever learns.
    for agent, agent_reward in zip(agents, rewards):
        agent.learn(agent_reward)
    return actions

def iterations_to_equilbira(agents, env, max_iterations=5000000, window=50):
    """Run rounds until the agents' play looks like a pure equilibrium.

    Every ``window`` rounds the actions recorded since the previous check are
    tested with ``FuzzyPureNash.in_equilibria``; the tracker is then reset so
    each check only sees the latest window.

    Args:
        agents: agents passed through to ``iterate``.
        env: environment passed through to ``iterate``.
        max_iterations: number of rounds to play before giving up.
        window: number of rounds between equilibrium checks.

    Returns:
        The iteration index at which the check first passed, or ``False`` if
        it never passed within ``max_iterations`` rounds.
    """
    nash = FuzzyPureNash()
    for iteration in range(max_iterations):
        if iteration % window == 0 and iteration > 0:
            print(iteration)
            if nash.in_equilibria():
                return iteration
            nash = FuzzyPureNash()
        actions = iterate(agents, env)
        # Record the round; otherwise the tracker stays empty and the check
        # above passes vacuously at the first window.
        nash.step(actions)
    # NOTE(review): indentation was lost in the source, so the placement of
    # this report is an assumption -- confirm it belongs after the loop.
    for agent in agents:
        print(agent.q[0] / (agent.q[0] + agent.q[1]))
    return False

# Script entry: 100 Erev-Roth agents play an El Farol game whose attendance
# threshold is 5, and the result of the equilibrium search is printed.
n_agents = 100
env = ElFarolEnv(n_agents=n_agents, threshold=5)
agents = [
    ErevRothAgent(env.observation_space, env.action_space)
    for _ in range(n_agents)
]
# print() with a single argument behaves the same on Python 2 and Python 3.
print(iterations_to_equilbira(agents, env))
6 changes: 6 additions & 0 deletions
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from setuptools import setup

install_requires=['gym'] # And any other dependencies foo needs

0 comments on commit af14c8f

Please sign in to comment.