-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from ygreif/initial_commit
El Farol env, equilibria, Erev Roth Agent
- Loading branch information
Showing
7 changed files
with
144 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,4 @@ | ||
# gym-el-farol | ||
The El Farol Bar problem is a framework for understanding bounded rationality in economics. In the problem, 100 people want to visit a bar every Thursday, but won't be happy if it's too crowded. They are happiest if they visit the bar and fewer than 60 other people do so, intermediately happy if they stay home, and unhappy if they visit the bar and more than 60 others do so.
|
||
If agents are perfectly rational there are a huge number of Nash equilibria (100 choose 60 pure equilibria alone), which is implausible. Instead, we need to explore what equilibria occur when agents use different cognition models. For instance, if agents use Q-learning they achieve a pure Nash equilibrium. If they use learning on a set of naive prediction models, a mixed Nash equilibrium is achieved. See [Whitehead](http://www.econ.ed.ac.uk/papers/id186_esedps.pdf) for more information.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from gym.envs.registration import register | ||
|
||
# Register the environment with gym so `gym.make('ElFarolEnv-v0')` works.
register(
    id='ElFarolEnv-v0',
    # Fix: the entry point must reference this package's module
    # (gym_el_farol.envs exports ElFarolEnv); the original pointed at
    # 'gym.envs.multi_agent', a module that does not exist inside gym.
    entry_point='gym_el_farol.envs:ElFarolEnv',
    timestep_limit=200,
    local_only=True
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from gym_el_farol.envs.el_farol_env import ElFarolEnv | ||
from gym_el_farol.envs.equilibria import FuzzyPureNash |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from __future__ import print_function | ||
|
||
from gym import Env | ||
from gym.spaces import Discrete | ||
|
||
class ElFarolEnv(Env):
    """Multi-agent El Farol Bar environment.

    Each of ``n_agents`` simultaneously chooses to attend the bar (action 1)
    or stay home (action 0).  Per-step rewards are ``g`` (good) for attending
    an uncrowded bar, ``s`` (safe) for staying home, and ``b`` (bad) for
    attending a bar over ``threshold`` capacity, with g > s > b.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, n_agents=100, threshold=60, g=10, s=5, b=1):
        # The dilemma is only meaningful when the rewards are ordered g > s > b.
        if g < s or s < b:
            raise Exception("rewards must be ordered g > s > b")

        self.n_agents = n_agents
        self.action_space = Discrete(2)
        # Observe 0 if the agent did not attend, otherwise observe the
        # number of agents who attended.
        self.observation_space = Discrete(n_agents)
        self.reward_range = (b, g)

        def reward_func(action, n_attended):
            # Staying home always pays the safe reward s; attending pays g
            # when the bar is at or under capacity, b when over capacity.
            if action == 0:
                return s
            elif n_attended <= threshold:
                return g
            else:
                return b
        self.reward_func = reward_func
        self.prev_action = [self.action_space.sample() for _ in range(n_agents)]

    def _step(self, action):
        """Step the env with `action`, a sequence of 0/1 choices, one per agent.

        Returns (observation, reward, done, info) with per-agent lists for
        observation and reward; the episode never terminates on its own.
        """
        n_attended = sum(action)
        observation = [n_attended if a else 0 for a in action]
        reward = [self.reward_func(a, n_attended) for a in action]

        self.prev_action = action
        # Fix: gym's step contract expects `info` to be a dict, not a tuple.
        return observation, reward, False, {}

    def _reset(self):
        # Nothing to reset: the only mutable state is prev_action, which the
        # next step overwrites.
        pass

    def _render(self, mode='human', close=False):
        # Human rendering prints the attendance count of the previous step.
        if mode == 'human':
            print(str(sum(self.prev_action)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from collections import defaultdict | ||
|
||
class FuzzyPureNash(object):
    """Detects an approximate ("fuzzy") pure-strategy Nash equilibrium.

    Play is considered in equilibrium when every agent has chosen its most
    frequent action in at least `threshold` fraction of the recorded steps.
    """

    def __init__(self, threshold=.95):
        # agent index -> {action -> number of times the agent played it}
        self.action_counts_by_agent = defaultdict(lambda: defaultdict(int))
        self.threshold = threshold

    def step(self, action):
        """Record one joint action (a sequence of per-agent actions)."""
        for agent, a in enumerate(action):
            self.action_counts_by_agent[agent][a] += 1

    def in_equilibria(self):
        """Return True iff every agent's modal action share meets the threshold."""
        # Fix: removed a Python 2 `print action_counts` debug statement that
        # is a SyntaxError under Python 3.
        for action_counts in self.action_counts_by_agent.values():
            if max(action_counts.values()) / float(sum(action_counts.values())) < self.threshold:
                return False
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import random | ||
from collections import defaultdict | ||
|
||
from gym.spaces import Discrete, Tuple | ||
from gym_el_farol.envs import ElFarolEnv, FuzzyPureNash | ||
|
||
class ErevRothAgent(object):
    """Erev-Roth reinforcement learner over a Discrete action space.

    Each action keeps a propensity in ``self.q``; actions are sampled with
    probability proportional to propensity, and propensities are reinforced
    by received rewards then decayed by 1% on every learning step.
    """

    def __init__(self, observation_space, action_space, **userconfig):
        if not isinstance(observation_space, Discrete):
            raise Exception('Observation space {} incompatible with {}. (Only supports Discrete observation spaces.)'.format(observation_space, self))
        if not isinstance(action_space, Discrete):
            raise Exception('Action space {} incompatible with {}. (Only supports Discrete action spaces.)'.format(action_space, self))
        self.observation_space = observation_space
        self.action_space = action_space
        self.action_n = action_space.n
        self.config = {
            "init_mean" : 1.0,  # Initialize propensities with this mean
            "init_std" : 0.0,   # Initialize propensities with this standard deviation
            "learning_rate" : 1.0}
        self.config.update(userconfig)
        # Propensities are created lazily with a (possibly noisy) initial value.
        self.q = defaultdict(lambda: random.normalvariate(self.config["init_mean"], self.config["init_std"]))

    def act(self):
        """Sample and return an action with probability proportional to its propensity."""
        # Touching q[a] for every a ensures all actions have an entry before
        # we iterate over items().  (TODO: replace with numpy.)
        total = sum(self.q[a] for a in range(0, self.action_space.n))
        r = random.random()
        cum = 0
        chosen = None
        for a, p in self.q.items():
            chosen = a
            cum += p / total
            if r < cum:
                break
        # Fix: floating-point rounding can leave `cum` fractionally below 1.0,
        # in which case the original raised a spurious Exception; fall back to
        # the last action instead.
        self.prev_action = chosen
        return chosen

    def learn(self, reward):
        """Reinforce the previous action by `reward`, then decay all propensities."""
        self.q[self.prev_action] += reward * self.config["learning_rate"]
        for key in self.q:
            self.q[key] *= .99
|
||
def iterate(agents, env):
    """Run one round: gather every agent's action, step the env, and feed
    each agent its own reward.  Returns the list of actions taken."""
    actions = [agent.act() for agent in agents]
    # Fix: name the rewards list distinctly instead of shadowing it with the
    # per-agent scalar in the loop below.
    _observation, rewards, _done, _info = env.step(actions)
    for agent, reward in zip(agents, rewards):
        agent.learn(reward)
    return actions
|
||
def iterations_to_equilbira(agents, env):
    """Run `agents` in `env` until a fuzzy pure Nash equilibrium is sustained.

    Equilibrium is tested over successive 50-iteration windows; returns the
    iteration count when a window was in equilibrium, or False if none was
    found within the iteration budget.  (The misspelled public name is kept
    for backward compatibility.)
    """
    nash = FuzzyPureNash()
    # Fix: `iter` shadowed the builtin; renamed to `i`.
    for i in range(0, 5000000):
        if i % 50 == 0 and i > 0:
            # Fix: use print() calls so the module parses under Python 3.
            print(i)
            if nash.in_equilibria():
                return i
            # Start a fresh window so old play doesn't mask convergence.
            nash = FuzzyPureNash()
            # Diagnostic: each agent's probability of choosing action 0.
            # NOTE(review): the scraped source lost indentation — this print
            # loop may originally have run every iteration instead of once
            # per window; confirm against the repository.
            for agent in agents:
                print(agent.q[0] / (agent.q[0] + agent.q[1]))
        actions = iterate(agents, env)
        nash.step(actions)
    return False
|
||
# Script entry: train 100 Erev-Roth agents on a small-capacity bar and
# report how many iterations equilibrium took.
n_agents = 100
env = ElFarolEnv(n_agents=n_agents, threshold=5)
agents = [ErevRothAgent(env.observation_space, env.action_space)
          for _ in range(n_agents)]
# Fix: print as a function so the script runs under Python 3.
print(iterations_to_equilbira(agents, env))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from setuptools import setup | ||
|
||
setup(name='gym_el_farol',
      version='0.0.1',
      # Fix: without `packages`, an install would ship no modules at all.
      packages=['gym_el_farol', 'gym_el_farol.envs'],
      install_requires=['gym'])