-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from ygreif/initial_commit
El Farol env, equilibria, Erev Roth Agent
- Loading branch information
Showing
7 changed files
with
144 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,4 @@ | ||
# gym-el-farol | ||
The El Farol Bar problem is a framework for understanding bounded rationality in economics. In the problem, 100 people want to visit a bar every Thursday, but won't be happy if it's too crowded. They are happiest if they visit the bar and fewer than 60 other people do so, intermediately happy if they stay home, and unhappy if they visit the bar and more than 60 others do so.
|
||
If agents are perfectly rational there are a huge number of Nash equilibria (100 choose 60 pure equilibria alone), which is implausible. Instead, we need to explore what equilibria occur when agents use different cognition models. For instance, if agents use Q-learning they achieve a pure Nash equilibrium. If they use learning on a set of naive prediction models, a mixed Nash equilibrium is achieved. See [Whitehead](http://www.econ.ed.ac.uk/papers/id186_esedps.pdf) for more information.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from gym.envs.registration import register | ||
|
||
# Register the environment with gym so `gym.make('ElFarolEnv-v0')` works.
register(
    id='ElFarolEnv-v0',
    # Fix: the entry point must reference this package's module
    # (gym_el_farol.envs exports ElFarolEnv); the original pointed at
    # 'gym.envs.multi_agent', a module that does not exist inside gym.
    entry_point='gym_el_farol.envs:ElFarolEnv',
    timestep_limit=200,
    local_only=True
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from gym_el_farol.envs.el_farol_env import ElFarolEnv | ||
from gym_el_farol.envs.equilibria import FuzzyPureNash |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from __future__ import print_function | ||
|
||
from gym import Env | ||
from gym.spaces import Discrete | ||
|
||
class ElFarolEnv(Env):
    """Multi-agent El Farol Bar environment.

    Each of ``n_agents`` simultaneously chooses to attend the bar (action 1)
    or stay home (action 0).  Per-step rewards are ``g`` (good) for attending
    an uncrowded bar, ``s`` (safe) for staying home, and ``b`` (bad) for
    attending a bar over ``threshold`` capacity, with g > s > b.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, n_agents=100, threshold=60, g=10, s=5, b=1):
        # The dilemma is only meaningful when the rewards are ordered g > s > b.
        if g < s or s < b:
            raise Exception("rewards must be ordered g > s > b")

        self.n_agents = n_agents
        self.action_space = Discrete(2)
        # Observe 0 if the agent did not attend, otherwise observe the
        # number of agents who attended.
        self.observation_space = Discrete(n_agents)
        self.reward_range = (b, g)

        def reward_func(action, n_attended):
            # Staying home always pays the safe reward s; attending pays g
            # when the bar is at or under capacity, b when over capacity.
            if action == 0:
                return s
            elif n_attended <= threshold:
                return g
            else:
                return b
        self.reward_func = reward_func
        self.prev_action = [self.action_space.sample() for _ in range(n_agents)]

    def _step(self, action):
        """Step the env with `action`, a sequence of 0/1 choices, one per agent.

        Returns (observation, reward, done, info) with per-agent lists for
        observation and reward; the episode never terminates on its own.
        """
        n_attended = sum(action)
        observation = [n_attended if a else 0 for a in action]
        reward = [self.reward_func(a, n_attended) for a in action]

        self.prev_action = action
        # Fix: gym's step contract expects `info` to be a dict, not a tuple.
        return observation, reward, False, {}

    def _reset(self):
        # Nothing to reset: the only mutable state is prev_action, which the
        # next step overwrites.
        pass

    def _render(self, mode='human', close=False):
        # Human rendering prints the attendance count of the previous step.
        if mode == 'human':
            print(str(sum(self.prev_action)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from collections import defaultdict | ||
|
||
class FuzzyPureNash(object):
    """Detects an approximate ("fuzzy") pure-strategy Nash equilibrium.

    Play is considered in equilibrium when every agent has chosen its most
    frequent action in at least `threshold` fraction of the recorded steps.
    """

    def __init__(self, threshold=.95):
        # agent index -> {action -> number of times the agent played it}
        self.action_counts_by_agent = defaultdict(lambda: defaultdict(int))
        self.threshold = threshold

    def step(self, action):
        """Record one joint action (a sequence of per-agent actions)."""
        for agent, a in enumerate(action):
            self.action_counts_by_agent[agent][a] += 1

    def in_equilibria(self):
        """Return True iff every agent's modal action share meets the threshold."""
        # Fix: removed a Python 2 `print action_counts` debug statement that
        # is a SyntaxError under Python 3.
        for action_counts in self.action_counts_by_agent.values():
            if max(action_counts.values()) / float(sum(action_counts.values())) < self.threshold:
                return False
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import random | ||
from collections import defaultdict | ||
|
||
from gym.spaces import Discrete, Tuple | ||
from gym_el_farol.envs import ElFarolEnv, FuzzyPureNash | ||
|
||
class ErevRothAgent(object):
    """Erev-Roth reinforcement learner over a Discrete action space.

    Each action keeps a propensity in ``self.q``; actions are sampled with
    probability proportional to propensity, and propensities are reinforced
    by received rewards then decayed by 1% on every learning step.
    """

    def __init__(self, observation_space, action_space, **userconfig):
        if not isinstance(observation_space, Discrete):
            raise Exception('Observation space {} incompatible with {}. (Only supports Discrete observation spaces.)'.format(observation_space, self))
        if not isinstance(action_space, Discrete):
            raise Exception('Action space {} incompatible with {}. (Only supports Discrete action spaces.)'.format(action_space, self))
        self.observation_space = observation_space
        self.action_space = action_space
        self.action_n = action_space.n
        self.config = {
            "init_mean" : 1.0,  # Initialize propensities with this mean
            "init_std" : 0.0,   # Initialize propensities with this standard deviation
            "learning_rate" : 1.0}
        self.config.update(userconfig)
        # Propensities are created lazily with a (possibly noisy) initial value.
        self.q = defaultdict(lambda: random.normalvariate(self.config["init_mean"], self.config["init_std"]))

    def act(self):
        """Sample and return an action with probability proportional to its propensity."""
        # Touching q[a] for every a ensures all actions have an entry before
        # we iterate over items().  (TODO: replace with numpy.)
        total = sum(self.q[a] for a in range(0, self.action_space.n))
        r = random.random()
        cum = 0
        chosen = None
        for a, p in self.q.items():
            chosen = a
            cum += p / total
            if r < cum:
                break
        # Fix: floating-point rounding can leave `cum` fractionally below 1.0,
        # in which case the original raised a spurious Exception; fall back to
        # the last action instead.
        self.prev_action = chosen
        return chosen

    def learn(self, reward):
        """Reinforce the previous action by `reward`, then decay all propensities."""
        self.q[self.prev_action] += reward * self.config["learning_rate"]
        for key in self.q:
            self.q[key] *= .99
|
||
def iterate(agents, env):
    """Run one round: gather every agent's action, step the env, and feed
    each agent its own reward.  Returns the list of actions taken."""
    actions = [agent.act() for agent in agents]
    # Fix: name the rewards list distinctly instead of shadowing it with the
    # per-agent scalar in the loop below.
    _observation, rewards, _done, _info = env.step(actions)
    for agent, reward in zip(agents, rewards):
        agent.learn(reward)
    return actions
|
||
def iterations_to_equilbira(agents, env):
    """Run `agents` in `env` until a fuzzy pure Nash equilibrium is sustained.

    Equilibrium is tested over successive 50-iteration windows; returns the
    iteration count when a window was in equilibrium, or False if none was
    found within the iteration budget.  (The misspelled public name is kept
    for backward compatibility.)
    """
    nash = FuzzyPureNash()
    # Fix: `iter` shadowed the builtin; renamed to `i`.
    for i in range(0, 5000000):
        if i % 50 == 0 and i > 0:
            # Fix: use print() calls so the module parses under Python 3.
            print(i)
            if nash.in_equilibria():
                return i
            # Start a fresh window so old play doesn't mask convergence.
            nash = FuzzyPureNash()
            # Diagnostic: each agent's probability of choosing action 0.
            # NOTE(review): the scraped source lost indentation — this print
            # loop may originally have run every iteration instead of once
            # per window; confirm against the repository.
            for agent in agents:
                print(agent.q[0] / (agent.q[0] + agent.q[1]))
        actions = iterate(agents, env)
        nash.step(actions)
    return False
|
||
# Script entry: train 100 Erev-Roth agents on a small-capacity bar and
# report how many iterations equilibrium took.
n_agents = 100
env = ElFarolEnv(n_agents=n_agents, threshold=5)
agents = [ErevRothAgent(env.observation_space, env.action_space)
          for _ in range(n_agents)]
# Fix: print as a function so the script runs under Python 3.
print(iterations_to_equilbira(agents, env))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from setuptools import setup | ||
|
||
setup(name='gym_el_farol',
      version='0.0.1',
      # Fix: without `packages`, an install would ship no modules at all.
      packages=['gym_el_farol', 'gym_el_farol.envs'],
      install_requires=['gym'])