forked from eschenfeldt/stroke_locations
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpopulation.py
128 lines (106 loc) · 3.79 KB
/
population.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
'''Use census data to generate random points according to population density'''
import os
import argparse
import pandas as pd
import numpy as np
import shapely.geometry as sh_geo
import census
import tools
# Directory where generated point sets are written as csv files
POINTS_DIR = os.path.join('data', 'points')
# exist_ok avoids the race between checking for the directory and creating it
os.makedirs(POINTS_DIR, exist_ok=True)

# States used by main when no states are given on the command line
NORTHEAST = [
    'Maine',
    'Vermont',
    'New Hampshire',
    'Massachusetts',
    'Rhode Island',
    'Connecticut',
    'New York',
]
def generate_points(states=None, n=1000, name=None):
    '''
    Generate a set of points randomly distributed across the given states
    according to population density. Points are returned as a dataframe
    and saved to a csv, using the given name or the names of the states

    states -- states handed to census.read_states; ['Connecticut'] is used
        when omitted
    n -- number of points to generate
    name -- base name for the csv; when None the state names returned by
        census.read_states are joined with underscores

    Returns the dataframe of Latitude/Longitude values indexed by LOC_ID.
    The csv is written to POINTS_DIR as '<name>_n=<n>.csv'.
    '''
    if states is None:
        # Build the default per call instead of sharing one mutable list
        # between every invocation
        states = ['Connecticut']

    data, states = census.read_states(states)
    grid = _get_points(data, n)
    if name is None:
        name = '_'.join(states)

    # Create LOC_ID: sampling with replacement leaves duplicate index
    # labels, so renumber from zero before deriving the identifiers
    grid = grid.reset_index(drop=True)
    grid['LOC_ID'] = grid.index
    grid['LOC_ID'] = grid.LOC_ID.apply(
        lambda x: 'L' + tools.cast_to_int_then_str(x))
    grid.set_index('LOC_ID', inplace=True)

    grid.to_csv(os.path.join(POINTS_DIR, f'{name}_n={n}.csv'))
    return grid
def load_points(points_path):
    '''
    Read a points csv from points_path into a dataframe. When the file has
    a LOC_ID column it becomes the index; otherwise the dataframe is
    returned exactly as read.
    '''
    frame = pd.read_csv(points_path)
    if 'LOC_ID' not in frame.columns:
        return frame
    return frame.set_index('LOC_ID')
def generate_points_age_adjusted(states=None, n=1000, name=None):
    '''
    Generate a set of points randomly distributed across the given states,
    weighted by the 'over_65' column of the age adjusted census data
    rather than by total population. Points are returned as a dataframe
    and saved to a csv, using the given name or the names of the states

    states -- states handed to census.read_states_age_adjusted;
        ['New York'] is used when omitted
    n -- number of points to generate
    name -- base name for the csv; when None the state names returned by
        census.read_states_age_adjusted are joined with underscores

    Returns the dataframe of Latitude/Longitude values indexed by LOC_ID.
    The csv is written to POINTS_DIR as '<name>_n=<n>.csv'. This is the
    same path generate_points uses, so for equal name and n each function
    overwrites the other's file.
    '''
    if states is None:
        # Build the default per call instead of sharing one mutable list
        # between every invocation
        states = ['New York']

    data, states = census.read_states_age_adjusted(states)
    print(f"Generating {n} points")
    # NOTE(review): 'over_65' is presumably a count of residents over 65
    # per census area; confirm against census.read_states_age_adjusted
    grid = _get_points(data=data, n=n, weights='over_65')
    if name is None:
        name = '_'.join(states)

    # Create LOC_ID: sampling with replacement leaves duplicate index
    # labels, so renumber from zero before deriving the identifiers
    grid = grid.reset_index(drop=True)
    grid['LOC_ID'] = grid.index
    grid['LOC_ID'] = grid.LOC_ID.apply(
        lambda x: 'L' + tools.cast_to_int_then_str(x))
    grid.set_index('LOC_ID', inplace=True)

    grid.to_csv(os.path.join(POINTS_DIR, f'{name}_n={n}.csv'))
    return grid
def _get_random_point_in_polygon(poly):
    '''
    Return a random point that lies inside poly.

    Candidate points are drawn uniformly from the bounding box of poly and
    the first candidate that poly contains is returned.
    From https://gis.stackexchange.com/a/6413

    Raises ValueError if poly is empty or has zero area: no candidate
    would ever be accepted, so the sampling loop would never finish.
    '''
    if poly.is_empty or poly.area == 0:
        raise ValueError(
            'Cannot sample a point from an empty or zero-area geometry')

    (minx, miny, maxx, maxy) = poly.bounds
    while True:
        p = sh_geo.Point(np.random.uniform(minx, maxx),
                         np.random.uniform(miny, maxy))
        if poly.contains(p):
            return p
def _get_points(data, n=1000, weights='POP10'):
    '''
    Draw n rows from data with replacement, using weights as the sampling
    weights, and pick one random point inside the geometry of each drawn
    row. Returns a dataframe with Latitude and Longitude columns that
    keeps the index labels of the sampled rows.
    '''
    sampled_rows = data.sample(n, replace=True, weights=weights)
    locations = sampled_rows.geometry.apply(_get_random_point_in_polygon)
    # Latitude is the y coordinate of each point, longitude the x coordinate
    latitudes = locations.apply(lambda pt: pt.y)
    longitudes = locations.apply(lambda pt: pt.x)
    return pd.DataFrame({
        'Latitude': latitudes,
        'Longitude': longitudes,
    })
def main(args):
    '''
    Write the point file requested by the parsed command line arguments.
    Falls back to NORTHEAST when no states were given, and generates the
    points with generate_points_age_adjusted.
    '''
    selected_states = args.state or NORTHEAST
    generate_points_age_adjusted(selected_states, args.points, args.filename)
if __name__ == '__main__':
    # Command line entry point: parse the arguments and hand them to main
    default_points = 1000
    default_filename = None

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'state',
        nargs='*',
        help='One or more states to include. Defaults to the Northeast.',
    )
    parser.add_argument(
        '--points',
        '-p',
        type=int,
        default=default_points,
        help=f'Number of points to generate (default {default_points})',
    )
    parser.add_argument(
        '--filename',
        '-f',
        default=default_filename,
        help='Name for the resulting file (defaults to state names)',
    )
    main(parser.parse_args())