# create_urls.py
import bisect
import random
import csv
import os
import sys
import urllib.request
import lzma
import math
import argparse
# Command-line interface: an inclusive zoom range plus an optional bounding box.
parser = argparse.ArgumentParser(description='Create a urls.txt for siege.')
parser.add_argument(
    '--minzoom',
    default=0,
    type=int,
    help='Minimum zoom level, inclusive.',
)
parser.add_argument(
    '--maxzoom',
    default=19,
    type=int,
    help='Maximum zoom level, inclusive.',
)
parser.add_argument(
    '--bbox',
    type=str,
    help='Bounding box: min_lon,min_lat,max_lon,max_lat',
)
# Parsed at module level; the rest of the script reads ``args`` directly.
args = parser.parse_args()
def _xy(lon,lat):
x = lon/360.0 + 0.5
sinlat = math.sin(math.radians(lat))
y = 0.5 - 0.25 * math.log((1.0 + sinlat) / (1.0 - sinlat)) / math.pi
return x,y
def percentage_split(size, percentages):
    """Split ``size`` items into consecutive index ranges, one per key.

    Args:
        size: Total number of items to distribute.
        percentages: Mapping of key -> fraction of ``size`` for that key.
            Keys are processed in the mapping's iteration order.

    Yields:
        ``(key, start, end)`` tuples where ``start`` is the previous ``end``
        (0 for the first key) and ``end`` is the running fraction times
        ``size``, truncated to an integer.
    """
    prv = 0
    cumsum = 0
    for zoom, p in percentages.items():
        cumsum += p
        scaled = cumsum * size
        nearest = round(scaled)
        # Accumulated float error can leave a boundary that is really a whole
        # number a hair below it (e.g. ten shares of 0.1 sum to
        # 0.9999999999999999); plain truncation would then drop an item.
        # Snap such values to the integer, truncate everything else.
        if abs(scaled - nearest) <= 1e-9 * max(1.0, abs(scaled)):
            nxt = nearest
        else:
            nxt = int(scaled)
        yield zoom, prv, nxt
        prv = nxt
# Optional tile filter in normalized Mercator coordinates, laid out as
# [west x, north y, east x, south y]; stays None when --bbox is not given.
if args.bbox:
    min_lon, min_lat, max_lon, max_lat = args.bbox.split(',')
    min_x, min_y = _xy(float(min_lon), float(min_lat))
    max_x, max_y = _xy(float(max_lon), float(max_lat))
    # _xy's y axis points south, so the y computed from max_lat is the
    # smaller one and goes first.
    bounds = [min_x, max_y, max_x, min_y]  # invert Y
else:
    bounds = None
# one week of anonymized tile edge request logs from openstreetmap.org
FILENAME = 'tiles-2021-08-08.txt.xz'
OUTPUT_ROWS = 10000

if not os.path.isfile(FILENAME):
    print("Downloading " + FILENAME)
    # Download under a temporary name and rename on success, so an
    # interrupted transfer never leaves a truncated file that a later run
    # would mistake for the complete log.
    partial_download = FILENAME + '.part'
    urllib.request.urlretrieve(f'https://planet.openstreetmap.org/tile_logs/{FILENAME}', partial_download)
    os.replace(partial_download, FILENAME)

# output should be pseudorandom + deterministic.
random.seed(3857)

minzoom = args.minzoom
maxzoom = args.maxzoom

# Relative share of the output rows given to each zoom level, indexed by zoom.
distribution = [2,2,6,12,16,27,38,41,49,56,72,71,99,135,135,136,102,66,37,6] # the total distribution...

# Reject zooms the table does not cover: a negative zoom would silently index
# from the end of the list and a too-large one would raise IndexError.
if not 0 <= minzoom <= maxzoom < len(distribution):
    sys.exit(
        f"zoom range must satisfy 0 <= minzoom <= maxzoom <= {len(distribution) - 1} "
        f"(got minzoom={minzoom}, maxzoom={maxzoom})"
    )

# Per-zoom accumulators for the requested zoom range:
#   total_weight - sum of the distribution entries in range
#   totals[z]    - running sum of request counts seen at zoom z
#   ranges[z]    - for each kept tile, the value of totals[z] before it was added
#   tiles[z]     - the kept tile paths, parallel to ranges[z]
selected_zooms = range(minzoom, maxzoom + 1)
total_weight = sum(distribution[zoom] for zoom in selected_zooms)
totals = {zoom: 0 for zoom in selected_zooms}
ranges = {zoom: [] for zoom in selected_zooms}
tiles = {zoom: [] for zoom in selected_zooms}
# Read the request log and keep, per zoom, every tile inside the requested
# zoom range (and bounding box, if any) together with its cumulative offset.
# Each row is read as "<z>/<x>/<y> <count>", separated by a single space.

# Tile-coordinate bounds per zoom as (min_x, min_y, max_x, max_y), cached so
# the floor() calls run once per zoom instead of once per log row.
tile_bounds = {}

with lzma.open(FILENAME, 'rt') as log_file:
    for row in csv.reader(log_file, delimiter=' '):
        parts = row[0].split('/')
        z = int(parts[0])
        x = int(parts[1])
        y = int(parts[2])
        count = int(row[1])

        if z < minzoom or z > maxzoom:
            continue

        if bounds:
            if z not in tile_bounds:
                # Number of tiles along one axis at this zoom. Named
                # ``scale`` rather than reusing the file handle's name.
                scale = 1 << z
                tile_bounds[z] = tuple(math.floor(edge * scale) for edge in bounds)
            min_tx, min_ty, max_tx, max_ty = tile_bounds[z]
            if not (min_tx <= x <= max_tx and min_ty <= y <= max_ty):
                continue

        # Record the tile's starting offset before adding its own count, so
        # ranges[z] stays sorted and can be searched with bisect later.
        ranges[z].append(totals[z])
        tiles[z].append(row[0])
        totals[z] = totals[z] + count
# Write urls.txt: a header of variable definitions followed by OUTPUT_ROWS
# sampled tile URLs, and print a per-zoom histogram of the rows written.
count_width = len(str(OUTPUT_ROWS))  # column width for the per-zoom row counts

with open('urls.txt','w') as out:
    # Variables that every URL line below references via $(NAME).
    out.write("PROT=http\n")
    out.write("HOST=localhost\n")
    out.write("PORT=8080\n")
    out.write("PATH=\n")
    out.write("EXT=pbf\n")

    rows = 0
    shares = {zoom: distribution[zoom] / total_weight for zoom in range(minzoom, maxzoom + 1)}
    for zoom, start, end in percentage_split(OUTPUT_ROWS, shares):
        rows_for_zoom = end - start

        if totals[zoom] == 0:
            # Nothing to sample from (e.g. the bounding box matched no logged
            # tile at this zoom); random.randrange(0) would raise ValueError.
            print(f"no logged tiles at zoom {zoom}; skipping", file=sys.stderr)
            rows_for_zoom = 0

        for _ in range(rows_for_zoom):
            # ranges[zoom] holds each tile's starting offset in the cumulative
            # request count, so bisecting a uniform draw picks tiles in
            # proportion to their logged requests.
            rand = random.randrange(totals[zoom])
            i = bisect.bisect(ranges[zoom], rand) - 1
            out.write(f"$(PROT)://$(HOST):$(PORT)/$(PATH){tiles[zoom][i]}.$(EXT)\n")

        # Count only rows that were actually written.
        rows += rows_for_zoom

        bar = '█' * math.ceil(rows_for_zoom / OUTPUT_ROWS * 60)
        print(f"{zoom:>2} | {rows_for_zoom:>{count_width}} {bar}")

    print(f"wrote urls.txt with {rows} requests.")