-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtestdip.py
40 lines (33 loc) · 1.42 KB
/
testdip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import dip
import numpy as np
import bz2
# Data: the "statfaculty" sample, encoded as one digit per value.
# Built as a real list (not a lazy ``map`` object) so that it can be sliced,
# reversed and iterated more than once on Python 3 as well as Python 2.
statfaculty = [int(digit) for digit in "1001011205411322223121252002013441001000102"]
def test_statfaculty():
    """Compare our dip on the statfaculty data with the R implementation.

    The dip statistic (first element of the result of ``dip.dip``) is rounded
    to 8 decimals and must equal the reference value, both for the data as
    given and for the data in reversed order.
    """
    reference_dip = 0.05952381

    forward_dip = dip.dip(statfaculty)[0]
    assert round(forward_dip, 8) == reference_dip

    backward_dip = dip.dip(statfaculty[::-1])[0]
    assert round(backward_dip, 8) == reference_dip
def test_random_data():
    """Check that our implementation matches the dips R computed on random data.

    Each bz2-compressed CSV file holds one sample per row: the random sample
    values are in the first n-1 columns and the dip computed by R is in the
    last column. For every file, the dip of each row is recomputed with
    ``dip.dip`` and must differ from the stored value by less than ``maxdiff``.

    Raises:
        AssertionError: if any recomputed dip deviates by ``maxdiff`` or more;
            the message lists the R values and our values for the mismatches.
    """
    # Plain scalar tolerance (a trailing comma here would make it a 1-tuple).
    maxdiff = 1e-8
    files = [
        'test-5.csv.bz2',
        'test-10.csv.bz2',
        'test-100.csv.bz2',
        'test-1000.csv.bz2',
        'test-repeated-5.csv.bz2',
        'test-repeated-10.csv.bz2',
        'test-repeated-100.csv.bz2',
        'test-repeated-1000.csv.bz2',
    ]
    for filename in files:
        with bz2.BZ2File(filename) as infile:
            data = np.array([np.fromstring(line, sep=',') for line in infile])
        # Random samples are in the first n-1 columns, dips computed by R are
        # in last column.
        expected = data[:, -1]
        dips = np.array([dip.dip(idxs=row[:-1])[0] for row in data])
        diff = np.abs(dips - expected)
        # Select the offending rows once so both halves of the message refer
        # to the same samples (R's dip vs. ours).
        mismatch = diff >= maxdiff
        assert diff.max() < maxdiff, "R computed %s where we computed %s" % (
            expected[mismatch], dips[mismatch])
if __name__ == '__main__':
    # Run each check in turn: announce it, execute it, then confirm success.
    # A failing check raises, so 'OK.' is only printed after a clean run.
    for banner, check in (
        ("Testing on statfaculty data.", test_statfaculty),
        ("Testing on random data.", test_random_data),
    ):
        print(banner)
        check()
        print('OK.')