-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsummary.py
129 lines (100 loc) · 3.79 KB
/
summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
A stand-alone script to print summary statistics about a data file.
Runs without arguments - GUI dialogs are used to select the
chain and info files.
"""
from prettytable import PrettyTable as pt
import data_loader
from super_gui import open_file_gui
from plot_options import default
import statslib.point as stats
import statslib.one_dim as one_dim
def _summary(name, param, posterior, chi_sq):
    """
    Collect the summary statistics of one parameter into a table row.

    :param name: Name of parameter; becomes the first entry of the row
    :type name: string
    :param param: Data column of parameter
    :type param: presumably array-like - TODO confirm against data_loader
    :param posterior: Posterior column, forwarded to the posterior-mean and
        posterior-pdf routines
    :type posterior: presumably array-like - TODO confirm against data_loader
    :param chi_sq: Chi-squared column, forwarded to the best-fit routine
    :type chi_sq: presumably array-like - TODO confirm against data_loader

    :returns: ``[name, best-fit, posterior mean, lower credible region,
        upper credible region]``
    :rtype: list
    """
    # Point statistics: best-fit first, then the posterior mean
    row = [name,
           stats.best_fit(chi_sq, param),
           stats.posterior_mean(posterior, param)]

    # One-dimensional posterior pdf with the default binning options
    binned = one_dim.posterior_pdf(param,
                                   posterior,
                                   nbins=default("nbins"),
                                   bin_limits=default("bin_limits"))

    # Both ends of the credible region use the second default alpha entry
    for side in ("lower", "upper"):
        row.append(one_dim.credible_region(binned.pdf,
                                           binned.bin_centers,
                                           alpha=default("alpha")[1],
                                           region=side))

    return row
def _summary_table(labels, data, names=None, datafile=None, infofile=None):
    """
    Summarize multiple parameters in a table.

    :param labels: Mapping from a key of ``data`` to the name of the
        parameter stored under that key
    :type labels: dict
    :param data: Chain data indexed by the keys of ``labels``; ``data[0]``
        is used as the posterior and ``data[1]`` as the chi-squared
    :type data: presumably dict of arrays - TODO confirm against data_loader
    :param names: Names of the parameters to summarize. Every parameter in
        ``labels`` is summarized if this is None
    :type names: iterable of strings or None
    :param datafile: Shown in the "File" row of the output
    :type datafile: string
    :param infofile: Shown in the "Info-file" row of the output
    :type infofile: string

    :returns: Table of summary statistics for particular parameters,
        preceded by a table of best-fit and file information
    :rtype: string
    """
    # Summarize all parameters by default
    if names is None:
        names = labels.values()

    # Make a string describing credible interval
    beta_percent = 100. * (1. - default("alpha")[1])
    credible_name = "%.2g%% credible region" % beta_percent

    # Headings for a table. The credible region spans two columns (lower and
    # upper), so the final heading is left blank.
    headings = ["Name",
                "best-fit",
                "posterior mean",
                credible_name,
                ""
                ]
    param_table = pt(headings)
    param_table.align = "l"
    param_table.float_format = "4.2"

    # Make summary data and add it to table
    posterior = data[0]
    chi_sq = data[1]

    # items() rather than iteritems(): iterates the same pairs on Python 2
    # and also exists on Python 3, where iteritems() was removed.
    for key, name in labels.items():
        if name in names:
            param = data[key]
            param_table.add_row(_summary(name, param, posterior, chi_sq))

    # Best-fit information and information about chain
    min_chi_sq = chi_sq.min()
    p_value = stats.p_value(chi_sq, default("dof"))
    bestfit_table = pt(header=False)
    bestfit_table.align = "l"
    bestfit_table.float_format = "4.2"
    bestfit_table.add_row(["File", datafile])
    bestfit_table.add_row(["Info-file", infofile])
    bestfit_table.add_row(["Minimum chi-squared", min_chi_sq])
    bestfit_table.add_row(["p-value", p_value])

    return bestfit_table.get_string() + "\n\n" + param_table.get_string()
def main():
    """
    Ask for a chain and an info file, then summarize the chain.

    Two file-selection dialogs are opened in turn: the first selection is
    used as the data file, the second as the info file.

    :returns: Table of summary statistics for the selected files
    :rtype: string
    """
    # File selection via GUI - data file first, info file second
    chain_path = open_file_gui()
    info_path = open_file_gui()

    # Load and label data; note the loader takes the info file first
    labels, data = data_loader.load(info_path, chain_path)

    return _summary_table(labels,
                          data,
                          datafile=chain_path,
                          infofile=info_path)
if __name__ == "__main__":
    # A single parenthesized argument is valid both as the Python 2 print
    # statement and as the Python 3 print function, and prints the same text.
    print(main())