-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgenerate_table.py
169 lines (141 loc) · 6.04 KB
/
generate_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/python
# Generates table 1 for the paper.
# depends on three supplemental files which describe their creation:
# 'linesOfSourceCode'
# 'diffCounts'
# 'callgraphSize'
# Number of trailing whitespace-separated statistics fields that processFile
# parses from a statistics line of a log file.
num_stats = 6
def processFile(pathToFile):
    """Parse one experiment log and return ``(run_time, stats)``.

    Lines containing "SUMMARIES" are ignored.  Of the remaining lines, the
    run time is the last whitespace-separated token of the second-to-last
    line, and the statistics are the last ``num_stats`` tokens of the last
    line (each truncated at its first comma and parsed as an int).

    stats is a list of ints, representing |D*|, |Delta|, |unique procedures
    rho in D*|, total cells, nonempty cells after analysis, and number of
    self loops in Delta.  One extra entry is appended: the nonempty cells
    before analysis (index 4 of the third-to-last line's statistics) when
    the path contains 'incr', otherwise 0.
    """
    # print(f"processing {pathToFile}")

    def trailing_ints(text):
        # Tokens may carry a trailing comma-separated suffix; keep the part before it.
        return [int(token.split(',')[0]) for token in text.split()[-num_stats:]]

    with open(pathToFile) as f:
        kept = [raw.strip() for raw in f if "SUMMARIES" not in raw]

    run_time = float(kept[-2].split()[-1])
    stats = trailing_ints(kept[-1])
    # Only incremental runs log a "before analysis" statistics line.
    nonempty_before = trailing_ints(kept[-3])[4] if 'incr' in pathToFile else 0
    stats.append(nonempty_before)
    return run_time, stats
def readFile(pathToFile):
    """Read a whitespace-separated data file into a ``{name: int}`` map.

    The first line of the file (the file generation line) is dropped.  For
    every other non-blank line, the first token is the program name and the
    last token is parsed as its integer value.  Blank lines are skipped so
    that, for instance, a trailing empty line does not raise IndexError.

    Raises ValueError if the last token of a data line is not an integer.
    """
    with open(pathToFile) as f:
        rows = [line.split() for line in f]
    # [1:] to drop the file generation line; `if row` skips blank lines
    progMap = {row[0]: int(row[-1]) for row in rows[1:] if row}
    return progMap  # map of program names to some integer data
def percent(mode, file_prefix):
    """Return the cells made nonempty by *mode* as a fraction of the batch run.

    Parses ``file_prefix + mode`` and the batch log ``file_prefix +
    postfixes[0]``.  The numerator is the mode's nonempty cells after analysis
    (stats[4]) minus its nonempty cells before analysis (stats[-1]); the
    denominator is the batch run's nonempty cells after analysis.
    """
    mode_stats = processFile(file_prefix + mode)[1]
    batch_stats = processFile(file_prefix + postfixes[0])[1]
    newly_nonempty = mode_stats[4] - mode_stats[-1]
    return newly_nonempty / batch_stats[4]
def multirow(cell):
    """Wrap *cell* in a LaTeX multirow command spanning two rows."""
    # The backslash is escaped explicitly: a bare "\m" is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    return f"\\multirow{{2}}{{*}}{{{cell}}}"
def citeProgram(prog):
    """Return a LaTeX citetalias command for *prog*.

    The citation key is "bugswarm" followed by the text after the last '-'
    in *prog* (the whole string when it contains no '-').
    """
    suffix = prog.split('-')[-1]
    # The backslash is escaped explicitly: a bare "\c" is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    return f"\\citetalias{{bugswarm{suffix}}}"
def averageDict(dictionary):
    """Return the arithmetic mean of the values stored in *dictionary*."""
    total = sum(dictionary.values())
    count = len(dictionary)
    return total / count
def strPercent(percentage):
    """Format a fraction as a percentage string with one decimal place.

    Exactly zero is rendered as "0.0"; any other value whose percentage is
    below 0.1 is rendered as "<0.1".
    """
    scaled = percentage * 100
    if scaled == 0:
        return "0.0"
    return "<0.1" if scaled < 0.1 else f"{scaled:.1f}"
def strTime(ms_time):
    """Format a duration given in milliseconds as seconds with two decimals.

    Exactly zero is rendered as "0.00"; any other value below 0.01 seconds
    is rendered as "<0.01".
    """
    secs = ms_time / 1000.0
    if secs == 0:
        return "0.00"
    return "<0.01" if secs < 0.01 else f"{secs:.2f}"
def filePrefix(analysis_prefix, run_number, program):
    """Build the log path prefix ``out/<analysis_prefix><run_number>/<program>``."""
    run_dir = "{}{}".format(analysis_prefix, run_number)
    return "out/{}/{}".format(run_dir, program)
def generateDataRow(analysis, program):
    """Build the LaTeX cells for one (analysis, program) table row.

    For every mode in ``postfixes`` two cells are emitted: first either the
    absolute count of newly nonempty cells (for the batch mode,
    ``postfixes[0]``) or the percentage relative to batch (all other modes),
    then the run time averaged over ``num_runs`` runs.  The per-mode values
    are also accumulated into ``total_of_averages``, ``total_of_percents``
    and ``total_of_absStates`` for the average row.

    Returns the cells, each prefixed by " & ", followed by a LaTeX row
    terminator.  Requires ``num_runs >= 1``.
    """
    cells = []
    # the abstract work done should be the same for each run, so just use the first run's files for that
    fprefix = filePrefix(analysis, 1, program)
    for run_mode in postfixes:
        total_time = 0
        for exp_run in range(1, num_runs + 1):
            run_time, stats = processFile(filePrefix(analysis, exp_run, program) + run_mode)
            total_time = total_time + run_time
        average = total_time / num_runs
        key = (analysis, run_mode)
        total_of_averages[key] = total_of_averages[key] + average
        # percent() parses two log files per call, so compute it once per mode
        # and reuse it for both the running total and the printed cell.
        mode_percent = percent(run_mode, fprefix)
        total_of_percents[key] = total_of_percents[key] + mode_percent
        # `stats` here is the one parsed from the last run's log file.
        num_varphi = stats[4] - stats[-1]
        total_of_absStates[key] = total_of_absStates[key] + num_varphi
        if run_mode == postfixes[0]:
            cells.append(f" & {num_varphi}")
        else:
            cells.append(f" & {strPercent(mode_percent)}")
        cells.append(f' & {strTime(average)}')
    return "".join(cells) + ' \\\\'
def generateAverageRow(analysis):
    """Build the LaTeX cells of the "average" row for *analysis*.

    Each accumulated total is divided by ``num_programs``.  For the batch
    mode (``postfixes[0]``) the first cell is the mean count of newly nonempty
    cells; for every other mode it is the mean percentage.  The second cell
    of each mode is the mean run time.
    """
    cells = []
    for mode in postfixes:
        key = (analysis, mode)
        if mode != postfixes[0]:
            leading = strPercent(total_of_percents[key] / num_programs)
        else:
            leading = f'{total_of_absStates[key]/num_programs:.0f}'
        cells.append(f' & {leading}')
        cells.append(f' & {strTime(total_of_averages[key]/num_programs)}')
    return ''.join(cells) + ' \\\\'
# Suffixes of the per-mode log files; postfixes[0] is the batch run that the
# other modes are compared against.
postfixes = [".batch", ".incr", ".dd", ".ddincr"]
runs = [f"run{i}/" for i in range(1, 11)]
# Directory-name prefixes of the two analyses' output folders (see filePrefix).
interval_prefix = "run"
nullability_prefix = "log"
analysis_prefixes = [interval_prefix, nullability_prefix]
num_runs = 10  # 10
excluded_programs = []
# ["tananaev-traccar-188473749", "tananaev-traccar-255051211", "raphw-byte-buddy-234970609"]

# One program name per line of the query-artifact list, minus exclusions.
with open('experiment_inputs/query_artifacts') as f:
    programs = [
        name
        for name in [raw.strip() for raw in f]
        if name not in excluded_programs
    ]
num_programs = len(programs)

locs = readFile('linesOfSourceCode')
edited_locs = readFile('diffCounts')
callgraph_sizes = readFile('callgraphSize')

# Header line listing the modes in column order.
print(', '.join(['program'] + postfixes))

offset = 6*" "

# Running totals per (analysis, mode); filled in by generateDataRow and
# consumed by generateAverageRow.
total_of_averages = {(analysis, run_mode): 0 for run_mode in postfixes for analysis in analysis_prefixes}
total_of_percents = {(analysis, run_mode): 0 for run_mode in postfixes for analysis in analysis_prefixes}
total_of_absStates = {(analysis, run_mode): 0 for run_mode in postfixes for analysis in analysis_prefixes}

for position, program in enumerate(programs):
    # A gray rule separates consecutive programs (none before the first one).
    if position > 0:
        print(offset+"\\arrayrulecolor{gray}\\hline")
    kloc = f'{locs[program]/1000.0:.1f}'
    print(offset + f"{multirow(citeProgram(program))} & {multirow(kloc)} & {multirow(edited_locs[program])} & {multirow(callgraph_sizes[program])}")
    # "I" row: interval analysis; "N" row: nullability analysis.
    print(offset + 3*" " + "& I" + generateDataRow(interval_prefix, program))
    print(offset + 3*"& " + "& N" + generateDataRow(nullability_prefix, program))

print(offset + '\\arrayrulecolor{black}\\midrule')

# Final block: averages over all entries of the three data files.
average_kloc = f'{averageDict(locs)/1000:.1f}'
average_eloc = f'{averageDict(edited_locs):.0f}'
average_cg = f'{averageDict(callgraph_sizes):.0f}'
print(offset + f'{multirow("average")} & {multirow(average_kloc)} & {multirow(average_eloc)} & {multirow(average_cg)}')
print(offset + 3*" " + "& I" + generateAverageRow(interval_prefix))
print(offset + 3*"& " + "& N" + generateAverageRow(nullability_prefix))
# data = [(prog, [(postfix, processFile('out/'+runs[0]+prog+postfix)[1]) for postfix in postfixes]) for prog in programs]
# print(data)