-
Notifications
You must be signed in to change notification settings - Fork 4
/
autograder.py
executable file
·250 lines (230 loc) · 9.54 KB
/
autograder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#! /usr/bin/env python3
from curses import noecho
import subprocess
import signal
import sys
# C-style define
VOID_VOID = '0'
INT_INTINT = '1'
INT_INTINT_L = 1 << 4
class FuncUsage:
def __init__(self, funcList) -> None:
self.funcList = funcList
def compare(self, log):
# print("***printing from func_usage class!")
# print(log)
stderrLine = log.split('\n')
violate = 0
for line in stderrLine:
if len(line.split(':')) != 2:
# ignore if the line does not conform to <funcName>: <usage>
continue
funcName = line.split(":")[0]
usage = line.split(":")[1]
usageNum = int(usage)
if usageNum == 0:
continue
if funcName in self.funcList:
# only alert if real usage exceeds the reference solution
if usageNum > self.funcList[funcName]:
print("Function name:", funcName, "Expected usage:", self.funcList[funcName], "Real usage:", usageNum)
violate += 1
else:
# find a function that is not inside the function list
print("Function name:", funcName, "Expected usage: 0, Real usage:", usageNum)
violate += 1
if violate == 0:
return True
else:
return False
class TestCase:
dlopenExe = './build/run-dlopen'
customLdExe = './build/run-openlib'
debugExe = './build/debug-openlib'
def __init__(self, soName, funcName, funcType, *args):
self.soName = soName
self.soName = './test_lib/' + soName + '.so'
self.funcName = funcName
self.funcType = funcType
self.extraArg = len(args)
self.args = args
self.craftAns = 0
self.funcWrapper = None
def check_usage(self, log):
if self.funcWrapper is not None:
ret = self.funcWrapper.compare(log)
return ret
else:
# no function wrapper, we don't check it
return True
def add_wrapper(self, wrapList):
self.funcWrapper = FuncUsage(wrapList)
def assign_score(self, score):
self.score = score
self.claimedScore = 0
def assign_name(self, name):
self.testName = name
def debug(self):
self.debugArg = ['gdb', '--args']
self.debugArg.extend((self.debugExe, self.soName, self.funcName, self.funcType))
if self.extraArg:
self.debugArg.extend(self.args)
subprocess.call(self.debugArg)
def add_answer(self, ans):
self.craftAns = 1
self.ans = ans.decode("utf-8")
def run_task(self):
print("Test name:", self.testName)
argList = []
argList.extend((self.customLdExe, self.soName, self.funcName, self.funcType))
if self.extraArg:
argList.extend(self.args)
try:
# print(argList)
# The reason why sometimes I do not want to encode it is that it can print non-utf8 char
# typically from an uninitialized buffer
if sys.version_info >= (3, 7, 0):
RetProc = subprocess.run(argList, check=True, capture_output=True, encoding='utf-8')
# RetProc = subprocess.run(argList, check=True, capture_output=True)
else:
RetProc = subprocess.run(argList, check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
# print(type(RetProc), type(RetProc.stdout))
# -> subprocess.CompletedProcess, bytes
RetProc.stdout = RetProc.stdout.decode('utf-8')
RetProc.stderr = RetProc.stderr.decode('utf-8')
# I get lazy and don't decode the error string, for we do not need it to compare
except subprocess.CalledProcessError as e:
if e.returncode == -signal.SIGSEGV:
print("SIGSEGV received in custom loader. Maybe you want to debug it with gdb.")
elif e.returncode == -signal.SIGABRT:
print("SIGABRT received in custom loader. Exit as expected.")
print("Last words from stdout:", e.stdout)
print("Last words from stderr:", e.stderr)
else:
print("Oops, custom loader does not return normally.")
print("Last words from stdout:", e.stdout)
print("Last words from stderr:", e.stderr)
print("Bad return code:", e.returncode)
return
else:
# open test program in dlopen, if there is no problem in custom loader
argList[0] = self.dlopenExe
try:
# print(argList)
if sys.version_info >= (3, 7, 0):
DLProc = subprocess.run(argList, check=True, capture_output=True, encoding='utf-8')
# DLProc = subprocess.run(argList, check=True, capture_output=True)
else:
DLProc = subprocess.run(argList, check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
DLProc.stdout = DLProc.stdout.decode('utf-8')
DLProc.stderr = DLProc.stderr.decode('utf-8')
except subprocess.CalledProcessError as e:
if e.returncode == -signal.SIGSEGV:
print("SIGSEGV received while dlopen. Please contact the TA.")
else:
print("Oops, error in dlopen. Please contact the TA.")
print("Last words from stdout:", e.stdout)
print("Last words from stderr:", e.stderr)
print("Bad return code:", e.returncode)
return
else:
if self.craftAns:
realAns = self.ans
else:
realAns = DLProc.stdout
if RetProc.stdout == realAns:
if self.check_usage(RetProc.stderr) == False:
# handle usage comparation in FuncUsage class
print("Correct output while abusing system functions.\nYou are free to test your implementation in this way, but no score for you.")
print("Shim checking failed!")
else:
self.claimedScore += self.score
print("Expected output:", realAns)
print("Your output:", RetProc.stdout)
print("Passed!")
else:
print("TestCase failed.")
print("Expected output:", realAns)
print("Your output:", RetProc.stdout)
def pretty_print(i):
print('#' * 30)
print("#{}#".format(' ' * 28))
if i < 10:
print('# Evaluating TestCase ${} #'.format(i))
# guess we don't have over 100 testcases, hopefully...
else:
print('# Evaluating TestCase ${} #'.format(i))
print("#{}#".format(' ' * 28))
print('#' * 30)
print()
if __name__ == '__main__':
if sys.version_info < (3, 5, 0):
print("Your python version is too low, please update it to be at least 3.5")
quit()
allTests = []
test0 = TestCase('SimpleMul', 'multiply', INT_INTINT, '2', '3')
test0.assign_score(80)
test0.assign_name('zero relocation')
test0.add_wrapper(dict())
allTests.append(test0)
test1 = TestCase('lib1', 'foo', VOID_VOID)
test1.assign_score(5)
test1.assign_name('fake libc(one PLT relocation with answer known)')
test1.add_wrapper({"dlopen": 1, "dlsym": 2})
allTests.append(test1)
test2 = TestCase('SimpleIni', 'entry', VOID_VOID)
test2.assign_score(5)
test2.add_wrapper({"dlopen": 1, "dlsym": 2})
test2.assign_name('one initialization(depend on test 1)')
allTests.append(test2)
test3 = TestCase('SimpleDep', 'wrapper', INT_INTINT, '2', '3')
test3.assign_score(3)
test3.assign_name('one true PLT relocation')
test3.add_wrapper(dict())
allTests.append(test3)
test4 = TestCase('SimpleData', 'wrapper', VOID_VOID)
test4.assign_score(3)
test4.assign_name('one global data relocation')
test4.add_wrapper({"dlopen": 1, "dlsym": 2})
allTests.append(test4)
test5 = TestCase('IndirectDep', 'wrapperAgain', INT_INTINT, '2', '3')
test5.assign_score(2)
test5.assign_name('one 2-layer relocation')
test5.add_wrapper(dict())
allTests.append(test5)
# TODO: check whether the second call still calls trampoline
# requires a distinct function type
test6 = TestCase('SimpleDep', 'wrapper', str(INT_INTINT_L), '2', '3')
test6.assign_score(2)
test6.assign_name('lazy binding')
test6.add_answer(b'Resolving address for entry 0\n6\n')
test6.add_wrapper({"printf": 1})
allTests.append(test6)
# sanity tests end
"""
further tests can include:
order-related init;
rpath handling(single and multi layer);
register saving in assembly;
relocation with addend;
various relocation types;
syspath searching;
these tests are kinda like corner cases and contradict the goal of this project
"""
if len(sys.argv) > 2:
print("Please specify an individual testcase!")
exit(-1)
elif len(sys.argv) == 2:
allTests[int(sys.argv[1])].debug()
print('-'*50)
print("gdb exits")
exit(0)
totalScore = 0
claimedScore = 0
for i, t in list(enumerate(allTests)):
pretty_print(i)
t.run_task()
totalScore += t.score
claimedScore += t.claimedScore
print('-'*50)
print("Your Score:", claimedScore, "/", totalScore)