-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlua_executor.py
83 lines (70 loc) · 2.81 KB
/
lua_executor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from .executor_utils import function_with_timeout
from typing import List
from .executor_types import ExecuteResult, Executor
def exec_lua(code, timeout=5):
import tempfile
import subprocess
with tempfile.NamedTemporaryFile(mode="w", suffix=".lua") as f:
f.write(code)
f.flush()
proc = subprocess.Popen(
["lua", f.name], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
try:
outs, errs = proc.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
proc.kill()
outs, errs = proc.communicate()
return outs.decode("utf-8"), errs.decode("utf-8")
class LuaExecutor(Executor):
def execute(self, func: str, tests: List[str], timeout: int = 5) -> ExecuteResult:
# Combine function code and assert statement
imports = 'lu = require("luaunit")'
exec_tests = 'os.exit( lu.LuaUnit.run() )'
func_test_list = [
f'{imports}\n{func}\n{test}\n{exec_tests}' for test in tests]
# Run the tests and collect the results
success_tests = []
failed_tests = []
is_passing = True
num_tests = len(func_test_list)
for i in range(num_tests):
code = func_test_list[i]
_, errs = exec_lua(code, timeout=timeout)
# ex failed:
# lua: /tmp/luaunit_test.lua:3: LuaUnit test FAILURE: expected: 2, actual: 1
# TODO: this logic is not 100% correct. instead, we should just look for "actual: x", we
# don't care about expected.
if errs:
is_passing = False
reason = ""
if "LuaUnit test FAILURE" in errs:
reason = errs.split("LuaUnit test FAILURE: ")[1]
failed_tests += [tests[i] + f" -- {reason}"]
else:
success_tests += [tests[i]]
state = []
for test in tests:
if test in success_tests:
state += [True]
else:
state += [False]
state = tuple(state)
feedback = "Tested passed:"
for test in success_tests:
feedback += f"\n{test}"
feedback += "\n\nTests failed:"
for test in failed_tests:
feedback += f"\n{test}"
return ExecuteResult(is_passing, feedback, state)
def evaluate(self, name: str, func: str, test: str, timeout: int = 5) -> bool:
"""
Evaluates the implementation on Human-Eval Lua.
probably should be written in a dataset-agnostic way but not now
"""
test = "\n".join(test.split("\n")[0:-2])
test += "\ntest_humaneval()"
test += "\nos.exit( lu.LuaUnit.run() )"
code = f"{func}\n{test}"
_, errs = exec_lua(code, timeout=timeout)
print(errs)
return errs == ""