-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy patheio-stress.py
137 lines (109 loc) · 3.6 KB
/
eio-stress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
from bcc import BPF
from time import sleep
from subprocess import Popen
import argparse
import sys
import os
import ctypes as ct
# Template for the BPF program handed to BCC.  The MAJOR, MINOR and RCVAL
# tokens are placeholders that are textually substituted (str.replace) before
# the program is compiled.
#
# override_function() treats the first argument of the probed function as a
# struct bio and behaves as follows:
#   * bios whose disk major/first_minor do not match MAJOR/MINOR are ignored;
#   * enabled[0] == 0 (or missing): injection is switched off, do nothing;
#   * enabled[0] > 1: an error was already injected during this run, so every
#     further matching call is overridden with RCVAL as well;
#   * enabled[0] == 1: take the current stack id; if that stack is already
#     marked in `seen`, leave the call alone, otherwise mark it, bump
#     enabled[0], send the stack id through the `events` perf buffer and
#     override the return value with RCVAL.
# NOTE(review): the return value of get_stackid() is stored in a u64 without
# an error check -- confirm whether failed lookups need handling.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
BPF_HASH(seen, u64);
BPF_ARRAY(enabled, u64, 1);
BPF_PERF_OUTPUT(events);
BPF_STACK_TRACE(stack_traces, 10240);
int override_function(struct pt_regs *ctx, struct bio *bio)
{
unsigned long rc = RCVAL;
if (bio->bi_bdev->bd_disk->major != MAJOR || bio->bi_bdev->bd_disk->first_minor != MINOR)
return 0;
/* Make sure we're ready to inject errors. */
int index = 0;
u64 *e = enabled.lookup(&index);
if (!e || *e == 0)
return 0;
if (*e > 1)
goto fail;
/* Have we seen this stacktrace yet? */
u64 key = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
u64 zero = 0;
u64 *val = seen.lookup_or_init(&key, &zero);
if (*val == 1)
return 0;
lock_xadd(val, 1);
lock_xadd(e, 1);
events.perf_submit(ctx, &key, sizeof(key));
bpf_trace_printk("overrding something\\n");
fail:
bpf_override_return(ctx, rc);
return 0;
}
"""
# Flag set to 1 by handle_error() once the BPF program reports an injection.
error_tripped = 0

parser = argparse.ArgumentParser()
parser.add_argument(
    "-o", "--override", required=True, help="The function to override"
)
parser.add_argument(
    "-r", "--retval", type=str, help="The return value to use"
)
parser.add_argument(
    "-e", "--executable", type=str, required=True, help="The command to run"
)
parser.add_argument(
    "-d", "--device", type=str, required=True, help="The device to error on"
)
args = parser.parse_args()

# When --retval is not given, the overridden function is made to return NULL.
retval = args.retval if args.retval is not None else "NULL"

# Follow symlinks first so the device numbers come from the resolved path.
dev_path = os.path.realpath(args.device)
dev_st = os.stat(dev_path)

# Fill in the placeholders of the BPF program template, in this order.
for placeholder, value in (
    ("MAJOR", str(os.major(dev_st.st_rdev))),
    ("MINOR", str(os.minor(dev_st.st_rdev))),
    ("RCVAL", retval),
):
    bpf_text = bpf_text.replace(placeholder, value)

print("Loading error injection")
b = BPF(text=bpf_text)

# Hook the BPF handler onto the entry of the function named by --override.
b.attach_kprobe(event=args.override, fn_name="override_function")

# Handle of the most recently spawned child process; assigned in the run loop.
p = None
def handle_error(cpu, data, size):
    """Perf-buffer callback: report where an error was just injected.

    ``data`` is read as a pointer to one unsigned 64-bit stack id.  The
    matching kernel stack is looked up in the ``stack_traces`` table and
    printed one symbol per line, after which the module-level
    ``error_tripped`` flag is set to 1.  ``cpu`` and ``size`` are unused.
    """
    global error_tripped

    stack_id = ct.cast(data, ct.POINTER(ct.c_ulonglong)).contents.value
    frames = b.get_table("stack_traces").walk(stack_id)
    print("Injected error here")
    for frame_addr in frames:
        print(" %s" % b.ksym(frame_addr))
    error_tripped = 1
# Deliver every stack id submitted by the BPF program to handle_error().
b["events"].open_perf_buffer(handle_error)

# Slot 0 of the "enabled" map is the switch the BPF program checks before it
# injects anything.  The BPF side declares the value as u64, so it is written
# with a 64-bit ctypes value rather than a (typically 4-byte) c_int.
enabled_map = b.get_table("enabled")

missed_errors = 0
while True:
    print("Running command")
    error_tripped = 0
    enabled_map[0] = ct.c_ulonglong(1)
    p = Popen(args.executable)

    # Poll for injection events until one is reported or the command exits.
    while error_tripped == 0:
        b.kprobe_poll(timeout=10)
        if p.poll() is not None:
            print("The command exited, breaking")
            break

    print("Waiting for the command to exit")
    p.wait()
    p = Popen(["umount", "/mnt/test"])
    p.wait()

    if error_tripped == 0:
        # Give up after three consecutive runs without a single injection.
        missed_errors += 1
        print("Didn't hit anything, trying again")
        if missed_errors == 3:
            print("Error injection didn't trip anything, exiting")
            break
    else:
        missed_errors = 0

    # Switch injection off while the check script runs.
    enabled_map[0] = ct.c_ulonglong(0)
    p = Popen("./check.sh")
    if p.wait() == 1:
        print("Things went wrong, breaking")
        break

# Detach the probe so no further calls are overridden once we stop.
b.detach_kprobe(args.override)
print("Exiting")