Skip to content

Commit a4d665a

Browse files
committed
examples/c: add hashing and naive substring search algo
Also benchmark it a little. Performance will obviously depend on the haystack and needle strings and so on, but the hashing implementation seems to be on par with the naive implementation for short strings, and gets relatively faster as strings become longer and/or the pattern match happens further into the string. E.g., for searching "ra" in "abracadabra" (end of short string): substr-2084331 [012] ..... 2514091.887184: bpf_trace_printk: BENCH HASHED 156 ns/iter substr-2084331 [012] ..... 2514091.891784: bpf_trace_printk: BENCH NAIVE 183 ns/iter For searching "eaba" in "abacabadabacabaeabacabadabacaba" (middle of longer string): substr-2082624 [015] ..... 2514066.577106: bpf_trace_printk: BENCH HASHED 289 ns/iter substr-2082624 [015] ..... 2514066.588243: bpf_trace_printk: BENCH NAIVE 445 ns/iter But when searching for all occurrences of "a" inside "abracadabra" (almost immediate match in a rather short string): substr-2111313 [078] ..... 2514466.822019: bpf_trace_printk: BENCH HASHED 259 ns/iter substr-2111313 [078] ..... 2514466.827745: bpf_trace_printk: BENCH NAIVE 228 ns/iter Overall, the hashed variant seems best from a practical point of view. Signed-off-by: Andrii Nakryiko <[email protected]>
1 parent c1afaee commit a4d665a

File tree

4 files changed

+252
-1
lines changed

4 files changed

+252
-1
lines changed

examples/c/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@
1515
/lsm
1616
/cmake-build-debug/
1717
/cmake-build-release/
18+
/substr

examples/c/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ CFLAGS := -g -Wall
2525
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
2626

2727
APPS = minimal minimal_legacy minimal_ns bootstrap uprobe kprobe fentry \
28-
usdt sockfilter tc ksyscall task_iter lsm
28+
usdt sockfilter tc ksyscall task_iter lsm substr
2929

3030
CARGO ?= $(shell which cargo)
3131
ifeq ($(strip $(CARGO)),)

examples/c/substr.bpf.c

+192
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2+
/* Copyright (c) 2020 Facebook */
3+
#include <vmlinux.h>
4+
#include <bpf/bpf_helpers.h>
5+
#include <bpf/bpf_core_read.h>
6+
7+
/* License string, placed in the "license" ELF section. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

/* PID that the test programs compare the current task's tgid against. */
int my_pid = 0;

/* Branch-prediction hint; only defined here if nothing else provided it. */
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif

/* Capacity, in bytes, of the character buffer inside struct cstr. */
#define MAX_STR_LEN 128

/* Fixed-capacity buffer holding a C string of at most MAX_STR_LEN bytes. */
struct cstr {
	char data[MAX_STR_LEN];
};
19+
20+
/*
 * Clamp a string index into the valid range of cstr::data.
 *
 * Values below MAX_STR_LEN are returned unchanged; any other value is
 * replaced by 0. Out-of-range values are not expected in practice; the clamp
 * exists so that the verifier sees a bounded index.
 */
static __always_inline u64 cstr_pos(u64 pos)
{
	u64 idx = pos;

	/*
	 * keep the compiler from reordering the range check below with the
	 * array access done in cstr_char()
	 */
	barrier_var(idx);
	/* `idx >= MAX_STR_LEN` never happens, but the verifier must see the check */
	if (!likely(idx < MAX_STR_LEN))
		idx = 0;
	barrier_var(idx);

	return idx;
}
29+
30+
/*
 * Read the character of `s` at index `pos`, with the index first passed
 * through cstr_pos() so the access always stays inside s->data.
 */
static __always_inline char cstr_char(const struct cstr *s, u64 pos)
{
	u64 idx = cstr_pos(pos);

	return s->data[idx];
}
34+
35+
/* 0 and 1 kept in global variables to obfuscate these integers for the verifier */
unsigned zero = 0;
unsigned one = 1;
36+
37+
/*
 * Check whether `needle` occurs in `haystack` at offset `pos`.
 *
 * Needle characters are compared one by one against the haystack until the
 * needle's NUL terminator is reached.
 *
 * Returns true when every needle character matched (an empty needle matches
 * trivially). Returns false on the first mismatch, or when the comparison
 * would have to read outside the haystack buffer.
 */
static bool __substr_match(const struct cstr *haystack __arg_nonnull,
			   const struct cstr *needle __arg_nonnull,
			   int pos)
{
	u64 i, hay_pos;
	char c;

	bpf_for(i, 0, MAX_STR_LEN) {
		c = cstr_char(needle, i);
		if (c == '\0')
			return true;

		/*
		 * cstr_char() clamps out-of-range indexes to 0, which would
		 * silently compare against haystack->data[0] and could report
		 * a bogus match. A negative `pos` turns into a huge u64 here,
		 * so this one check covers both directions.
		 */
		hay_pos = pos + i;
		if (hay_pos >= MAX_STR_LEN)
			return false;

		if (c != cstr_char(haystack, hay_pos))
			return false;
	}

	return true;
}
54+
55+
/*
56+
* Find substring `needle` in a string `haystack`, starting from position
57+
* `start` (zero-indexed). Returns substring start position (>= `start`) if
58+
* match is found; negative result, otherwise.
59+
*/
60+
__noinline int substr_hashed(const struct cstr *haystack __arg_nonnull,
61+
const struct cstr *needle __arg_nonnull,
62+
int start)
63+
{
64+
u32 i, need_hash = zero, hay_hash = zero, mul = one;
65+
int need_len = zero, hay_len = zero, p;
66+
67+
bpf_for(i, 0, MAX_STR_LEN) {
68+
if (needle->data[i] == '\0')
69+
break;
70+
71+
need_len += 1;
72+
need_hash = need_hash * 31 + (u32)needle->data[i];
73+
mul *= 31;
74+
}
75+
76+
if (need_len == 0) /* emtpy substring always matches */
77+
return start;
78+
79+
bpf_for(i, start, MAX_STR_LEN) {
80+
if (haystack->data[i] == '\0')
81+
return -1;
82+
83+
hay_hash = hay_hash * 31 + (u32)haystack->data[i];
84+
hay_len += 1;
85+
if (hay_len < need_len) {
86+
continue;
87+
} else if (hay_len > need_len) {
88+
hay_len -= 1;
89+
hay_hash -= mul * cstr_char(haystack, i - hay_len);
90+
}
91+
92+
/* now hay_len == need_len */
93+
p = i - (hay_len - 1);
94+
if (hay_hash == need_hash && __substr_match(haystack, needle, p))
95+
return p;
96+
}
97+
98+
return -1;
99+
}
100+
101+
/*
 * Find substring `needle` in `haystack` by trying every position from
 * `start` up to the haystack's NUL terminator (or MAX_STR_LEN) and comparing
 * character by character at each one.
 *
 * Returns the position of the first match, or -1 if there is none.
 */
__noinline int substr_naive(const struct cstr *haystack __arg_nonnull,
			    const struct cstr *needle __arg_nonnull,
			    int start)
{
	int *cur;

	bpf_for_each(num, cur, start, MAX_STR_LEN) {
		int pos = *cur;

		/* reached the end of the haystack without a match */
		if (cstr_char(haystack, pos) == '\0')
			break;

		if (!__substr_match(haystack, needle, pos))
			continue;

		return pos;
	}

	return -1;
}
117+
118+
#define BENCH 0
119+
#define BENCH_ITERS 25000
120+
121+
#if BENCH
122+
static struct cstr haystack = { "abacabadabacabaeabacabadabacaba" };
123+
static struct cstr needle = { "eaba" };
124+
#else
125+
static struct cstr haystack = { "abracadabra" };
126+
static struct cstr needle = { "a" };
127+
#endif
128+
129+
/*
 * raw_tp/sys_enter program exercising substr_hashed().
 *
 * Does nothing unless the current task's tgid equals `my_pid`. Otherwise it
 * repeatedly searches `haystack` for `needle`, resuming one position past
 * each hit, until no further match is found.
 *
 * With BENCH == 0 every match position is printed with bpf_printk().
 * With BENCH != 0 the whole search is repeated BENCH_ITERS times and the
 * average duration per repetition is printed instead.
 *
 * Always returns 0.
 */
SEC("raw_tp/sys_enter")
int test_substr_hashed(void *ctx)
{
	int pid = bpf_get_current_pid_tgid() >> 32;
	int p;
#if BENCH
	u64 start, end;
#endif

	if (pid != my_pid)
		return 0;

#if BENCH
	start = bpf_ktime_get_ns();
	bpf_repeat(BENCH_ITERS) {
#endif
	/* start at -1 so that the first search begins at position 0 */
	p = -1;
	bpf_repeat(MAX_STR_LEN) {
		p = substr_hashed(&haystack, &needle, p + 1);
		if (p < 0)
			break;
#if !BENCH
		bpf_printk("HASHED match at pos #%d!", p);
#endif
	}

#if BENCH
	}
	end = bpf_ktime_get_ns();
	bpf_printk("BENCH HASHED %lu ns/iter", (end - start) / BENCH_ITERS);
#endif
	return 0;
}
160+
161+
/*
 * raw_tp/sys_enter program exercising substr_naive().
 *
 * Does nothing unless the current task's tgid equals `my_pid`. Otherwise it
 * repeatedly searches `haystack` for `needle`, resuming one position past
 * each hit, until no further match is found.
 *
 * With BENCH == 0 every match position is printed with bpf_printk().
 * With BENCH != 0 the whole search is repeated BENCH_ITERS times and the
 * average duration per repetition is printed instead.
 *
 * Always returns 0.
 */
SEC("raw_tp/sys_enter")
int test_substr_naive(void *ctx)
{
	int pid = bpf_get_current_pid_tgid() >> 32;
	int p;
#if BENCH
	/* only needed for timing; declaring them unconditionally leaves them unused */
	u64 start, end;
#endif

	if (pid != my_pid)
		return 0;

#if BENCH
	start = bpf_ktime_get_ns();
	bpf_repeat(BENCH_ITERS) {
#endif
	/* start at -1 so that the first search begins at position 0 */
	p = -1;
	bpf_repeat(MAX_STR_LEN) {
		p = substr_naive(&haystack, &needle, p + 1);
		if (p < 0)
			break;
#if !BENCH
		bpf_printk("NAIVE match at pos #%d!", p);
#endif
	}

#if BENCH
	}
	end = bpf_ktime_get_ns();
	bpf_printk("BENCH NAIVE %lu ns/iter", (end - start) / BENCH_ITERS);
#endif

	return 0;
}

examples/c/substr.c

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2+
/* Copyright (c) 2020 Facebook */
3+
#include <stdio.h>
4+
#include <unistd.h>
5+
#include <sys/resource.h>
6+
#include <bpf/libbpf.h>
7+
#include "substr.skel.h"
8+
9+
/*
 * Print callback handed to libbpf_set_print(): writes every message to
 * stderr, whatever its level, and returns vfprintf()'s result.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	int written;

	(void)level; /* no filtering by level */
	written = vfprintf(stderr, format, args);

	return written;
}
13+
14+
int main(int argc, char **argv)
15+
{
16+
struct substr_bpf *skel;
17+
int err;
18+
19+
/* Set up libbpf errors and debug info callback */
20+
libbpf_set_print(libbpf_print_fn);
21+
22+
/* Open BPF application */
23+
skel = substr_bpf__open();
24+
if (!skel) {
25+
fprintf(stderr, "Failed to open BPF skeleton\n");
26+
return 1;
27+
}
28+
29+
/* ensure BPF program only handles write() syscalls from our process */
30+
skel->bss->my_pid = getpid();
31+
32+
/* Load & verify BPF programs */
33+
err = substr_bpf__load(skel);
34+
if (err) {
35+
fprintf(stderr, "Failed to load and verify BPF skeleton\n");
36+
goto cleanup;
37+
}
38+
39+
/* Attach tracepoint handler */
40+
err = substr_bpf__attach(skel);
41+
if (err) {
42+
fprintf(stderr, "Failed to attach BPF skeleton\n");
43+
goto cleanup;
44+
}
45+
46+
printf("Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` "
47+
"to see output of the BPF programs.\n");
48+
49+
for (;;) {
50+
/* trigger our BPF program */
51+
fprintf(stderr, ".");
52+
sleep(1);
53+
}
54+
55+
cleanup:
56+
substr_bpf__destroy(skel);
57+
return -err;
58+
}

0 commit comments

Comments
 (0)