From dec1c5004d3b737d39eea57cf1191e41ce81e621 Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Fri, 6 Feb 2026 10:23:57 -0800 Subject: [PATCH 1/3] Allow fuzzy matching for kallsyms This is useful to decode addresses that are not start-of-the-symbol, like LBR. --- decoder/ksym_test.go | 2 +- kallsyms/decoder.go | 2 +- kallsyms/decoder_test.go | 5 +++-- kallsyms/kallsyms.txt | 12 ++++++++++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/decoder/ksym_test.go b/decoder/ksym_test.go index b14ce513..37d0b1b9 100644 --- a/decoder/ksym_test.go +++ b/decoder/ksym_test.go @@ -28,7 +28,7 @@ func TestKsymDecoder(t *testing.T) { }, { in: []byte{0x78, 0x08, 0xd9, 0x19, 0xeb, 0xff, 0xff, 0xff}, - out: []byte("unknown_addr:0xffffffeb19d90878"), + out: []byte("pipe_unlock"), }, } diff --git a/kallsyms/decoder.go b/kallsyms/decoder.go index a92fb853..8930a703 100644 --- a/kallsyms/decoder.go +++ b/kallsyms/decoder.go @@ -145,7 +145,7 @@ func (d *Decoder) Stack(addrs []uintptr) []Addr { // saveSymLocked resolves the kernel symbol at the given address func (d *Decoder) saveSymLookupLocked(ptr uintptr) bool { addr := d.findFirstBeforePtrLocked(ptr) - if addr.Ptr == ptr { + if addr.Ptr <= ptr { d.found[ptr] = addr.Sym return true } diff --git a/kallsyms/decoder_test.go b/kallsyms/decoder_test.go index 07de9826..4da0bad7 100644 --- a/kallsyms/decoder_test.go +++ b/kallsyms/decoder_test.go @@ -61,10 +61,11 @@ func TestStack(t *testing.T) { func TestSymLookup(t *testing.T) { addrs := []Addr{ - {0xffffffeb19a8a478, ""}, + {0xffffffeb19a8a478, "mm_release"}, {0xffffffeb19a8a480, "__pidfd_prepare"}, - {0xffffffeb19a8a482, ""}, + {0xffffffeb19a8a482, "__pidfd_prepare"}, {0xffffffeb19cadfc0, "mark_page_accessed"}, + {0xffffffff8b704109, "srso_alias_safe_ret"}, } d, err := NewDecoder("kallsyms.txt") diff --git a/kallsyms/kallsyms.txt b/kallsyms/kallsyms.txt index 6322f89c..8c20b724 100644 --- a/kallsyms/kallsyms.txt +++ b/kallsyms/kallsyms.txt @@ -94,3 +94,15 @@ 
ffffffc0806499a8 t bpf_prog_ee0e253c78993a24_sd_devices [bpf] ffffffc0801cedf4 t bpf_prog_9a4f2895a09f572a_mark_page_accessed [bpf] ffffffc08012d000 t bpf_trampoline_6442507883 [bpf] ffffffc080b89000 t kprobe_insn_page [__builtin__kprobes] +-- +ffffffff8b601d30 T __pfx_nop_func +ffffffff8b601d40 T nop_func +ffffffff8b601d50 T __pfx_paravirt_ret0 +ffffffff8b601d60 T paravirt_ret0 +ffffffff8b601d6b T __entry_text_end +ffffffff8b704104 T srso_alias_safe_ret +ffffffff8b70410b T srso_alias_return_thunk +ffffffff8b800000 T _etext +ffffffff8b800000 d __func__.0 +ffffffff8b800000 D __start_rodata +ffffffff8b800018 d __func__.1 From b855fd02e371f39d51e5bc35aaee21deab09dd65 Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Fri, 6 Feb 2026 14:06:27 -0800 Subject: [PATCH 2/3] Use a separate tagged span base for sock tracing Commit 8903706 introduced the userspace tag, but it only updated `sock-trace` to use it, which broke alignment in other users. Let's use a separate struct for the tagged version to make the distinction explicit. 
--- examples/sock-trace.bpf.c | 32 ++++++++++++++++---------------- examples/tracing.bpf.h | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/examples/sock-trace.bpf.c b/examples/sock-trace.bpf.c index 129e5265..a9a06352 100644 --- a/examples/sock-trace.bpf.c +++ b/examples/sock-trace.bpf.c @@ -11,22 +11,22 @@ extern int LINUX_KERNEL_VERSION __kconfig; struct stitch_span_t { - struct span_base_t span_base; + struct span_base_tagged_t span_base; u64 socket_cookie; }; struct sock_release_span_t { - struct span_base_t span_base; + struct span_base_tagged_t span_base; u64 span_id; }; struct sk_span_t { - struct span_base_t span_base; + struct span_base_tagged_t span_base; u64 ksym; }; struct sk_error_report_span_t { - struct span_base_t span_base; + struct span_base_tagged_t span_base; u64 kstack[MAX_STACK_DEPTH]; u32 sk_err; }; @@ -55,21 +55,21 @@ struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, 1024 * 10); __type(key, u64); - __type(value, struct span_parent_t); + __type(value, struct span_parent_tagged_t); } traced_socket_cookies SEC(".maps"); SEC("usdt/./tracing/demos/sock/demo:ebpf_exporter:sock_set_parent_span") int BPF_USDT(sock_set_parent_span, u64 socket_cookie, u64 trace_id_hi, u64 trace_id_lo, u64 span_id, u64 example_userspace_tag) { - struct span_parent_t parent = { .trace_id_hi = trace_id_hi, - .trace_id_lo = trace_id_lo, - .span_id = span_id, - .example_userspace_tag = example_userspace_tag }; + struct span_parent_tagged_t parent = { .trace_id_hi = trace_id_hi, + .trace_id_lo = trace_id_lo, + .span_id = span_id, + .example_userspace_tag = example_userspace_tag }; bpf_map_update_elem(&traced_socket_cookies, &socket_cookie, &parent, BPF_ANY); - submit_span(&stitch_spans, struct stitch_span_t, &parent, { span->socket_cookie = socket_cookie; }); + submit_span_tagged_base(&stitch_spans, struct stitch_span_t, &parent, { span->socket_cookie = socket_cookie; }); return 0; } @@ -78,13 +78,13 
@@ SEC("fentry/__sock_release") int BPF_PROG(__sock_release, struct socket *sock) { u64 socket_cookie = bpf_get_socket_cookie(sock->sk); - struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); + struct span_parent_tagged_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); if (!parent) { return 0; } - submit_span(&sock_release_spans, struct sock_release_span_t, parent, { span->span_id = 0xdead; }); + submit_span_tagged_base(&sock_release_spans, struct sock_release_span_t, parent, { span->span_id = 0xdead; }); bpf_map_delete_elem(&traced_socket_cookies, &socket_cookie); @@ -93,13 +93,13 @@ int BPF_PROG(__sock_release, struct socket *sock) static int handle_sk(struct pt_regs *ctx, u64 socket_cookie) { - struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); + struct span_parent_tagged_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); if (!parent) { return 0; } - submit_span(&sk_spans, struct sk_span_t, parent, { + submit_span_tagged_base(&sk_spans, struct sk_span_t, parent, { // FIXME: PT_REGS_IP_CORE(ctx) does not work for fentry, so we abuse kstack bpf_get_stack(ctx, &span->ksym, sizeof(span->ksym), SKIP_FRAMES); span->ksym -= 8; @@ -173,13 +173,13 @@ SEC("fentry/sk_error_report") int BPF_PROG(sk_error_report, struct sock *sk) { u64 socket_cookie = bpf_get_socket_cookie(sk); - struct span_parent_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); + struct span_parent_tagged_t *parent = bpf_map_lookup_elem(&traced_socket_cookies, &socket_cookie); if (!parent) { return 0; } - submit_span(&sk_error_report_spans, struct sk_error_report_span_t, parent, { + submit_span_tagged_base(&sk_error_report_spans, struct sk_error_report_span_t, parent, { bpf_get_stack(ctx, &span->kstack, sizeof(span->kstack), SKIP_FRAMES); span->sk_err = sk->sk_err; }); diff --git a/examples/tracing.bpf.h b/examples/tracing.bpf.h index 24f227dc..c6798609 100644 
--- a/examples/tracing.bpf.h +++ b/examples/tracing.bpf.h @@ -2,7 +2,6 @@ struct span_parent_t { u64 trace_id_hi; u64 trace_id_lo; u64 span_id; - u64 example_userspace_tag; }; struct span_base_t { @@ -30,3 +29,37 @@ static inline void fill_span_base(struct span_base_t *span, struct span_parent_t fill; \ \ bpf_ringbuf_submit(span, 0); + +struct span_parent_tagged_t { + u64 trace_id_hi; + u64 trace_id_lo; + u64 span_id; + // extra info to carry in the parent + u64 example_userspace_tag; +}; + +struct span_base_tagged_t { + struct span_parent_tagged_t parent; + u64 span_id; + u64 span_monotonic_timestamp_ns; + u64 span_duration_ns; +}; + +static inline void fill_span_base_tagged(struct span_base_tagged_t *span, struct span_parent_tagged_t *parent) +{ + span->parent = *parent; + span->span_monotonic_timestamp_ns = bpf_ktime_get_ns(); + span->span_duration_ns = 0; +} + +#define submit_span_tagged_base(map, type, parent, fill) \ + type *span = bpf_ringbuf_reserve(map, sizeof(type), 0); \ + if (!span) { \ + return 0; \ + } \ + \ + fill_span_base_tagged(&span->span_base, parent); \ + \ + fill; \ + \ + bpf_ringbuf_submit(span, 0); From b6b0c67d3ae4b681c7b817234e24870309e1e328 Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Fri, 6 Feb 2026 14:17:24 -0800 Subject: [PATCH 3/3] Add LBR decoder support --- .vscode/config-schema.yaml | 1 + cmd/ebpf_exporter/main.go | 10 +++++++ decoder/decoder.go | 1 + decoder/lbr.go | 38 +++++++++++++++++++++++++++ examples/lbr-trace.bpf.c | 54 ++++++++++++++++++++++++++++++++++++++ examples/lbr-trace.yaml | 35 ++++++++++++++++++++++++ exporter/lbr.go | 32 ++++++++++++++++++++++ 7 files changed, 171 insertions(+) create mode 100644 decoder/lbr.go create mode 100644 examples/lbr-trace.bpf.c create mode 100644 examples/lbr-trace.yaml create mode 100644 exporter/lbr.go diff --git a/.vscode/config-schema.yaml b/.vscode/config-schema.yaml index 294011eb..d527cf26 100644 --- a/.vscode/config-schema.yaml +++ b/.vscode/config-schema.yaml @@ -145,6 
+145,7 @@ definitions: - inet_ip - kstack - ksym + - lbr - majorminor - pci_class - pci_device diff --git a/cmd/ebpf_exporter/main.go b/cmd/ebpf_exporter/main.go index 054b7ece..54a1ce06 100644 --- a/cmd/ebpf_exporter/main.go +++ b/cmd/ebpf_exporter/main.go @@ -42,6 +42,7 @@ func main() { capabilities := kingpin.Flag("capabilities.keep", "Comma separated list of capabilities to keep (cap_syslog, cap_bpf, etc.), 'all' or 'none'").Default("all").String() btfPath := kingpin.Flag("btf.path", "Optional BTF file path.").Default("").String() skipCacheSize := kingpin.Flag("config.skip-cache-size", "Size of the LRU skip cache").Int() + lbrEnable := kingpin.Flag("lbr.enable", "Enable LBR.").Bool() kingpin.Version(version.Print("ebpf_exporter")) kingpin.HelpFlag.Short('h') kingpin.Parse() @@ -75,6 +76,15 @@ func main() { started := time.Now() + if *lbrEnable { + notify("enabling lbr...") + + err := exporter.EnableLBR() + if err != nil { + log.Fatalf("Error enabling LBR: %v", err) + } + } + notify("parsing config...") configs, err := config.ParseConfigs(*configDir, strings.Split(*configNames, ",")) diff --git a/decoder/decoder.go b/decoder/decoder.go index 6b0504fc..71b3b523 100644 --- a/decoder/decoder.go +++ b/decoder/decoder.go @@ -46,6 +46,7 @@ func NewSet(skipCacheSize int, monitor *cgroup.Monitor) (*Set, error) { "inet_ip": &InetIP{}, "kstack": &KStack{ksym}, "ksym": &KSym{ksym}, + "lbr": &LBR{ksym}, "majorminor": &MajorMinor{}, "pci_class": &PCIClass{}, "pci_device": &PCIDevice{}, diff --git a/decoder/lbr.go b/decoder/lbr.go new file mode 100644 index 00000000..e59aaa7d --- /dev/null +++ b/decoder/lbr.go @@ -0,0 +1,38 @@ +package decoder + +import ( + "fmt" + "strings" + + "github.com/cloudflare/ebpf_exporter/v2/config" + "github.com/cloudflare/ebpf_exporter/v2/kallsyms" + "github.com/cloudflare/ebpf_exporter/v2/util" +) + +// struct perf_branch_entry is 24 bytes +const perfBranchEntrySize = 24 + +// LBR is a decoder that transforms LBR entry array into a stack +type 
LBR struct { + decoder *kallsyms.Decoder +} + +// Decode renders every complete 24-byte perf_branch_entry in the input as a "from -> to" line with raw addresses and kallsyms names +func (l *LBR) Decode(in []byte, _ config.Decoder) ([]byte, error) { + if len(in) == 0 { + return []byte(""), nil + } + + byteOrder := util.GetHostByteOrder() + + lines := make([]string, len(in)/perfBranchEntrySize) + + for i := range lines { + from := uintptr(byteOrder.Uint64(in[i*perfBranchEntrySize : i*perfBranchEntrySize+8])) + to := uintptr(byteOrder.Uint64(in[i*perfBranchEntrySize+8 : i*perfBranchEntrySize+16])) + + lines[i] = fmt.Sprintf("0x%08x -> 0x%08x | %s -> %s", from, to, l.decoder.Sym(from), l.decoder.Sym(to)) + } + + return []byte(strings.Join(lines, "\n")), nil +} diff --git a/examples/lbr-trace.bpf.c b/examples/lbr-trace.bpf.c new file mode 100644 index 00000000..49db8023 --- /dev/null +++ b/examples/lbr-trace.bpf.c @@ -0,0 +1,54 @@ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "tracing.bpf.h" + +// On Zen v4 this is the depth. +#define LBR_DEPTH 16 + +struct failure_span_t { + struct span_base_t span_base; + struct perf_branch_entry entries[LBR_DEPTH]; + u32 errno; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); +} failure_spans SEC(".maps"); + +SEC("fexit/do_sys_openat2") +int BPF_PROG(do_sys_openat2, int dfd, const char *filename, struct open_how *how, int retval) +{ + struct failure_span_t span = { 0 }; + struct failure_span_t *submit; + u64 ts; + + if (retval >= 0) { + return 0; + } + + s64 snapshot_bytes = bpf_get_branch_snapshot(span.entries, sizeof(span.entries), 0); + if (snapshot_bytes <= 0) { // the helper reports failure as a negative error code + return 0; + } + + ts = bpf_ktime_get_ns(); + + span.span_base.parent.trace_id_lo = ts; + span.span_base.span_id = ts; + span.span_base.span_monotonic_timestamp_ns = ts; + span.errno = -retval; + + submit = bpf_ringbuf_reserve(&failure_spans, sizeof(struct failure_span_t), 0); + if (!submit) { + return 0; + } + + *submit = span; + + bpf_ringbuf_submit(submit, 0); + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/examples/lbr-trace.yaml b/examples/lbr-trace.yaml new file
mode 100644 index 00000000..fa62d1f3 --- /dev/null +++ b/examples/lbr-trace.yaml @@ -0,0 +1,35 @@ +tracing: + spans: + - name: failure + ringbuf: failure_spans + service: syscall-failures + labels: + - name: trace_id + size: 16 + decoders: + - name: hex + - name: parent_span_id + size: 8 + decoders: + - name: hex + - name: span_id + size: 8 + decoders: + - name: hex + - name: span_monotonic_timestamp_ns + size: 8 + decoders: + - name: uint + - name: span_duration_ns + size: 8 + decoders: + - name: uint + - name: data + size: 384 + decoders: + - name: lbr + - name: errno + size: 4 + decoders: + - name: uint + - name: errno diff --git a/exporter/lbr.go b/exporter/lbr.go new file mode 100644 index 00000000..d4f75f23 --- /dev/null +++ b/exporter/lbr.go @@ -0,0 +1,32 @@ +package exporter + +import ( + "fmt" + + "github.com/elastic/go-perf" + "github.com/iovisor/gobpf/pkg/cpuonline" + "golang.org/x/sys/unix" +) + +// EnableLBR configures a perf event that enables LBR +func EnableLBR() error { + attr := &perf.Attr{} + attr.Type = perf.HardwareEvent + attr.Config = unix.PERF_COUNT_HW_CPU_CYCLES + attr.SampleFormat = perf.SampleFormat{BranchStack: true} + attr.BranchSampleFormat = perf.BranchSampleFormat{Privilege: perf.BranchPrivilegeKernel, Sample: perf.BranchSampleAnyReturn} + + cpus, err := cpuonline.Get() + if err != nil { + return fmt.Errorf("failed to determine online cpus: %w", err) + } + + for _, cpu := range cpus { + _, err := perf.Open(attr, perf.AllThreads, int(cpu), nil) + if err != nil { + return fmt.Errorf("failed to open perf_event: %w", err) + } + } + + return nil +}